// Kernel `add`: declared __global__ and takes an element count `n` plus two
// float pointers, `x` and `y`.
// NOTE(review): the leading "4" and "5" below look like listing line numbers
// that were fused into the text during extraction; they are not valid CUDA
// tokens. Confirm against the original file before compiling.
4 __global__
void add(
int n,
float *x,
float *y) {
// Sequential loop over i = 0 .. n-1. The loop body and the kernel's closing
// brace are not present in this excerpt.
// NOTE(review): the index does not use threadIdx/blockIdx, so every launched
// thread would walk the full range. TODO confirm that is intended.
5 for (
int i = 0; i < n; i++)
// Host-side fragment: allocate, launch `add`, synchronize, and check results.
// NOTE(review): the enclosing function header, the declarations of `x`, `y`
// and `N`, and several closing braces are not visible in this excerpt. The
// leading integers (14, 15, 18, 24, ...) look like fused listing line numbers.
// Confirm against the original file.

// Allocate N floats for `x` with cudaMallocManaged.
// NOTE(review): the returned cudaError_t is discarded; consider checking it.
14 cudaMallocManaged(&x,
N *
sizeof(
float));
// Same allocation for `y`; its return code is likewise unchecked.
15 cudaMallocManaged(&y,
N *
sizeof(
float));
// Host loop over i = 0 .. N-1. Its body and closing brace are missing here.
// NOTE(review): presumably this initializes x[i] and y[i]. TODO confirm.
18 for (
int i = 0; i <
N; i++) {
// Launch `add` with an execution configuration of 1 block and 1 thread.
// NOTE(review): no cudaGetLastError() follows the launch, so a launch failure
// would go unnoticed.
24 add<<<1, 1>>>(
N, x, y);
// Block the host until the device has finished, before `y` is read below.
// The return code is not checked.
27 cudaDeviceSynchronize();
// Track the largest absolute deviation of y[i] from the reference value 3.0f.
30 float maxError = 0.0f;
31 for (
int i = 0; i <
N; i++)
32 maxError = fmax(maxError, fabs(y[i] - 3.0f));
// Print the largest deviation found.
33 std::cout <<
"Max error: " << maxError << std::endl;
// Signature of the `add` kernel on a single line: element count `n` and two
// float pointers `x` and `y`. No body or terminating `;` follows within this
// excerpt.
// NOTE(review): this looks like a repeated or quoted copy of the signature
// shown earlier in the excerpt. TODO confirm.
__global__ void add(int n, float *x, float *y)