// Kernel `add`: declared __global__, taking an element count `n` and two
// float pointers `x` and `y`. Only the signature and the loop header below
// are visible here.
// NOTE(review): the leading integers on some lines (8, 9, ...) look like
// line numbers carried over from the original listing; they are not valid
// CUDA tokens. Confirm and strip before compiling.
8 __global__
void add(
int n,
float *x,
float *y) {
// Loop header: i runs over [0, n), one index at a time.
// NOTE(review): the statement this loop executes and the closing brace of
// the kernel appear to be missing from the excerpt (numbering jumps from 9
// to 18) — confirm against the full file.
9 for (
int i = 0; i < n; i++)
// Host-side sequence: allocate x and y, launch `add`, wait for the device,
// then report the largest deviation of y from 3.0f.
// NOTE(review): the enclosing function header and the definitions of N, x
// and y are not visible in this excerpt.

// Allocate N * sizeof(float) bytes each for x and y as managed (unified)
// memory via cudaMallocManaged.
// NOTE(review): the return values of both calls are discarded, so an
// allocation failure would go unnoticed here — consider checking them.
18 cudaMallocManaged(&x,
N *
sizeof(
float));
19 cudaMallocManaged(&y,
N *
sizeof(
float));
// Host loop header over i in [0, N).
// NOTE(review): the loop body and closing brace are not visible (numbering
// jumps from 22 to 28); presumably x and y are filled here — confirm.
22 for (
int i = 0; i <
N; i++) {
// Launch `add` with execution configuration <<<1, 1>>> (one block of one
// thread), passing N, x and y.
// NOTE(review): no launch-error query follows the launch in the visible code.
28 add<<<1, 1>>>(
N, x, y);
// Block the host until the device has finished, so that y can be read by
// the host loop below. The return value is not inspected.
31 cudaDeviceSynchronize();
// Track the largest absolute difference between y[i] and 3.0f across all
// N elements, starting from 0.0f.
34 float maxError = 0.0f;
35 for (
int i = 0; i <
N; i++)
36 maxError = fmax(maxError, fabs(y[i] - 3.0f));
// Print the result to standard output, followed by a newline/flush.
37 std::cout <<
"Max error: " << maxError << std::endl;
__global__ void add(int n, float *x, float *y)