openGPMP
Open Source Mathematics Package
_gpu_mtx_add.cu
Go to the documentation of this file.
1 #include <iostream>
2 #include <math.h>
// Element-wise in-place addition: y[i] = x[i] + y[i] for every i in [0, n).
//
// Launch layout: any 1D grid of 1D blocks. Each thread starts at its flat
// global index and advances by the total number of launched threads, so the
// result is the same for <<<1, 1>>> as for a grid that covers n.
// Shared memory: none.
// Preconditions: x and y must each reference at least n floats that are
// readable (x) / readable and writable (y) from device code.
__global__ void add(int n, float *x, float *y) {
  // 64-bit index math so `i += stride` cannot overflow for n close to INT_MAX.
  const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
  for (long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < n; i += stride) {
    y[i] = x[i] + y[i];
  }
}

// Prints a diagnostic to stderr when `status` is not cudaSuccess.
// Returns true on success, false on failure, so callers can bail out.
static bool cudaOk(cudaError_t status, const char *what) {
  if (status == cudaSuccess) {
    return true;
  }
  std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status)
            << std::endl;
  return false;
}

// Adds two 1M-element arrays on the GPU and prints the maximum deviation from
// the expected value 3.0f. Returns 0 on success and 1 if any CUDA call fails.
int main(void) {
  const int N = 1 << 20;
  const size_t bytes = static_cast<size_t>(N) * sizeof(float);
  float *x = nullptr;
  float *y = nullptr;

  // Allocate Unified Memory - accessible from CPU or GPU.
  // On failure the pointers stay null; cudaFree(nullptr) is a harmless no-op.
  bool ok = cudaOk(cudaMallocManaged(&x, bytes), "cudaMallocManaged(x)") &&
            cudaOk(cudaMallocManaged(&y, bytes), "cudaMallocManaged(y)");

  if (ok) {
    // Initialize x and y arrays on the host.
    for (int i = 0; i < N; i++) {
      x[i] = 1.0f;
      y[i] = 2.0f;
    }

    // Run the kernel on 1M elements: enough 256-thread blocks to cover N
    // (ceil-division); the kernel's bounds check handles any tail.
    const int threadsPerBlock = 256;
    const int numBlocks = (N + threadsPerBlock - 1) / threadsPerBlock;
    add<<<numBlocks, threadsPerBlock>>>(N, x, y);

    // Launch-configuration errors are only visible through cudaGetLastError();
    // execution errors surface at the synchronizing call. The sync is also
    // required before the host reads y.
    ok = cudaOk(cudaGetLastError(), "add kernel launch") &&
         cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
  }

  if (ok) {
    // Check for errors (all values should be 3.0f).
    float maxError = 0.0f;
    for (int i = 0; i < N; i++) {
      maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
    }
    std::cout << "Max error: " << maxError << std::endl;
  }

  // Free memory (reached on every path).
  cudaFree(x);
  cudaFree(y);

  return ok ? 0 : 1;
}
__global__ void add(int n, float *x, float *y)
Definition: _gpu_mtx_add.cu:4
int main(void)
Definition: _gpu_mtx_add.cu:9
const int N