openGPMP
Open Source Mathematics Package
_gpu_mtx.cu
Go to the documentation of this file.
1 
5 #include <iostream>
6 #include <math.h>
// Kernel function to add the elements of two arrays in place:
// y[i] = x[i] + y[i] for every 0 <= i < n.
//
// Parameters:
//   n - number of elements to process; n <= 0 performs no work.
//   x - input array, read at indices [0, n).
//   y - input/output array, read and written at indices [0, n).
//
// Launch layout: any 1D grid of 1D blocks. Each thread starts at its flat
// global index and advances by the total number of launched threads, so each
// element is written by exactly one thread regardless of the configuration.
// A <<<1, 1>>> launch therefore walks the array sequentially.
__global__ void add(int n, float *x, float *y) {
    // 64-bit arithmetic keeps blockIdx.x * blockDim.x and the running index
    // from overflowing when n is close to INT_MAX.
    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride)
        y[i] = x[i] + y[i];
}
12 
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what) {
    if (status == cudaSuccess)
        return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Adds two arrays of 1M floats on the GPU and prints the largest deviation
// from the expected value 3.0f.
//
// Returns 0 when every CUDA call succeeded and the result was printed,
// 1 when any CUDA call reported an error.
int main(void) {
    int N = 1 << 20;
    // Null-initialised so the cleanup below is safe even if allocation fails.
    float *x = nullptr;
    float *y = nullptr;
    int exitCode = 1;

    // Allocate Unified Memory - accessible from CPU or GPU
    if (cudaSucceeded(cudaMallocManaged(&x, N * sizeof(float)), "cudaMallocManaged(x)") &&
        cudaSucceeded(cudaMallocManaged(&y, N * sizeof(float)), "cudaMallocManaged(y)")) {
        // initialize x and y arrays on the host
        for (int i = 0; i < N; i++) {
            x[i] = 1.0f;
            y[i] = 2.0f;
        }

        // Run kernel on 1M elements on the GPU.
        // One block of one thread: a single GPU thread processes all N elements.
        add<<<1, 1>>>(N, x, y);

        // A launch does not return a status: cudaGetLastError() reports a bad
        // launch, and the synchronize both waits for the GPU to finish before
        // the host touches y and reports any fault raised while the kernel ran.
        if (cudaSucceeded(cudaGetLastError(), "add kernel launch") &&
            cudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
            // Check for errors (all values should be 3.0f)
            float maxError = 0.0f;
            for (int i = 0; i < N; i++)
                maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
            std::cout << "Max error: " << maxError << std::endl;
            exitCode = 0;
        }
    }

    // Free memory (cudaFree accepts a null pointer, so this also covers the
    // partial-allocation failure path).
    if (!cudaSucceeded(cudaFree(x), "cudaFree(x)"))
        exitCode = 1;
    if (!cudaSucceeded(cudaFree(y), "cudaFree(y)"))
        exitCode = 1;

    return exitCode;
}
__global__ void add(int n, float *x, float *y)
Definition: _gpu_mtx.cu:8
int main(void)
Definition: _gpu_mtx.cu:13
const int N