openGPMP
Open Source Mathematics Package
Functions
_gpu_mtx_add.cu File Reference
#include <iostream>
#include <math.h>

Go to the source code of this file.

Functions

__global__ void add (int n, float *x, float *y)
 
int main (void)
 

Function Documentation

◆ add()

__global__ void add ( int  n,
float *  x,
float *  y 
)

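Definition at line 4 of file _gpu_mtx_add.cu.

Adds the first n elements of x to y element-wise, storing the result in y (y[i] = x[i] + y[i]). As listed below, every thread that executes the kernel loops over all n elements itself.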

4  {
5  for (int i = 0; i < n; i++)
6  y[i] = x[i] + y[i];
7 }

◆ main()

int main ( void  )

Definition at line 9 of file _gpu_mtx_add.cu.

9  {
10  int N = 1 << 20;
11  float *x, *y;
12 
13  // Allocate Unified Memory – accessible from CPU or GPU
14  cudaMallocManaged(&x, N * sizeof(float));
15  cudaMallocManaged(&y, N * sizeof(float));
16 
17  // initialize x and y arrays on the host
18  for (int i = 0; i < N; i++) {
19  x[i] = 1.0f;
20  y[i] = 2.0f;
21  }
22 
23  // Run kernel on 1M elements on the GPU
24  add<<<1, 1>>>(N, x, y);
25 
26  // Wait for GPU to finish before accessing on host
27  cudaDeviceSynchronize();
28 
29  // Check for errors (all values should be 3.0f)
30  float maxError = 0.0f;
31  for (int i = 0; i < N; i++)
32  maxError = fmax(maxError, fabs(y[i] - 3.0f));
33  std::cout << "Max error: " << maxError << std::endl;
34 
35  // Free memory
36  cudaFree(x);
37  cudaFree(y);
38 
39  return 0;
40 }
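References N (declared as `const int N`). Note that main() also declares its own local `int N` in the listing above, so this cross-reference may be a name-based match rather than a real use of that constant.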