#include <iostream>
#include <math.h>

Functions
__global__ void	add (int n, float *x, float *y)

int	main (void)

Function Documentation

◆ add()

__global__ void add	(	int	n,
		float *	x,
		float *	y
	)

This file implements CUDA kernel functions, together with the utility and setup code needed to execute them.

Definition at line 8 of file _gpu_mtx.cu.

                                                {
     for (int i = 0; i < n; i++)
         y[i] = x[i] + y[i];
 }

◆ main()

int main ( void )

Definition at line 13 of file _gpu_mtx.cu.

                {
     int N = 1 << 20;
     float *x, *y;
  
     // Allocate Unified Memory – accessible from CPU or GPU
     cudaMallocManaged(&x, N * sizeof(float));
     cudaMallocManaged(&y, N * sizeof(float));
  
     // initialize x and y arrays on the host
     for (int i = 0; i < N; i++) {
         x[i] = 1.0f;
         y[i] = 2.0f;
     }
  
     // Run kernel on 1M elements on the GPU
     add<<<1, 1>>>(N, x, y);
  
     // Wait for GPU to finish before accessing on host
     cudaDeviceSynchronize();
  
     // Check for errors (all values should be 3.0f)
     float maxError = 0.0f;
     for (int i = 0; i < N; i++)
         maxError = fmax(maxError, fabs(y[i] - 3.0f));
     std::cout << "Max error: " << maxError << std::endl;
  
     // Free memory
     cudaFree(x);
     cudaFree(y);
  
     return 0;
 }

References N.

Functions

Function Documentation

◆ add()

◆ main()