Functions
__kernel void	gpu_mtx_add (__global const int *A, __global const int *B, __global int *C)

__kernel void	matrixMul (__global float *C, __global float *A, __global float *B, int wA, int wB)

Function Documentation

◆ gpu_mtx_add()

__kernel void gpu_mtx_add	(	__global const int *	A,
		__global const int *	B,
		__global int *	C
	)

This file implements OpenCL kernel functions for matrix operations. This kernel adds two vectors element-wise using the GPU.

Definition at line 9 of file _gpu_mtx_kernel.c.

                                                                            {
     int i = get_global_id(0);
     int j = get_global_id(1);
     int index = i * get_global_size(1) + j;
     C[index] = A[index] + B[index];
 }

References python.linalg::A, python.linalg::B, and python.linalg::C.

◆ matrixMul()

__kernel void matrixMul	(	__global float *	C,
		__global float *	A,
		__global float *	B,
		int	wA,
		int	wB
	)

Definition at line 16 of file _gpu_mtx_kernel.c.

                                 {
  
     int tx = get_global_id(0);
     int ty = get_global_id(1);
  
     // value stores the element that is
     // computed by the thread
     float value = 0;
     for (int k = 0; k < wA; ++k) {
         float elementA = A[ty * wA + k];
         float elementB = B[k * wB + tx];
         value += elementA * elementB;
     }
  
     // Write the matrix to device memory each
     // thread writes one element
     C[ty * wA + tx] = value;
 }

References python.linalg::A, python.linalg::B, and python.linalg::C.

Functions

Function Documentation

◆ gpu_mtx_add()

◆ matrixMul()