openGPMP
Open Source Mathematics Package
_gpu_mtx_kernel.c
Go to the documentation of this file.
1 
8 __kernel void
9 gpu_mtx_add(__global const int *A, __global const int *B, __global int *C) {
10  int i = get_global_id(0);
11  int j = get_global_id(1);
12  int index = i * get_global_size(1) + j;
13  C[index] = A[index] + B[index];
14 }
15 
16 __kernel void matrixMul(__global float *C,
17  __global float *A,
18  __global float *B,
19  int wA,
20  int wB) {
21 
22  int tx = get_global_id(0);
23  int ty = get_global_id(1);
24 
25  // value stores the element that is
26  // computed by the thread
27  float value = 0;
28  for (int k = 0; k < wA; ++k) {
29  float elementA = A[ty * wA + k];
30  float elementB = B[k * wB + tx];
31  value += elementA * elementB;
32  }
33 
34  // Write the matrix to device memory each
35  // thread writes one element
36  C[ty * wA + tx] = value;
37 }
__kernel void gpu_mtx_add(__global const int *A, __global const int *B, __global int *C)
__kernel void matrixMul(__global float *C, __global float *A, __global float *B, int wA, int wB)
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23