openGPMP
Open Source Mathematics Package
_gpu_kernel_mtx_mul.c
Go to the documentation of this file.
1 
/**
 * Matrix multiplication kernel: C = A * B.
 *
 * Each work-item computes a single element of C. The column index is
 * taken from global id 0 and the row index from global id 1.
 *
 * All three matrices are indexed as flat row-major arrays:
 *   - A has wA elements per row,
 *   - B has wB elements per row (and is read for wA rows),
 *   - C therefore has wB elements per row.
 *
 * The kernel performs no bounds checking.
 * NOTE(review): presumably the host launches a 2D global range that
 * matches C's dimensions exactly (wB x rows-of-A) -- confirm at the
 * enqueue site.
 *
 * @param C  output matrix in global memory
 * @param A  left operand in global memory
 * @param B  right operand in global memory
 * @param wA number of columns of A (equals the number of rows read from B)
 * @param wB number of columns of B (and of C)
 */
__kernel void matrixMul(__global float *C,
                        __global float *A,
                        __global float *B,
                        int wA,
                        int wB) {

    int tx = get_global_id(0); // column of C handled by this work-item
    int ty = get_global_id(1); // row of C handled by this work-item

    // value accumulates the dot product of row ty of A
    // and column tx of B
    float value = 0.0f;
    for (int k = 0; k < wA; ++k) {
        float elementA = A[ty * wA + k];
        float elementB = B[k * wB + tx];
        value += elementA * elementB;
    }

    // Each work-item writes one element of the result. C has wB columns,
    // so its row stride is wB (not wA); using wA here would misplace the
    // output whenever the operands are not square.
    C[ty * wB + tx] = value;
}
__kernel void matrixMul(__global float *C, __global float *A, __global float *B, int wA, int wB)
list C
Definition: linalg.py:24
list A
Definition: linalg.py:22
list B
Definition: linalg.py:23