openGPMP
Open Source Mathematics Package
modules
linalg
gpu
_gpu_kernel_mtx_mul.c
Go to the documentation of this file.
1
4
/**
 * Naive matrix multiplication kernel: C = A * B.
 *
 * Each work-item computes a single element of C. The column index is
 * taken from global id 0 and the row index from global id 1.
 *
 * @param C   output buffer, written row-major with a row stride of wB
 * @param A   left operand, read row-major with a row stride of wA
 * @param B   right operand, read row-major with a row stride of wB
 * @param wA  number of columns of A; also the length of the dot product
 * @param wB  number of columns of B, and therefore of C
 *
 * NOTE(review): no bounds check is performed, so this presumably expects
 * the host to enqueue a global range of exactly wB x (rows of A) --
 * confirm against the caller.
 */
__kernel void matrixMul(__global float *C,
                        __global float *A,
                        __global float *B,
                        int wA,
                        int wB) {
    int col = get_global_id(0);
    int row = get_global_id(1);

    // Dot product of row `row` of A with column `col` of B.
    float acc = 0.0f;
    for (int k = 0; k < wA; ++k) {
        acc += A[row * wA + k] * B[k * wB + col];
    }

    // C has wB columns (one per column of B), so its row stride is wB.
    // Using wA here is only correct for wA == wB and otherwise scatters
    // results into the wrong rows of C.
    C[row * wB + col] = acc;
}
matrixMul
__kernel void matrixMul(__global float *C, __global float *A, __global float *B, int wA, int wB)
Definition:
_gpu_kernel_mtx_mul.c:4
python.linalg.C
list C
Definition:
linalg.py:24
python.linalg.A
list A
Definition:
linalg.py:22
python.linalg.B
list B
Definition:
linalg.py:23
Generated by
1.9.1