// Fragment of the gpu_mtx_add kernel: element-wise integer addition, C = A + B.
// The leading numbers (9-13) appear to be line numbers carried over from an
// original listing; the closing brace of the kernel is not part of this fragment.
// A, B: read-only int buffers in global memory; C: int output buffer in global memory.
9 gpu_mtx_add(__global
const int *
A, __global
const int *
B, __global
int *
C) {
// Work-item coordinates in dimensions 0 and 1 of the global range.
10 int i = get_global_id(0);
11 int j = get_global_id(1);
// Flatten (i, j) into a linear offset; the global size of dimension 1 is the stride of i.
12 int index = i * get_global_size(1) + j;
// Each work-item stores one sum, at the same offset it reads from A and B.
13 C[index] =
A[index] +
B[index];
// Fragment of a matrix-multiply kernel body (presumably matrixMul — confirm).
// The leading numbers (22-36) look like line numbers from an original listing;
// the declaration of `value` and the closing braces are not visible here.
// tx / ty: work-item coordinates in dimensions 0 and 1 of the global range.
22 int tx = get_global_id(0);
23 int ty = get_global_id(1);
// Accumulate into `value` the products A[ty * wA + k] * B[k * wB + tx] for k in [0, wA).
28 for (
int k = 0; k < wA; ++k) {
29 float elementA =
A[ty * wA + k];
30 float elementB =
B[k * wB + tx];
31 value += elementA * elementB;
// NOTE(review): B is indexed with stride wB, but the result is stored with
// stride wA. The two agree only when wA == wB; the store likely wants
// ty * wB + tx — confirm against the host-side layout of C.
36 C[ty * wA + tx] = value;
// Kernel signature: A and B are read-only int buffers in global memory; C is a
// writable int buffer in global memory. No body follows this line in the listing.
__kernel void gpu_mtx_add(__global const int *A, __global const int *B, __global int *C)
// Kernel signature: C, A and B are float buffers in global memory; wA and wB are
// int scalars (presumably the widths of A and B — confirm). No body follows this
// line in the listing.
// NOTE(review): A and B are not const-qualified; if the kernel only reads them,
// `__global const float *` would document that — confirm against the full body.
__kernel void matrixMul(__global float *C, __global float *A, __global float *B, int wA, int wB)