openGPMP
Open Source Mathematics Package
Functions
_gpu_mtx_add_wrapper.cpp File Reference
#include <iostream>

Go to the source code of this file.

Functions

int accl_mtx_exec ()
 
int main ()
 

Function Documentation

◆ accl_mtx_exec()

int accl_mtx_exec ( )

Definition at line 98 of file _gpu_mtx_add.c.

98  {
99  struct timeval start, finish;
100  double duration;
101  srand((unsigned)time(NULL));
102  clock_t startTime, endTime;
103  double totalTime;
104 
105  /* generate vector a and b */
106  int len = 64;
107  int *a, *b, *c, *c_d;
108  a = (int *)malloc(len * sizeof(int));
109  b = (int *)malloc(len * sizeof(int));
110  c = (int *)malloc(len * sizeof(int));
111  c_d = (int *)malloc(len * sizeof(int));
112  size_t data_size = len * sizeof(int);
113 
114  PRINT_LINE("INIT VALUE");
115  /* vector addition, cpu version */
116  printf("a: ");
117  init_vec(a, len, 1);
118  print_vec(a, len);
119 
120  printf("b: ");
121  rand_vec(b, len);
122  print_vec(b, len);
123 
124  printf("c: ");
125  init_vec(c, len, 0);
126 
127  startTime = clock();
128  add_vec_cpu(a, b, c, len);
129  endTime = clock();
130  // calculate difference to get total
131  totalTime = (double)(endTime - startTime) / CLOCKS_PER_SEC;
132  print_vec(c, len);
133  printf("CPU: %f\n", totalTime);
134 
135  /* vector addition, gpu version */
136  cl_mem a_buff, b_buff, c_buff;
137  a_buff = b_buff = c_buff = NULL;
138 
139  cl_platform_id platform_id = NULL;
140  cl_uint ret_num_platforms;
141 
142  cl_device_id device_id = NULL;
143  cl_uint ret_num_devices;
144 
145  cl_context context = NULL;
146  cl_kernel kernel = NULL;
147  cl_program program = NULL;
148 
149  cl_command_queue command_queue = NULL;
150  cl_int ret;
151 
152  /* Load the source code containing the kernel */
153  char string[MEM_SIZE];
154  FILE *fp;
155  char fileName[] = "./_gpu_kernel_mtx_add.c";
156  char *source_str;
157  size_t source_size;
158 
159  fp = fopen(fileName, "r");
160  if (!fp) {
161 
162  fprintf(stderr, "Failed to load kernel.\n");
163  exit(1);
164  }
165  source_str = (char *)malloc(MAX_SOURCE_SIZE);
166  source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
167  fclose(fp);
168 
169  // Platform
170  ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
171  if (ret != CL_SUCCESS) {
172  printf("Failed to get platform ID.\n");
173  goto error;
174  }
175  // Device
176  ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
177  if (ret != CL_SUCCESS) {
178  printf("Failed to get device ID.\n");
179  goto error;
180  }
181  // Context
182  context = clCreateContext(NULL, 1, &device_id, NULL, NULL, NULL); //&ret);
183  if (ret != CL_SUCCESS) {
184  printf("Failed to create OpenCL context.\n");
185  goto error;
186  }
187  command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
188  if (ret != CL_SUCCESS) {
189  printf("Failed to create command queue %d\n", (int)ret);
190  goto error;
191  }
192  // Memory Buffer
193  a_buff = clCreateBuffer(context, CL_MEM_READ_ONLY, data_size, NULL, &ret);
194  b_buff = clCreateBuffer(context, CL_MEM_READ_ONLY, data_size, NULL, &ret);
195  c_buff = clCreateBuffer(context, CL_MEM_WRITE_ONLY, data_size, NULL, &ret);
196 
197  ret = clEnqueueWriteBuffer(command_queue,
198  a_buff,
199  CL_TRUE,
200  0,
201  data_size,
202  (void *)a,
203  0,
204  NULL,
205  NULL);
206  ret |= clEnqueueWriteBuffer(command_queue,
207  b_buff,
208  CL_TRUE,
209  0,
210  data_size,
211  (void *)b,
212  0,
213  NULL,
214  NULL);
215  if (ret != CL_SUCCESS) {
216  printf("Failed to copy date from host to device: %d\n", (int)ret);
217  goto error;
218  }
219  // Create Kernel Program from source
220  program = clCreateProgramWithSource(context,
221  1,
222  (const char **)&source_str,
223  (const size_t *)&source_size,
224  &ret);
225  if (ret != CL_SUCCESS) {
226  printf("Failed to create OpenCL program from source %d\n", (int)ret);
227  goto error;
228  }
229  // Build Kernel Program
230  ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
231  if (ret != CL_SUCCESS) {
232  printf("Failed to build program %d\n", (int)ret);
233  char build_log[16348];
234  clGetProgramBuildInfo(program,
235  device_id,
236  CL_PROGRAM_BUILD_LOG,
237  sizeof(build_log),
238  build_log,
239  NULL);
240  printf("Error in kernel: %s\n", build_log);
241  goto error;
242  }
243  // Create OpenCL Kernel
244  kernel = clCreateKernel(program, "add_vec_gpu", &ret);
245  if (ret != CL_SUCCESS) {
246  printf("Failed to create kernel %d\n", (int)ret);
247  goto error;
248  }
249  ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_buff);
250  ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_buff);
251  ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_buff);
252  ret |= clSetKernelArg(kernel, 3, sizeof(cl_int), (void *)&len);
253  if (ret != CL_SUCCESS) {
254  printf("Failed to set kernel arguments %d\n", (int)ret);
255  goto error;
256  }
257 
258  /* Execute OpenCL Kernel */
259  // executed using a single work-item
260  // ret = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
261 
262  size_t global_work_size, local_work_size;
263  // Number of work items in each local work group
264  local_work_size = len;
265  // Number of total work items - localSize must be devisor
266  global_work_size =
267  (size_t)ceil(len / (float)local_work_size) * local_work_size;
268 
269  // size_t local_work_size[2] = { 8, 8 };
270  // size_t global_work_size[2] = { 1, len };
271  ret = clEnqueueNDRangeKernel(command_queue,
272  kernel,
273  1,
274  NULL,
275  &global_work_size,
276  &local_work_size,
277  0,
278  NULL,
279  NULL);
280  if (ret != CL_SUCCESS) {
281  printf("Failed to execute kernel for execution %d\n", (int)ret);
282  goto error;
283  }
284 
285  init_vec(c_d, len, 0);
286  /* Copy results from the memory buffer */
287  ret = clEnqueueReadBuffer(command_queue,
288  c_buff,
289  CL_TRUE,
290  0,
291  data_size,
292  (void *)c_d,
293  0,
294  NULL,
295  NULL);
296  if (ret != CL_SUCCESS) {
297  printf("Failed to copy data from device to host %d\n", (int)ret);
298  goto error;
299  }
300 
301  /* Display Result */
302  PRINT_LINE("CHECK RESULT cpu-verison && gpu-version");
303  printf("c_d: ");
304  print_vec(c_d, len);
305  check_result(c, c_d, len);
306  printf("len-1=%d, c_d[%d]==c[%d]: %d, c_d[%d]=%d, c[%d]=%d \n",
307  len - 1,
308  len - 1,
309  len - 1,
310  c_d[len - 1] == c[len - 1],
311  len - 1,
312  c_d[len - 1],
313  len - 1,
314  c[len - 1]);
315 
316  PRINT_LINE("CHECK RESULT ELEMENT BY ELEMENT");
317  printf("idx c c_d\n");
318  for (int i = 0; i < len; i++) {
319  printf("%2d %2d %2d \n", i, c[i], c_d[i]);
320  }
321 
322  /* Finalization */
323 error:
324 
325  /* free device resources */
326  clFlush(command_queue);
327  clFinish(command_queue);
328  clReleaseKernel(kernel);
329  clReleaseProgram(program);
330 
331  clReleaseMemObject(a_buff);
332  clReleaseMemObject(b_buff);
333  clReleaseMemObject(c_buff);
334 
335  clReleaseCommandQueue(command_queue);
336  clReleaseContext(context);
337 
338  /* free host resources */
339  free(source_str);
340  free(a);
341  free(b);
342  free(c);
343 }
void rand_vec(int *vec, int len)
Definition: _gpu_mtx_add.c:65
#define MAX_SOURCE_SIZE
Definition: _gpu_mtx_add.c:53
#define MEM_SIZE
Definition: _gpu_mtx_add.c:52
#define PRINT_LINE(title)
Definition: _gpu_mtx_add.c:54
void add_vec_cpu(const int *a, const int *b, int *res, const int len)
Definition: _gpu_mtx_add.c:72
void init_vec(int *vec, int len, int set_one_flag)
GPU kernel acceleration utility/helper functions.
Definition: _gpu_mtx_add.c:56
void print_vec(int *vec, int len)
Definition: _gpu_mtx_add.c:78
void check_result(int *v1, int *v2, int len)
Definition: _gpu_mtx_add.c:85

References add_vec_cpu(), check_result(), init_vec(), MAX_SOURCE_SIZE, MEM_SIZE, PRINT_LINE, print_vec(), and rand_vec().

Referenced by main().

◆ main()

int main ( void  )

Definition at line 7 of file _gpu_mtx_add_wrapper.cpp.

7  {
9  return 0;
10 }
int accl_mtx_exec()
Definition: _gpu_mtx_add.c:98

References accl_mtx_exec().