1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25
26 const char *wg_reduce_min_kernel_code_int =
27 "__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n"
28 "{\n"
29 " int tid = get_global_id(0);\n"
30 "\n"
31 " int result = work_group_reduce_min(input[tid]);\n"
32 " output[tid] = result;\n"
33 "}\n";
34
35
36 const char *wg_reduce_min_kernel_code_uint =
37 "__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n"
38 "{\n"
39 " int tid = get_global_id(0);\n"
40 "\n"
41 " uint result = work_group_reduce_min(input[tid]);\n"
42 " output[tid] = result;\n"
43 "}\n";
44
45 const char *wg_reduce_min_kernel_code_long =
46 "__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n"
47 "{\n"
48 " int tid = get_global_id(0);\n"
49 "\n"
50 " long result = work_group_reduce_min(input[tid]);\n"
51 " output[tid] = result;\n"
52 "}\n";
53
54
55 const char *wg_reduce_min_kernel_code_ulong =
56 "__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n"
57 "{\n"
58 " int tid = get_global_id(0);\n"
59 "\n"
60 " ulong result = work_group_reduce_min(input[tid]);\n"
61 " output[tid] = result;\n"
62 "}\n";
63
64
65 static int
verify_wg_reduce_min_int(int * inptr,int * outptr,size_t n,size_t wg_size)66 verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size)
67 {
68 size_t i, j;
69
70 for (i=0; i<n; i+=wg_size)
71 {
72 int min = CL_INT_MAX;
73 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
74 min = (min < inptr[i+j]) ? min : inptr[i+j];
75
76 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
77 {
78 if ( min != outptr[i+j] )
79 {
80 log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
81 return -1;
82 }
83 }
84 }
85
86 return 0;
87 }
88
89 static int
verify_wg_reduce_min_uint(unsigned int * inptr,unsigned int * outptr,size_t n,size_t wg_size)90 verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
91 {
92 size_t i, j;
93
94 for (i=0; i<n; i+=wg_size)
95 {
96 unsigned int min = CL_UINT_MAX;
97 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
98 min = (min < inptr[i+j]) ? min : inptr[i+j];
99
100 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
101 {
102 if ( min != outptr[i+j] )
103 {
104 log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
105 return -1;
106 }
107 }
108 }
109
110 return 0;
111 }
112
113 static int
verify_wg_reduce_min_long(cl_long * inptr,cl_long * outptr,size_t n,size_t wg_size)114 verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
115 {
116 size_t i, j;
117
118 for (i=0; i<n; i+=wg_size)
119 {
120 cl_long min = CL_ULONG_MAX;
121 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
122 min = (min < inptr[i+j]) ? min : inptr[i+j];
123
124 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
125 {
126 if ( min != outptr[i+j] )
127 {
128 log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]);
129 return -1;
130 }
131 }
132 }
133
134 return 0;
135 }
136
137 static int
verify_wg_reduce_min_ulong(cl_ulong * inptr,cl_ulong * outptr,size_t n,size_t wg_size)138 verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
139 {
140 size_t i, j;
141
142 for (i=0; i<n; i+=wg_size)
143 {
144 cl_ulong min = CL_ULONG_MAX;
145 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
146 min = (min < inptr[i+j]) ? min : inptr[i+j];
147
148 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
149 {
150 if ( min != outptr[i+j] )
151 {
152 log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]);
153 return -1;
154 }
155 }
156 }
157
158 return 0;
159 }
160
161
162
163 int
test_work_group_reduce_min_int(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)164 test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
165 {
166 cl_mem streams[2];
167 cl_int *input_ptr[1], *p;
168 cl_int *output_ptr;
169 cl_program program;
170 cl_kernel kernel;
171 void *values[2];
172 size_t threads[1];
173 size_t wg_size[1];
174 size_t wg_sizes_per_dimension[3];
175 size_t num_elements;
176 int err;
177 int i;
178 MTdata d;
179
180 err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_int, "test_wg_reduce_min_int", "-cl-std=CL2.0" );
181 if (err)
182 return -1;
183
184 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
185 if (err)
186 return -1;
187
188 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
189 if (err)
190 return -1;
191 if(wg_sizes_per_dimension[0] < wg_size[0])
192 {
193 wg_size[0] = wg_sizes_per_dimension[0];
194 }
195
196 num_elements = n_elems;
197
198 input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
199 output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
200 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL );
201 if (!streams[0])
202 {
203 log_error("clCreateBuffer failed\n");
204 return -1;
205 }
206
207 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL );
208 if (!streams[1])
209 {
210 log_error("clCreateBuffer failed\n");
211 return -1;
212 }
213
214 p = input_ptr[0];
215 d = init_genrand( gRandomSeed );
216 for (i=0; i<num_elements; i++)
217 p[i] = genrand_int32(d);
218 free_mtdata(d); d = NULL;
219
220 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
221 if (err != CL_SUCCESS)
222 {
223 log_error("clWriteArray failed\n");
224 return -1;
225 }
226
227 values[0] = streams[0];
228 values[1] = streams[1];
229 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
230 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
231 if (err != CL_SUCCESS)
232 {
233 log_error("clSetKernelArgs failed\n");
234 return -1;
235 }
236
237 // Line below is troublesome...
238 threads[0] = (size_t)num_elements;
239 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
240 if (err != CL_SUCCESS)
241 {
242 log_error("clEnqueueNDRangeKernel failed\n");
243 return -1;
244 }
245
246 cl_uint dead = 0xdeaddead;
247 memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
248 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
249 if (err != CL_SUCCESS)
250 {
251 log_error("clEnqueueReadBuffer failed\n");
252 return -1;
253 }
254
255 if (verify_wg_reduce_min_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
256 {
257 log_error("work_group_reduce_min int failed\n");
258 return -1;
259 }
260 log_info("work_group_reduce_min int passed\n");
261
262 clReleaseMemObject(streams[0]);
263 clReleaseMemObject(streams[1]);
264 clReleaseKernel(kernel);
265 clReleaseProgram(program);
266 free(input_ptr[0]);
267 free(output_ptr);
268
269 return err;
270 }
271
272
273 int
test_work_group_reduce_min_uint(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)274 test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
275 {
276 cl_mem streams[2];
277 cl_uint *input_ptr[1], *p;
278 cl_uint *output_ptr;
279 cl_program program;
280 cl_kernel kernel;
281 void *values[2];
282 size_t threads[1];
283 size_t wg_size[1];
284 size_t wg_sizes_per_dimension[3];
285 size_t num_elements;
286 int err;
287 int i;
288 MTdata d;
289
290 err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_uint, "test_wg_reduce_min_uint", "-cl-std=CL2.0" );
291 if (err)
292 return -1;
293
294 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
295 if (err)
296 return -1;
297
298 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
299 if (err)
300 return -1;
301 if(wg_sizes_per_dimension[0] < wg_size[0])
302 {
303 wg_size[0] = wg_sizes_per_dimension[0];
304 }
305
306 num_elements = n_elems;
307
308 input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
309 output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
310 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, NULL );
311 if (!streams[0])
312 {
313 log_error("clCreateBuffer failed\n");
314 return -1;
315 }
316
317 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, NULL );
318 if (!streams[1])
319 {
320 log_error("clCreateBuffer failed\n");
321 return -1;
322 }
323
324 p = input_ptr[0];
325 d = init_genrand( gRandomSeed );
326 for (i=0; i<num_elements; i++)
327 p[i] = genrand_int32(d);
328 free_mtdata(d); d = NULL;
329
330 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
331 if (err != CL_SUCCESS)
332 {
333 log_error("clWriteArray failed\n");
334 return -1;
335 }
336
337 values[0] = streams[0];
338 values[1] = streams[1];
339 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
340 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
341 if (err != CL_SUCCESS)
342 {
343 log_error("clSetKernelArgs failed\n");
344 return -1;
345 }
346
347 // Line below is troublesome...
348 threads[0] = (size_t)n_elems;
349 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
350 if (err != CL_SUCCESS)
351 {
352 log_error("clEnqueueNDRangeKernel failed\n");
353 return -1;
354 }
355
356 cl_uint dead = 0xdeaddead;
357 memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
358 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
359 if (err != CL_SUCCESS)
360 {
361 log_error("clEnqueueReadBuffer failed\n");
362 return -1;
363 }
364
365 if (verify_wg_reduce_min_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
366 {
367 log_error("work_group_reduce_min uint failed\n");
368 return -1;
369 }
370 log_info("work_group_reduce_min uint passed\n");
371
372 clReleaseMemObject(streams[0]);
373 clReleaseMemObject(streams[1]);
374 clReleaseKernel(kernel);
375 clReleaseProgram(program);
376 free(input_ptr[0]);
377 free(output_ptr);
378
379 return err;
380 }
381
382 int
test_work_group_reduce_min_long(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)383 test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
384 {
385 cl_mem streams[2];
386 cl_long *input_ptr[1], *p;
387 cl_long *output_ptr;
388 cl_program program;
389 cl_kernel kernel;
390 void *values[2];
391 size_t threads[1];
392 size_t wg_size[1];
393 size_t wg_sizes_per_dimension[3];
394 size_t num_elements;
395 int err;
396 int i;
397 MTdata d;
398
399 err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_long, "test_wg_reduce_min_long", "-cl-std=CL2.0" );
400 if (err)
401 return -1;
402
403 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
404 if (err)
405 return -1;
406
407 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
408 if (err)
409 return -1;
410 if(wg_sizes_per_dimension[0] < wg_size[0])
411 {
412 wg_size[0] = wg_sizes_per_dimension[0];
413 }
414
415 num_elements = n_elems;
416
417 input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
418 output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
419 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_long) * num_elements, NULL, NULL );
420 if (!streams[0])
421 {
422 log_error("clCreateBuffer failed\n");
423 return -1;
424 }
425
426 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_long) * num_elements, NULL, NULL );
427 if (!streams[1])
428 {
429 log_error("clCreateBuffer failed\n");
430 return -1;
431 }
432
433 p = input_ptr[0];
434 d = init_genrand( gRandomSeed );
435 for (i=0; i<num_elements; i++)
436 p[i] = genrand_int64(d);
437 free_mtdata(d); d = NULL;
438
439 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
440 if (err != CL_SUCCESS)
441 {
442 log_error("clWriteArray failed\n");
443 return -1;
444 }
445
446 values[0] = streams[0];
447 values[1] = streams[1];
448 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
449 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
450 if (err != CL_SUCCESS)
451 {
452 log_error("clSetKernelArgs failed\n");
453 return -1;
454 }
455
456 // Line below is troublesome...
457 threads[0] = (size_t)n_elems;
458 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
459 if (err != CL_SUCCESS)
460 {
461 log_error("clEnqueueNDRangeKernel failed\n");
462 return -1;
463 }
464
465 cl_uint dead = 0xdeaddead;
466 memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
467 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
468 if (err != CL_SUCCESS)
469 {
470 log_error("clEnqueueReadBuffer failed\n");
471 return -1;
472 }
473
474 if (verify_wg_reduce_min_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
475 {
476 log_error("work_group_reduce_min long failed\n");
477 return -1;
478 }
479 log_info("work_group_reduce_min long passed\n");
480
481 clReleaseMemObject(streams[0]);
482 clReleaseMemObject(streams[1]);
483 clReleaseKernel(kernel);
484 clReleaseProgram(program);
485 free(input_ptr[0]);
486 free(output_ptr);
487
488 return err;
489 }
490
491
492 int
test_work_group_reduce_min_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)493 test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
494 {
495 cl_mem streams[2];
496 cl_ulong *input_ptr[1], *p;
497 cl_ulong *output_ptr;
498 cl_program program;
499 cl_kernel kernel;
500 void *values[2];
501 size_t threads[1];
502 size_t wg_size[1];
503 size_t wg_sizes_per_dimension[3];
504 size_t num_elements;
505 int err;
506 int i;
507 MTdata d;
508
509 err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_ulong, "test_wg_reduce_min_ulong", "-cl-std=CL2.0" );
510 if (err)
511 return -1;
512
513 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
514 if (err)
515 return -1;
516
517 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
518 if (err)
519 return -1;
520 if(wg_sizes_per_dimension[0] < wg_size[0])
521 {
522 wg_size[0] = wg_sizes_per_dimension[0];
523 }
524
525 num_elements = n_elems;
526
527 input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
528 output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
529 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_ulong) * num_elements, NULL, NULL );
530 if (!streams[0])
531 {
532 log_error("clCreateBuffer failed\n");
533 return -1;
534 }
535
536 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_ulong) * num_elements, NULL, NULL );
537 if (!streams[1])
538 {
539 log_error("clCreateBuffer failed\n");
540 return -1;
541 }
542
543 p = input_ptr[0];
544 d = init_genrand( gRandomSeed );
545 for (i=0; i<num_elements; i++)
546 p[i] = genrand_int64(d);
547 free_mtdata(d); d = NULL;
548
549 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
550 if (err != CL_SUCCESS)
551 {
552 log_error("clWriteArray failed\n");
553 return -1;
554 }
555
556 values[0] = streams[0];
557 values[1] = streams[1];
558 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
559 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
560 if (err != CL_SUCCESS)
561 {
562 log_error("clSetKernelArgs failed\n");
563 return -1;
564 }
565
566 // Line below is troublesome...
567 threads[0] = (size_t)n_elems;
568 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
569 if (err != CL_SUCCESS)
570 {
571 log_error("clEnqueueNDRangeKernel failed\n");
572 return -1;
573 }
574
575 cl_uint dead = 0xdeaddead;
576 memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
577 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
578 if (err != CL_SUCCESS)
579 {
580 log_error("clEnqueueReadBuffer failed\n");
581 return -1;
582 }
583
584 if (verify_wg_reduce_min_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
585 {
586 log_error("work_group_reduce_min ulong failed\n");
587 return -1;
588 }
589 log_info("work_group_reduce_min ulong passed\n");
590
591 clReleaseMemObject(streams[0]);
592 clReleaseMemObject(streams[1]);
593 clReleaseKernel(kernel);
594 clReleaseProgram(program);
595 free(input_ptr[0]);
596 free(output_ptr);
597
598 return err;
599 }
600
601
602 int
test_work_group_reduce_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)603 test_work_group_reduce_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
604 {
605 int err;
606
607 err = test_work_group_reduce_min_int(device, context, queue, n_elems);
608 if (err) return err;
609 err = test_work_group_reduce_min_uint(device, context, queue, n_elems);
610 if (err) return err;
611 err = test_work_group_reduce_min_long(device, context, queue, n_elems);
612 if (err) return err;
613 err = test_work_group_reduce_min_ulong(device, context, queue, n_elems);
614 return err;
615 }
616
617