1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25
26 const char *wg_reduce_min_kernel_code_int =
27 "__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n"
28 "{\n"
29 " int tid = get_global_id(0);\n"
30 "\n"
31 " int result = work_group_reduce_min(input[tid]);\n"
32 " output[tid] = result;\n"
33 "}\n";
34
35
36 const char *wg_reduce_min_kernel_code_uint =
37 "__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n"
38 "{\n"
39 " int tid = get_global_id(0);\n"
40 "\n"
41 " uint result = work_group_reduce_min(input[tid]);\n"
42 " output[tid] = result;\n"
43 "}\n";
44
45 const char *wg_reduce_min_kernel_code_long =
46 "__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n"
47 "{\n"
48 " int tid = get_global_id(0);\n"
49 "\n"
50 " long result = work_group_reduce_min(input[tid]);\n"
51 " output[tid] = result;\n"
52 "}\n";
53
54
55 const char *wg_reduce_min_kernel_code_ulong =
56 "__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n"
57 "{\n"
58 " int tid = get_global_id(0);\n"
59 "\n"
60 " ulong result = work_group_reduce_min(input[tid]);\n"
61 " output[tid] = result;\n"
62 "}\n";
63
64
65 static int
verify_wg_reduce_min_int(int * inptr,int * outptr,size_t n,size_t wg_size)66 verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size)
67 {
68 size_t i, j;
69
70 for (i=0; i<n; i+=wg_size)
71 {
72 int min = CL_INT_MAX;
73 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
74 min = (min < inptr[i+j]) ? min : inptr[i+j];
75
76 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
77 {
78 if ( min != outptr[i+j] )
79 {
80 log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
81 return -1;
82 }
83 }
84 }
85
86 return 0;
87 }
88
89 static int
verify_wg_reduce_min_uint(unsigned int * inptr,unsigned int * outptr,size_t n,size_t wg_size)90 verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
91 {
92 size_t i, j;
93
94 for (i=0; i<n; i+=wg_size)
95 {
96 unsigned int min = CL_UINT_MAX;
97 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
98 min = (min < inptr[i+j]) ? min : inptr[i+j];
99
100 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
101 {
102 if ( min != outptr[i+j] )
103 {
104 log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
105 return -1;
106 }
107 }
108 }
109
110 return 0;
111 }
112
113 static int
verify_wg_reduce_min_long(cl_long * inptr,cl_long * outptr,size_t n,size_t wg_size)114 verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
115 {
116 size_t i, j;
117
118 for (i=0; i<n; i+=wg_size)
119 {
120 cl_long min = CL_ULONG_MAX;
121 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
122 min = (min < inptr[i+j]) ? min : inptr[i+j];
123
124 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
125 {
126 if ( min != outptr[i+j] )
127 {
128 log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]);
129 return -1;
130 }
131 }
132 }
133
134 return 0;
135 }
136
137 static int
verify_wg_reduce_min_ulong(cl_ulong * inptr,cl_ulong * outptr,size_t n,size_t wg_size)138 verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
139 {
140 size_t i, j;
141
142 for (i=0; i<n; i+=wg_size)
143 {
144 cl_ulong min = CL_ULONG_MAX;
145 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
146 min = (min < inptr[i+j]) ? min : inptr[i+j];
147
148 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
149 {
150 if ( min != outptr[i+j] )
151 {
152 log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]);
153 return -1;
154 }
155 }
156 }
157
158 return 0;
159 }
160
161
162
163 int
test_work_group_reduce_min_int(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)164 test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
165 {
166 cl_mem streams[2];
167 cl_int *input_ptr[1], *p;
168 cl_int *output_ptr;
169 cl_program program;
170 cl_kernel kernel;
171 void *values[2];
172 size_t threads[1];
173 size_t wg_size[1];
174 size_t wg_sizes_per_dimension[3];
175 size_t num_elements;
176 int err;
177 int i;
178 MTdata d;
179
180 err = create_single_kernel_helper(context, &program, &kernel, 1,
181 &wg_reduce_min_kernel_code_int,
182 "test_wg_reduce_min_int");
183 if (err)
184 return -1;
185
186 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
187 if (err)
188 return -1;
189
190 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
191 if (err)
192 return -1;
193 if(wg_sizes_per_dimension[0] < wg_size[0])
194 {
195 wg_size[0] = wg_sizes_per_dimension[0];
196 }
197
198 num_elements = n_elems;
199
200 input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
201 output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
202 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
203 sizeof(cl_int) * num_elements, NULL, NULL);
204 if (!streams[0])
205 {
206 log_error("clCreateBuffer failed\n");
207 return -1;
208 }
209
210 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
211 sizeof(cl_int) * num_elements, NULL, NULL);
212 if (!streams[1])
213 {
214 log_error("clCreateBuffer failed\n");
215 return -1;
216 }
217
218 p = input_ptr[0];
219 d = init_genrand( gRandomSeed );
220 for (i=0; i<num_elements; i++)
221 p[i] = genrand_int32(d);
222 free_mtdata(d); d = NULL;
223
224 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
225 if (err != CL_SUCCESS)
226 {
227 log_error("clWriteArray failed\n");
228 return -1;
229 }
230
231 values[0] = streams[0];
232 values[1] = streams[1];
233 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
234 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
235 if (err != CL_SUCCESS)
236 {
237 log_error("clSetKernelArgs failed\n");
238 return -1;
239 }
240
241 // Line below is troublesome...
242 threads[0] = (size_t)num_elements;
243 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
244 if (err != CL_SUCCESS)
245 {
246 log_error("clEnqueueNDRangeKernel failed\n");
247 return -1;
248 }
249
250 cl_uint dead = 0xdeaddead;
251 memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
252 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
253 if (err != CL_SUCCESS)
254 {
255 log_error("clEnqueueReadBuffer failed\n");
256 return -1;
257 }
258
259 if (verify_wg_reduce_min_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
260 {
261 log_error("work_group_reduce_min int failed\n");
262 return -1;
263 }
264 log_info("work_group_reduce_min int passed\n");
265
266 clReleaseMemObject(streams[0]);
267 clReleaseMemObject(streams[1]);
268 clReleaseKernel(kernel);
269 clReleaseProgram(program);
270 free(input_ptr[0]);
271 free(output_ptr);
272
273 return err;
274 }
275
276
277 int
test_work_group_reduce_min_uint(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)278 test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
279 {
280 cl_mem streams[2];
281 cl_uint *input_ptr[1], *p;
282 cl_uint *output_ptr;
283 cl_program program;
284 cl_kernel kernel;
285 void *values[2];
286 size_t threads[1];
287 size_t wg_size[1];
288 size_t wg_sizes_per_dimension[3];
289 size_t num_elements;
290 int err;
291 int i;
292 MTdata d;
293
294 err = create_single_kernel_helper(context, &program, &kernel, 1,
295 &wg_reduce_min_kernel_code_uint,
296 "test_wg_reduce_min_uint");
297 if (err)
298 return -1;
299
300 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
301 if (err)
302 return -1;
303
304 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
305 if (err)
306 return -1;
307 if(wg_sizes_per_dimension[0] < wg_size[0])
308 {
309 wg_size[0] = wg_sizes_per_dimension[0];
310 }
311
312 num_elements = n_elems;
313
314 input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
315 output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
316 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
317 sizeof(cl_uint) * num_elements, NULL, NULL);
318 if (!streams[0])
319 {
320 log_error("clCreateBuffer failed\n");
321 return -1;
322 }
323
324 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
325 sizeof(cl_uint) * num_elements, NULL, NULL);
326 if (!streams[1])
327 {
328 log_error("clCreateBuffer failed\n");
329 return -1;
330 }
331
332 p = input_ptr[0];
333 d = init_genrand( gRandomSeed );
334 for (i=0; i<num_elements; i++)
335 p[i] = genrand_int32(d);
336 free_mtdata(d); d = NULL;
337
338 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
339 if (err != CL_SUCCESS)
340 {
341 log_error("clWriteArray failed\n");
342 return -1;
343 }
344
345 values[0] = streams[0];
346 values[1] = streams[1];
347 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
348 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
349 if (err != CL_SUCCESS)
350 {
351 log_error("clSetKernelArgs failed\n");
352 return -1;
353 }
354
355 // Line below is troublesome...
356 threads[0] = (size_t)n_elems;
357 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
358 if (err != CL_SUCCESS)
359 {
360 log_error("clEnqueueNDRangeKernel failed\n");
361 return -1;
362 }
363
364 cl_uint dead = 0xdeaddead;
365 memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
366 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
367 if (err != CL_SUCCESS)
368 {
369 log_error("clEnqueueReadBuffer failed\n");
370 return -1;
371 }
372
373 if (verify_wg_reduce_min_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
374 {
375 log_error("work_group_reduce_min uint failed\n");
376 return -1;
377 }
378 log_info("work_group_reduce_min uint passed\n");
379
380 clReleaseMemObject(streams[0]);
381 clReleaseMemObject(streams[1]);
382 clReleaseKernel(kernel);
383 clReleaseProgram(program);
384 free(input_ptr[0]);
385 free(output_ptr);
386
387 return err;
388 }
389
390 int
test_work_group_reduce_min_long(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)391 test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
392 {
393 cl_mem streams[2];
394 cl_long *input_ptr[1], *p;
395 cl_long *output_ptr;
396 cl_program program;
397 cl_kernel kernel;
398 void *values[2];
399 size_t threads[1];
400 size_t wg_size[1];
401 size_t wg_sizes_per_dimension[3];
402 size_t num_elements;
403 int err;
404 int i;
405 MTdata d;
406
407 err = create_single_kernel_helper(context, &program, &kernel, 1,
408 &wg_reduce_min_kernel_code_long,
409 "test_wg_reduce_min_long");
410 if (err)
411 return -1;
412
413 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
414 if (err)
415 return -1;
416
417 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
418 if (err)
419 return -1;
420 if(wg_sizes_per_dimension[0] < wg_size[0])
421 {
422 wg_size[0] = wg_sizes_per_dimension[0];
423 }
424
425 num_elements = n_elems;
426
427 input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
428 output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
429 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
430 sizeof(cl_long) * num_elements, NULL, NULL);
431 if (!streams[0])
432 {
433 log_error("clCreateBuffer failed\n");
434 return -1;
435 }
436
437 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
438 sizeof(cl_long) * num_elements, NULL, NULL);
439 if (!streams[1])
440 {
441 log_error("clCreateBuffer failed\n");
442 return -1;
443 }
444
445 p = input_ptr[0];
446 d = init_genrand( gRandomSeed );
447 for (i=0; i<num_elements; i++)
448 p[i] = genrand_int64(d);
449 free_mtdata(d); d = NULL;
450
451 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
452 if (err != CL_SUCCESS)
453 {
454 log_error("clWriteArray failed\n");
455 return -1;
456 }
457
458 values[0] = streams[0];
459 values[1] = streams[1];
460 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
461 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
462 if (err != CL_SUCCESS)
463 {
464 log_error("clSetKernelArgs failed\n");
465 return -1;
466 }
467
468 // Line below is troublesome...
469 threads[0] = (size_t)n_elems;
470 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
471 if (err != CL_SUCCESS)
472 {
473 log_error("clEnqueueNDRangeKernel failed\n");
474 return -1;
475 }
476
477 cl_uint dead = 0xdeaddead;
478 memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
479 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
480 if (err != CL_SUCCESS)
481 {
482 log_error("clEnqueueReadBuffer failed\n");
483 return -1;
484 }
485
486 if (verify_wg_reduce_min_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
487 {
488 log_error("work_group_reduce_min long failed\n");
489 return -1;
490 }
491 log_info("work_group_reduce_min long passed\n");
492
493 clReleaseMemObject(streams[0]);
494 clReleaseMemObject(streams[1]);
495 clReleaseKernel(kernel);
496 clReleaseProgram(program);
497 free(input_ptr[0]);
498 free(output_ptr);
499
500 return err;
501 }
502
503
504 int
test_work_group_reduce_min_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)505 test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
506 {
507 cl_mem streams[2];
508 cl_ulong *input_ptr[1], *p;
509 cl_ulong *output_ptr;
510 cl_program program;
511 cl_kernel kernel;
512 void *values[2];
513 size_t threads[1];
514 size_t wg_size[1];
515 size_t wg_sizes_per_dimension[3];
516 size_t num_elements;
517 int err;
518 int i;
519 MTdata d;
520
521 err = create_single_kernel_helper(context, &program, &kernel, 1,
522 &wg_reduce_min_kernel_code_ulong,
523 "test_wg_reduce_min_ulong");
524 if (err)
525 return -1;
526
527 err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
528 if (err)
529 return -1;
530
531 err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
532 if (err)
533 return -1;
534 if(wg_sizes_per_dimension[0] < wg_size[0])
535 {
536 wg_size[0] = wg_sizes_per_dimension[0];
537 }
538
539 num_elements = n_elems;
540
541 input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
542 output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
543 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
544 sizeof(cl_ulong) * num_elements, NULL, NULL);
545 if (!streams[0])
546 {
547 log_error("clCreateBuffer failed\n");
548 return -1;
549 }
550
551 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
552 sizeof(cl_ulong) * num_elements, NULL, NULL);
553 if (!streams[1])
554 {
555 log_error("clCreateBuffer failed\n");
556 return -1;
557 }
558
559 p = input_ptr[0];
560 d = init_genrand( gRandomSeed );
561 for (i=0; i<num_elements; i++)
562 p[i] = genrand_int64(d);
563 free_mtdata(d); d = NULL;
564
565 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
566 if (err != CL_SUCCESS)
567 {
568 log_error("clWriteArray failed\n");
569 return -1;
570 }
571
572 values[0] = streams[0];
573 values[1] = streams[1];
574 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
575 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
576 if (err != CL_SUCCESS)
577 {
578 log_error("clSetKernelArgs failed\n");
579 return -1;
580 }
581
582 // Line below is troublesome...
583 threads[0] = (size_t)n_elems;
584 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
585 if (err != CL_SUCCESS)
586 {
587 log_error("clEnqueueNDRangeKernel failed\n");
588 return -1;
589 }
590
591 cl_uint dead = 0xdeaddead;
592 memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
593 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
594 if (err != CL_SUCCESS)
595 {
596 log_error("clEnqueueReadBuffer failed\n");
597 return -1;
598 }
599
600 if (verify_wg_reduce_min_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
601 {
602 log_error("work_group_reduce_min ulong failed\n");
603 return -1;
604 }
605 log_info("work_group_reduce_min ulong passed\n");
606
607 clReleaseMemObject(streams[0]);
608 clReleaseMemObject(streams[1]);
609 clReleaseKernel(kernel);
610 clReleaseProgram(program);
611 free(input_ptr[0]);
612 free(output_ptr);
613
614 return err;
615 }
616
617
618 int
test_work_group_reduce_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)619 test_work_group_reduce_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
620 {
621 int err;
622
623 err = test_work_group_reduce_min_int(device, context, queue, n_elems);
624 if (err) return err;
625 err = test_work_group_reduce_min_uint(device, context, queue, n_elems);
626 if (err) return err;
627 err = test_work_group_reduce_min_long(device, context, queue, n_elems);
628 if (err) return err;
629 err = test_work_group_reduce_min_ulong(device, context, queue, n_elems);
630 return err;
631 }
632
633