• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 
26 const char *wg_reduce_min_kernel_code_int =
27 "__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n"
28 "{\n"
29 "    int  tid = get_global_id(0);\n"
30 "\n"
31 "    int result = work_group_reduce_min(input[tid]);\n"
32 "    output[tid] = result;\n"
33 "}\n";
34 
35 
36 const char *wg_reduce_min_kernel_code_uint =
37 "__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n"
38 "{\n"
39 "    int  tid = get_global_id(0);\n"
40 "\n"
41 "    uint result = work_group_reduce_min(input[tid]);\n"
42 "    output[tid] = result;\n"
43 "}\n";
44 
45 const char *wg_reduce_min_kernel_code_long =
46 "__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n"
47 "{\n"
48 "    int  tid = get_global_id(0);\n"
49 "\n"
50 "    long result = work_group_reduce_min(input[tid]);\n"
51 "    output[tid] = result;\n"
52 "}\n";
53 
54 
55 const char *wg_reduce_min_kernel_code_ulong =
56 "__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n"
57 "{\n"
58 "    int  tid = get_global_id(0);\n"
59 "\n"
60 "    ulong result = work_group_reduce_min(input[tid]);\n"
61 "    output[tid] = result;\n"
62 "}\n";
63 
64 
65 static int
verify_wg_reduce_min_int(int * inptr,int * outptr,size_t n,size_t wg_size)66 verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size)
67 {
68     size_t     i, j;
69 
70     for (i=0; i<n; i+=wg_size)
71     {
72         int min = CL_INT_MAX;
73         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
74             min = (min < inptr[i+j]) ? min : inptr[i+j];
75 
76         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
77         {
78             if ( min != outptr[i+j] )
79             {
80                 log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
81                 return -1;
82             }
83         }
84     }
85 
86     return 0;
87 }
88 
89 static int
verify_wg_reduce_min_uint(unsigned int * inptr,unsigned int * outptr,size_t n,size_t wg_size)90 verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size)
91 {
92     size_t     i, j;
93 
94     for (i=0; i<n; i+=wg_size)
95     {
96         unsigned int min = CL_UINT_MAX;
97         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
98             min = (min < inptr[i+j]) ? min : inptr[i+j];
99 
100         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
101         {
102             if ( min != outptr[i+j] )
103             {
104                 log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]);
105                 return -1;
106             }
107         }
108     }
109 
110     return 0;
111 }
112 
113 static int
verify_wg_reduce_min_long(cl_long * inptr,cl_long * outptr,size_t n,size_t wg_size)114 verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size)
115 {
116     size_t     i, j;
117 
118     for (i=0; i<n; i+=wg_size)
119     {
120         cl_long min = CL_ULONG_MAX;
121         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
122             min = (min < inptr[i+j]) ? min : inptr[i+j];
123 
124         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
125         {
126             if ( min != outptr[i+j] )
127             {
128                 log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]);
129                 return -1;
130             }
131         }
132     }
133 
134     return 0;
135 }
136 
137 static int
verify_wg_reduce_min_ulong(cl_ulong * inptr,cl_ulong * outptr,size_t n,size_t wg_size)138 verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size)
139 {
140     size_t     i, j;
141 
142     for (i=0; i<n; i+=wg_size)
143     {
144         cl_ulong min = CL_ULONG_MAX;
145         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
146             min = (min < inptr[i+j]) ? min : inptr[i+j];
147 
148         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
149         {
150             if ( min != outptr[i+j] )
151             {
152                 log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]);
153                 return -1;
154             }
155         }
156     }
157 
158     return 0;
159 }
160 
161 
162 
163 int
test_work_group_reduce_min_int(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)164 test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
165 {
166     cl_mem       streams[2];
167     cl_int       *input_ptr[1], *p;
168     cl_int       *output_ptr;
169     cl_program   program;
170     cl_kernel    kernel;
171     void         *values[2];
172     size_t       threads[1];
173     size_t       wg_size[1];
174     size_t       wg_sizes_per_dimension[3];
175     size_t       num_elements;
176     int          err;
177     int          i;
178     MTdata       d;
179 
180     err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_int, "test_wg_reduce_min_int", "-cl-std=CL2.0" );
181     if (err)
182         return -1;
183 
184     err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
185     if (err)
186         return -1;
187 
188     err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
189     if (err)
190         return -1;
191     if(wg_sizes_per_dimension[0] < wg_size[0])
192     {
193         wg_size[0] = wg_sizes_per_dimension[0];
194     }
195 
196     num_elements = n_elems;
197 
198     input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements);
199     output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
200     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
201     if (!streams[0])
202     {
203         log_error("clCreateBuffer failed\n");
204         return -1;
205     }
206 
207     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL );
208     if (!streams[1])
209     {
210         log_error("clCreateBuffer failed\n");
211         return -1;
212     }
213 
214     p = input_ptr[0];
215     d = init_genrand( gRandomSeed );
216     for (i=0; i<num_elements; i++)
217         p[i] = genrand_int32(d);
218     free_mtdata(d); d = NULL;
219 
220     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int) * num_elements, (void *)input_ptr[0], 0, NULL, NULL );
221     if (err != CL_SUCCESS)
222     {
223         log_error("clWriteArray failed\n");
224         return -1;
225     }
226 
227     values[0] = streams[0];
228     values[1] = streams[1];
229     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
230     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
231     if (err != CL_SUCCESS)
232     {
233         log_error("clSetKernelArgs failed\n");
234         return -1;
235     }
236 
237     // Line below is troublesome...
238     threads[0] = (size_t)num_elements;
239     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
240     if (err != CL_SUCCESS)
241     {
242         log_error("clEnqueueNDRangeKernel failed\n");
243         return -1;
244     }
245 
246     cl_uint dead = 0xdeaddead;
247     memset_pattern4(output_ptr, &dead, sizeof(cl_int)*num_elements);
248     err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
249     if (err != CL_SUCCESS)
250     {
251         log_error("clEnqueueReadBuffer failed\n");
252         return -1;
253     }
254 
255     if (verify_wg_reduce_min_int(input_ptr[0], output_ptr, num_elements, wg_size[0]))
256     {
257         log_error("work_group_reduce_min int failed\n");
258         return -1;
259     }
260     log_info("work_group_reduce_min int passed\n");
261 
262     clReleaseMemObject(streams[0]);
263     clReleaseMemObject(streams[1]);
264     clReleaseKernel(kernel);
265     clReleaseProgram(program);
266     free(input_ptr[0]);
267     free(output_ptr);
268 
269     return err;
270 }
271 
272 
273 int
test_work_group_reduce_min_uint(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)274 test_work_group_reduce_min_uint(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
275 {
276     cl_mem       streams[2];
277     cl_uint      *input_ptr[1], *p;
278     cl_uint      *output_ptr;
279     cl_program   program;
280     cl_kernel    kernel;
281     void         *values[2];
282     size_t       threads[1];
283     size_t       wg_size[1];
284     size_t       wg_sizes_per_dimension[3];
285     size_t       num_elements;
286     int          err;
287     int          i;
288     MTdata       d;
289 
290     err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_uint, "test_wg_reduce_min_uint", "-cl-std=CL2.0" );
291     if (err)
292         return -1;
293 
294     err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
295     if (err)
296         return -1;
297 
298     err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
299     if (err)
300         return -1;
301     if(wg_sizes_per_dimension[0] < wg_size[0])
302     {
303         wg_size[0] = wg_sizes_per_dimension[0];
304     }
305 
306     num_elements = n_elems;
307 
308     input_ptr[0] = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
309     output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
310     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
311     if (!streams[0])
312     {
313         log_error("clCreateBuffer failed\n");
314         return -1;
315     }
316 
317     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, NULL );
318     if (!streams[1])
319     {
320         log_error("clCreateBuffer failed\n");
321         return -1;
322     }
323 
324     p = input_ptr[0];
325     d = init_genrand( gRandomSeed );
326     for (i=0; i<num_elements; i++)
327         p[i] = genrand_int32(d);
328     free_mtdata(d); d = NULL;
329 
330     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_uint)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
331     if (err != CL_SUCCESS)
332     {
333         log_error("clWriteArray failed\n");
334         return -1;
335     }
336 
337     values[0] = streams[0];
338     values[1] = streams[1];
339     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
340     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
341     if (err != CL_SUCCESS)
342     {
343         log_error("clSetKernelArgs failed\n");
344         return -1;
345     }
346 
347     // Line below is troublesome...
348     threads[0] = (size_t)n_elems;
349     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
350     if (err != CL_SUCCESS)
351     {
352         log_error("clEnqueueNDRangeKernel failed\n");
353         return -1;
354     }
355 
356     cl_uint dead = 0xdeaddead;
357     memset_pattern4(output_ptr, &dead, sizeof(cl_uint)*num_elements);
358     err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_uint)*num_elements, (void *)output_ptr, 0, NULL, NULL );
359     if (err != CL_SUCCESS)
360     {
361         log_error("clEnqueueReadBuffer failed\n");
362         return -1;
363     }
364 
365     if (verify_wg_reduce_min_uint(input_ptr[0], output_ptr, num_elements, wg_size[0]))
366     {
367         log_error("work_group_reduce_min uint failed\n");
368         return -1;
369     }
370     log_info("work_group_reduce_min uint passed\n");
371 
372     clReleaseMemObject(streams[0]);
373     clReleaseMemObject(streams[1]);
374     clReleaseKernel(kernel);
375     clReleaseProgram(program);
376     free(input_ptr[0]);
377     free(output_ptr);
378 
379     return err;
380 }
381 
382 int
test_work_group_reduce_min_long(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)383 test_work_group_reduce_min_long(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
384 {
385     cl_mem       streams[2];
386     cl_long      *input_ptr[1], *p;
387     cl_long      *output_ptr;
388     cl_program   program;
389     cl_kernel    kernel;
390     void         *values[2];
391     size_t       threads[1];
392     size_t       wg_size[1];
393     size_t       wg_sizes_per_dimension[3];
394     size_t       num_elements;
395     int          err;
396     int          i;
397     MTdata       d;
398 
399     err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_long, "test_wg_reduce_min_long", "-cl-std=CL2.0" );
400     if (err)
401         return -1;
402 
403     err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
404     if (err)
405         return -1;
406 
407     err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
408     if (err)
409         return -1;
410     if(wg_sizes_per_dimension[0] < wg_size[0])
411     {
412         wg_size[0] = wg_sizes_per_dimension[0];
413     }
414 
415     num_elements = n_elems;
416 
417     input_ptr[0] = (cl_long*)malloc(sizeof(cl_long) * num_elements);
418     output_ptr = (cl_long*)malloc(sizeof(cl_long) * num_elements);
419     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
420     if (!streams[0])
421     {
422         log_error("clCreateBuffer failed\n");
423         return -1;
424     }
425 
426     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_long) * num_elements, NULL, NULL );
427     if (!streams[1])
428     {
429         log_error("clCreateBuffer failed\n");
430         return -1;
431     }
432 
433     p = input_ptr[0];
434     d = init_genrand( gRandomSeed );
435     for (i=0; i<num_elements; i++)
436         p[i] = genrand_int64(d);
437     free_mtdata(d); d = NULL;
438 
439     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_long)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
440     if (err != CL_SUCCESS)
441     {
442         log_error("clWriteArray failed\n");
443         return -1;
444     }
445 
446     values[0] = streams[0];
447     values[1] = streams[1];
448     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
449     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
450     if (err != CL_SUCCESS)
451     {
452         log_error("clSetKernelArgs failed\n");
453         return -1;
454     }
455 
456     // Line below is troublesome...
457     threads[0] = (size_t)n_elems;
458     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
459     if (err != CL_SUCCESS)
460     {
461         log_error("clEnqueueNDRangeKernel failed\n");
462         return -1;
463     }
464 
465     cl_uint dead = 0xdeaddead;
466     memset_pattern4(output_ptr, &dead, sizeof(cl_long)*num_elements);
467     err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_long)*num_elements, (void *)output_ptr, 0, NULL, NULL );
468     if (err != CL_SUCCESS)
469     {
470         log_error("clEnqueueReadBuffer failed\n");
471         return -1;
472     }
473 
474     if (verify_wg_reduce_min_long(input_ptr[0], output_ptr, num_elements, wg_size[0]))
475     {
476         log_error("work_group_reduce_min long failed\n");
477         return -1;
478     }
479     log_info("work_group_reduce_min long passed\n");
480 
481     clReleaseMemObject(streams[0]);
482     clReleaseMemObject(streams[1]);
483     clReleaseKernel(kernel);
484     clReleaseProgram(program);
485     free(input_ptr[0]);
486     free(output_ptr);
487 
488     return err;
489 }
490 
491 
492 int
test_work_group_reduce_min_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)493 test_work_group_reduce_min_ulong(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
494 {
495     cl_mem       streams[2];
496     cl_ulong     *input_ptr[1], *p;
497     cl_ulong     *output_ptr;
498     cl_program   program;
499     cl_kernel    kernel;
500     void         *values[2];
501     size_t       threads[1];
502     size_t       wg_size[1];
503     size_t       wg_sizes_per_dimension[3];
504     size_t       num_elements;
505     int          err;
506     int          i;
507     MTdata       d;
508 
509     err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_ulong, "test_wg_reduce_min_ulong", "-cl-std=CL2.0" );
510     if (err)
511         return -1;
512 
513     err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL);
514     if (err)
515         return -1;
516 
517     err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL);
518     if (err)
519         return -1;
520     if(wg_sizes_per_dimension[0] < wg_size[0])
521     {
522         wg_size[0] = wg_sizes_per_dimension[0];
523     }
524 
525     num_elements = n_elems;
526 
527     input_ptr[0] = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
528     output_ptr = (cl_ulong*)malloc(sizeof(cl_ulong) * num_elements);
529     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
530     if (!streams[0])
531     {
532         log_error("clCreateBuffer failed\n");
533         return -1;
534     }
535 
536     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_ulong) * num_elements, NULL, NULL );
537     if (!streams[1])
538     {
539         log_error("clCreateBuffer failed\n");
540         return -1;
541     }
542 
543     p = input_ptr[0];
544     d = init_genrand( gRandomSeed );
545     for (i=0; i<num_elements; i++)
546         p[i] = genrand_int64(d);
547     free_mtdata(d); d = NULL;
548 
549     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_ulong)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
550     if (err != CL_SUCCESS)
551     {
552         log_error("clWriteArray failed\n");
553         return -1;
554     }
555 
556     values[0] = streams[0];
557     values[1] = streams[1];
558     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
559     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
560     if (err != CL_SUCCESS)
561     {
562         log_error("clSetKernelArgs failed\n");
563         return -1;
564     }
565 
566     // Line below is troublesome...
567     threads[0] = (size_t)n_elems;
568     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
569     if (err != CL_SUCCESS)
570     {
571         log_error("clEnqueueNDRangeKernel failed\n");
572         return -1;
573     }
574 
575     cl_uint dead = 0xdeaddead;
576     memset_pattern4(output_ptr, &dead, sizeof(cl_ulong)*num_elements);
577     err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_ulong)*num_elements, (void *)output_ptr, 0, NULL, NULL );
578     if (err != CL_SUCCESS)
579     {
580         log_error("clEnqueueReadBuffer failed\n");
581         return -1;
582     }
583 
584     if (verify_wg_reduce_min_ulong(input_ptr[0], output_ptr, num_elements, wg_size[0]))
585     {
586         log_error("work_group_reduce_min ulong failed\n");
587         return -1;
588     }
589     log_info("work_group_reduce_min ulong passed\n");
590 
591     clReleaseMemObject(streams[0]);
592     clReleaseMemObject(streams[1]);
593     clReleaseKernel(kernel);
594     clReleaseProgram(program);
595     free(input_ptr[0]);
596     free(output_ptr);
597 
598     return err;
599 }
600 
601 
602 int
test_work_group_reduce_min(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)603 test_work_group_reduce_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
604 {
605     int err;
606 
607     err = test_work_group_reduce_min_int(device, context, queue, n_elems);
608     if (err) return err;
609     err = test_work_group_reduce_min_uint(device, context, queue, n_elems);
610     if (err) return err;
611     err = test_work_group_reduce_min_long(device, context, queue, n_elems);
612     if (err) return err;
613     err = test_work_group_reduce_min_ulong(device, context, queue, n_elems);
614     return err;
615 }
616 
617