• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <iostream>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include <CL/cl_ext.h>
26 
27 /** @brief Gets the number of elements of type s in a fixed length array of s */
28 #define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
29 #define test_error_ret_and_free(errCode, msg, retValue, ptr)                   \
30     {                                                                          \
31         auto errCodeResult = errCode;                                          \
32         if (errCodeResult != CL_SUCCESS)                                       \
33         {                                                                      \
34             print_error(errCodeResult, msg);                                   \
35             free(ptr);                                                         \
36             return retValue;                                                   \
37         }                                                                      \
38     }
39 
40 const char* wg_scan_local_work_group_size = R"(
41     bool is_zero_linear_id()
42     {
43         size_t linear_id;
44 #if __OPENCL_VERSION__ < CL_VERSION_2_0
45         linear_id = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) * get_global_size(0)) +
46                     ((get_global_id(1) - get_global_offset(1)) * get_global_size(0)) +
47                     (get_global_id(0) - get_global_offset(0));
48 #else
49         linear_id = get_global_linear_id();
50 #endif
51         return linear_id == 0;
52     }
53 
54     uint get_l_size(size_t dim)
55     {
56 #if __OPENCL_VERSION__ < CL_VERSION_2_0
57         return get_local_size(dim);
58 #else
59         return get_enqueued_local_size(dim);
60 #endif
61     }
62 
63     __kernel void test_wg_scan_local_work_group_size(global uint *output)
64     {
65         if(!is_zero_linear_id()) return;
66         for (uint i = 0; i < 3; i++)
67         {
68             output[i] = get_l_size(i);
69         }
70     }
71     __kernel void test_wg_scan_local_work_group_size_static_local(
72                                             global uint *output)
73     {
74         __local char c[LOCAL_MEM_SIZE];
75 
76         if(!is_zero_linear_id()) return;
77         for (uint i = 0; i < 3; i++)
78         {
79             output[i] = get_l_size(i);
80         }
81     }
82     __kernel void test_wg_scan_local_work_group_size_dynlocal(
83                                         global uint *output,
84                                         __local char * c)
85     {
86         if(!is_zero_linear_id()) return;
87         for (uint i = 0; i < 3; i++)
88         {
89             output[i] = get_l_size(i);
90         }
91     };)";
92 
is_prime(size_t a)93 bool is_prime(size_t a)
94 {
95     size_t c;
96 
97     for (c = 2; c < a; c++)
98     {
99         if (a % c == 0) return false;
100     }
101     return true;
102 }
103 
is_not_prime(size_t a)104 bool is_not_prime(size_t a) { return !is_prime(a); }
105 
is_not_even(size_t a)106 bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); }
107 
is_not_odd(size_t a)108 bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); }
109 
110 #define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
111 /* The numbers we chose in the value_range are to be used for the second and
112    third dimension of the global work group size. The numbers below cover many
113    different cases: 1024 is a power of 2, 3 is an odd and small prime number, 12
114    is a multiple of 4 but not a power of 2, 1031 is a large odd and prime number
115    and 1 is to test the lack of this dimension if the others are present */
116 const size_t value_range[] = { 1024, 3, 12, 1031, 1 };
117 /* The value_range_nD contains numbers to be used for the experiments with 2D
118    and 3D global work sizes. This is because we need smaller numbers so that the
119    resulting number of work items is meaningful and does not become too large.
120    The cases here are: 64 that is a power of 2, 3 is an odd and small prime
121    number, 12 is a multiple of 4 but not a power of 2, 113 is a large prime
122    number
123    and 1 is to test the lack of this dimension if the others are present */
124 const size_t value_range_nD[] = { 64, 3, 12, 113, 1 };
125 const size_t basic_increment = 16;
126 const size_t primes_increment = 1;
127 enum num_dims
128 {
129     _1D = 1,
130     _2D = 2,
131     _3D = 3
132 };
133 
do_test(cl_device_id device,cl_context context,cl_command_queue queue,cl_kernel scan_kernel,int work_dim,size_t global_work_offset[3],size_t test_values[3],size_t dyn_mem_size)134 int do_test(cl_device_id device, cl_context context, cl_command_queue queue,
135             cl_kernel scan_kernel, int work_dim, size_t global_work_offset[3],
136             size_t test_values[3], size_t dyn_mem_size)
137 {
138     size_t local_work_size[] = { 1, 1, 1 };
139     size_t suggested_total_size;
140     size_t workgroupinfo_size;
141     cl_uint kernel_work_size[3] = { 0 };
142     clMemWrapper buffer;
143     cl_platform_id platform;
144 
145     int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
146                               &platform, NULL);
147     test_error_ret(err, "clGetDeviceInfo failed", -1);
148     clGetKernelSuggestedLocalWorkSizeKHR_fn
149         clGetKernelSuggestedLocalWorkSizeKHR =
150             (clGetKernelSuggestedLocalWorkSizeKHR_fn)
151                 clGetExtensionFunctionAddressForPlatform(
152                     platform, "clGetKernelSuggestedLocalWorkSizeKHR");
153 
154     if (clGetKernelSuggestedLocalWorkSizeKHR == NULL)
155     {
156         log_info("Extension 'cl_khr_suggested_local_work_size' could not be "
157                  "found.\n");
158         return TEST_FAIL;
159     }
160 
161     /* Create the actual buffer, using local_buffer as the host pointer, and ask
162      * to copy that into the buffer */
163     buffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
164                             sizeof(kernel_work_size), NULL, &err);
165     test_error_ret(err, "clCreateBuffer failed", -1);
166     err = clSetKernelArg(scan_kernel, 0, sizeof(buffer), &buffer);
167     test_error_ret(err, "clSetKernelArg failed", -1);
168     if (dyn_mem_size)
169     {
170         err = clSetKernelArg(scan_kernel, 1, dyn_mem_size, NULL);
171         test_error_ret(err, "clSetKernelArg failed", -1);
172     }
173     err = clGetKernelSuggestedLocalWorkSizeKHR(queue, scan_kernel, work_dim,
174                                                global_work_offset, test_values,
175                                                local_work_size);
176     test_error_ret(err, "clGetKernelSuggestedLocalWorkSizeKHR failed", -1);
177     suggested_total_size =
178         local_work_size[0] * local_work_size[1] * local_work_size[2];
179     err = clGetKernelWorkGroupInfo(
180         scan_kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
181         sizeof(workgroupinfo_size), &workgroupinfo_size, NULL);
182     test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1);
183     if (suggested_total_size > workgroupinfo_size)
184     {
185         std::cout << "The suggested work group size consist of "
186                   << suggested_total_size << " work items.\n"
187                   << "Work items are limited by " << workgroupinfo_size
188                   << std::endl;
189         std::cout << "Size from clGetKernelWorkGroupInfo: "
190                   << workgroupinfo_size;
191         std::cout << "\nSize from clGetKernelSuggestedLocalWorkSizeKHR: "
192                   << local_work_size[0] * local_work_size[1]
193                 * local_work_size[2]
194                   << std::endl;
195         return -1;
196     }
197 
198     err =
199         clEnqueueNDRangeKernel(queue, scan_kernel, work_dim, global_work_offset,
200                                test_values, // global work size
201                                NULL, 0, NULL, NULL);
202     test_error_ret(err, "clEnqueueNDRangeKernel failed", -1);
203     err = clEnqueueReadBuffer(queue, buffer, CL_NON_BLOCKING, 0,
204                               sizeof(kernel_work_size), kernel_work_size, 0,
205                               NULL, NULL);
206     test_error_ret(err, "clEnqueueReadBuffer failed", -1);
207     err = clFinish(queue);
208     test_error_ret(err, "clFinish failed", -1);
209 
210     if (kernel_work_size[0] != local_work_size[0]
211         || kernel_work_size[1] != local_work_size[1]
212         || kernel_work_size[2] != local_work_size[2])
213     {
214         std::cout
215             << "Kernel work size differs from local work size suggested:\n"
216             << "Kernel work size: (" << kernel_work_size[0] << ", "
217             << kernel_work_size[1] << ", " << kernel_work_size[2] << ")"
218             << "Local work size: (" << local_work_size[0] << ", "
219             << local_work_size[1] << ", " << local_work_size[2] << ")\n";
220         return -1;
221     }
222     return err;
223 }
224 
do_test_work_group_suggested_local_size(cl_device_id device,cl_context context,cl_command_queue queue,bool (* skip_cond)(size_t),size_t start,size_t end,size_t incr,cl_long max_local_mem_size,size_t global_work_offset[],num_dims dim)225 int do_test_work_group_suggested_local_size(
226     cl_device_id device, cl_context context, cl_command_queue queue,
227     bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr,
228     cl_long max_local_mem_size, size_t global_work_offset[], num_dims dim)
229 {
230     clProgramWrapper scan_program;
231     clKernelWrapper scan_kernel;
232     int err;
233     size_t test_values[] = { 1, 1, 1 };
234     std::string kernel_names[6] = {
235         "test_wg_scan_local_work_group_size",
236         "test_wg_scan_local_work_group_size_static_local",
237         "test_wg_scan_local_work_group_size_static_local",
238         "test_wg_scan_local_work_group_size_static_local",
239         "test_wg_scan_local_work_group_size_static_local",
240         "test_wg_scan_local_work_group_size_dynlocal"
241     };
242     std::string str_local_mem_size[6] = {
243         "-DLOCAL_MEM_SIZE=1",     "-DLOCAL_MEM_SIZE=1024",
244         "-DLOCAL_MEM_SIZE=4096",  "-DLOCAL_MEM_SIZE=16384",
245         "-DLOCAL_MEM_SIZE=32768", "-DLOCAL_MEM_SIZE=1"
246     };
247     size_t local_mem_size[6] = { 1, 1024, 4096, 16384, 32768, 1 };
248     size_t dyn_mem_size[6] = { 0, 0, 0, 0, 0, 1024 };
249     cl_ulong kernel_local_mem_size;
250     for (int kernel_num = 0; kernel_num < 6; kernel_num++)
251     {
252         if (max_local_mem_size < local_mem_size[kernel_num]) continue;
253         // Create the kernel
254         err = create_single_kernel_helper(
255             context, &scan_program, &scan_kernel, 1,
256             &wg_scan_local_work_group_size, (kernel_names[kernel_num]).c_str(),
257             (str_local_mem_size[kernel_num]).c_str());
258         test_error_ret(err,
259                        ("create_single_kernel_helper failed for kernel "
260                         + kernel_names[kernel_num])
261                            .c_str(),
262                        -1);
263 
264         // Check if the local memory used by the kernel is going to exceed the
265         // max_local_mem_size
266         err = clGetKernelWorkGroupInfo(
267             scan_kernel, device, CL_KERNEL_LOCAL_MEM_SIZE,
268             sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL);
269         test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1);
270         if (kernel_local_mem_size > max_local_mem_size) continue;
271         // return error if no number is found due to the skip condition
272         err = -1;
273         unsigned int j = 0;
274         size_t num_elems = NELEMS(value_range);
275         for (size_t i = start; i < end; i += incr)
276         {
277             if (skip_cond(i)) continue;
278             err = 0;
279             test_values[0] = i;
280             if (dim == _2D) test_values[1] = value_range_nD[j++ % num_elems];
281             if (dim == _3D)
282             {
283                 test_values[1] = value_range_nD[j++ % num_elems];
284                 test_values[2] = value_range_nD[rand() % num_elems];
285             }
286             err |= do_test(device, context, queue, scan_kernel, dim,
287                            global_work_offset, test_values,
288                            dyn_mem_size[kernel_num]);
289             test_error_ret(
290                 err,
291                 ("do_test failed for kernel " + kernel_names[kernel_num])
292                     .c_str(),
293                 -1);
294         }
295     }
296     return err;
297 }
298 
test_work_group_suggested_local_size_1D(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)299 int test_work_group_suggested_local_size_1D(cl_device_id device,
300                                             cl_context context,
301                                             cl_command_queue queue, int n_elems)
302 {
303     if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
304     {
305         log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
306                  "Skipping the test.\n");
307         return TEST_SKIPPED_ITSELF;
308     }
309     cl_long max_local_mem_size;
310     cl_int err =
311         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
312                         sizeof(max_local_mem_size), &max_local_mem_size, NULL);
313     test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
314                    -1);
315 
316     size_t start, end, incr;
317     size_t global_work_offset[] = { 0, 0, 0 };
318     size_t max_work_items = 0;
319     clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
320                     sizeof(max_work_items), &max_work_items, NULL);
321 
322     // odds
323     start = 1;
324     end = max_work_items;
325     incr = basic_increment;
326     err = do_test_work_group_suggested_local_size(
327         device, context, queue, is_not_odd, start, end, incr,
328         max_local_mem_size, global_work_offset, _1D);
329     test_error_ret(
330         err, "test_work_group_suggested_local_size_1D for odds failed.", -1);
331     log_info("test_work_group_suggested_local_size_1D odds passed\n");
332 
333     // evens
334     start = 2;
335     end = max_work_items;
336     incr = basic_increment;
337     err = do_test_work_group_suggested_local_size(
338         device, context, queue, is_not_even, start, end, incr,
339         max_local_mem_size, global_work_offset, _1D);
340     test_error_ret(
341         err, "test_work_group_suggested_local_size_1D for evens failed.", -1);
342     log_info("test_work_group_suggested_local_size_1D evens passed\n");
343 
344     // primes
345     start = max_work_items + 1;
346     end = 2 * max_work_items;
347     incr = primes_increment;
348     err = do_test_work_group_suggested_local_size(
349         device, context, queue, is_not_prime, start, end, incr,
350         max_local_mem_size, global_work_offset, _1D);
351     test_error_ret(
352         err, "test_work_group_suggested_local_size_1D for primes failed.", -1);
353     log_info("test_work_group_suggested_local_size_1D primes passed\n");
354 
355     global_work_offset[0] = 10;
356     global_work_offset[1] = 10;
357     global_work_offset[2] = 10;
358     // odds
359     start = 1;
360     end = max_work_items;
361     incr = basic_increment;
362     err = do_test_work_group_suggested_local_size(
363         device, context, queue, is_not_odd, start, end, incr,
364         max_local_mem_size, global_work_offset, _1D);
365     test_error_ret(err,
366                    "test_work_group_suggested_local_size_1D for odds with "
367                    "global_work_offset failed.",
368                    -1);
369     log_info("test_work_group_suggested_local_size_1D odds with "
370              "global_work_offset passed\n");
371 
372     // evens
373     start = 2;
374     end = max_work_items;
375     incr = basic_increment;
376     err = do_test_work_group_suggested_local_size(
377         device, context, queue, is_not_even, start, end, incr,
378         max_local_mem_size, global_work_offset, _1D);
379     test_error_ret(err,
380                    "test_work_group_suggested_local_size_1D for evens with "
381                    "global_work_offset failed.",
382                    -1);
383     log_info("test_work_group_suggested_local_size_1D evens with "
384              "global_work_offset passed\n");
385 
386     // primes
387     start = max_work_items + 1;
388     end = 2 * max_work_items;
389     incr = primes_increment;
390     err = do_test_work_group_suggested_local_size(
391         device, context, queue, is_not_prime, start, end, incr,
392         max_local_mem_size, global_work_offset, _1D);
393     test_error_ret(err,
394                    "test_work_group_suggested_local_size_1D for primes with "
395                    "global_work_offset failed.",
396                    -1);
397     log_info("test_work_group_suggested_local_size_1D primes with "
398              "global_work_offset passed\n");
399 
400     return err;
401 }
402 
test_work_group_suggested_local_size_2D(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)403 int test_work_group_suggested_local_size_2D(cl_device_id device,
404                                             cl_context context,
405                                             cl_command_queue queue, int n_elems)
406 {
407     if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
408     {
409         log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
410                  "Skipping the test.\n");
411         return TEST_SKIPPED_ITSELF;
412     }
413     cl_long max_local_mem_size;
414     cl_int err =
415         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
416                         sizeof(max_local_mem_size), &max_local_mem_size, NULL);
417     test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
418                    -1);
419 
420     size_t start, end, incr;
421     size_t global_work_offset[] = { 0, 0, 0 };
422     size_t max_work_items = 0;
423     clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
424                     sizeof(max_work_items), &max_work_items, NULL);
425 
426     // odds
427     start = 1;
428     end = max_work_items;
429     incr = basic_increment;
430     err = do_test_work_group_suggested_local_size(
431         device, context, queue, is_not_odd, start, end, incr,
432         max_local_mem_size, global_work_offset, _2D);
433     test_error_ret(
434         err, "test_work_group_suggested_local_size_2D for odds failed.", -1);
435     log_info("test_work_group_suggested_local_size_2D odds passed\n");
436 
437     // evens
438     start = 2;
439     end = max_work_items;
440     incr = basic_increment;
441     err = do_test_work_group_suggested_local_size(
442         device, context, queue, is_not_even, start, end, incr,
443         max_local_mem_size, global_work_offset, _2D);
444     test_error_ret(
445         err, "test_work_group_suggested_local_size_2D for evens failed.", -1);
446     log_info("test_work_group_suggested_local_size_2D evens passed\n");
447 
448     // primes
449     start = max_work_items + 1;
450     end = max_work_items + max_work_items / 4;
451     incr = primes_increment;
452     err = do_test_work_group_suggested_local_size(
453         device, context, queue, is_not_prime, start, end, incr,
454         max_local_mem_size, global_work_offset, _2D);
455     test_error_ret(
456         err, "test_work_group_suggested_local_size_2D for primes failed.", -1);
457     log_info("test_work_group_suggested_local_size_2D primes passed\n");
458 
459     global_work_offset[0] = 10;
460     global_work_offset[1] = 10;
461     global_work_offset[2] = 10;
462 
463     // odds
464     start = 1;
465     end = max_work_items;
466     incr = basic_increment;
467     err = do_test_work_group_suggested_local_size(
468         device, context, queue, is_not_odd, start, end, incr,
469         max_local_mem_size, global_work_offset, _2D);
470     test_error_ret(err,
471                    "test_work_group_suggested_local_size_2D for odds with "
472                    "global_work_offset failed.",
473                    -1);
474     log_info("test_work_group_suggested_local_size_2D odds with "
475              "global_work_offset passed\n");
476 
477     // evens
478     start = 2;
479     end = max_work_items;
480     incr = basic_increment;
481     err = do_test_work_group_suggested_local_size(
482         device, context, queue, is_not_even, start, end, incr,
483         max_local_mem_size, global_work_offset, _2D);
484     test_error_ret(err,
485                    "test_work_group_suggested_local_size_2D for evens with "
486                    "global_work_offset failed.",
487                    -1);
488     log_info("test_work_group_suggested_local_size_2D evens with "
489              "global_work_offset passed\n");
490 
491     // primes
492     start = max_work_items + 1;
493     end = max_work_items + max_work_items / 4;
494     incr = primes_increment;
495     err = do_test_work_group_suggested_local_size(
496         device, context, queue, is_not_prime, start, end, incr,
497         max_local_mem_size, global_work_offset, _2D);
498     test_error_ret(err,
499                    "test_work_group_suggested_local_size_2D for primes with "
500                    "global_work_offset failed.",
501                    -1);
502     log_info("test_work_group_suggested_local_size_2D primes with "
503              "global_work_offset passed\n");
504 
505     return err;
506 }
507 
test_work_group_suggested_local_size_3D(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)508 int test_work_group_suggested_local_size_3D(cl_device_id device,
509                                             cl_context context,
510                                             cl_command_queue queue, int n_elems)
511 {
512     if (!is_extension_available(device, "cl_khr_suggested_local_work_size"))
513     {
514         log_info("Device does not support 'cl_khr_suggested_local_work_size'. "
515                  "Skipping the test.\n");
516         return TEST_SKIPPED_ITSELF;
517     }
518     cl_long max_local_mem_size;
519     cl_int err =
520         clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
521                         sizeof(max_local_mem_size), &max_local_mem_size, NULL);
522     test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.",
523                    -1);
524 
525     size_t start, end, incr;
526     size_t global_work_offset[] = { 0, 0, 0 };
527     size_t max_work_items = 0;
528     clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
529                     sizeof(max_work_items), &max_work_items, NULL);
530 
531     // odds
532     start = 1;
533     end = max_work_items / 2;
534     incr = basic_increment;
535     err = do_test_work_group_suggested_local_size(
536         device, context, queue, is_not_odd, start, end, incr,
537         max_local_mem_size, global_work_offset, _3D);
538     test_error_ret(
539         err, "test_work_group_suggested_local_size_3D for odds failed.", -1);
540     log_info("test_work_group_suggested_local_size_3D odds passed\n");
541 
542     // evens
543     start = 2;
544     end = max_work_items / 2;
545     incr = basic_increment;
546     err = do_test_work_group_suggested_local_size(
547         device, context, queue, is_not_even, start, end, incr,
548         max_local_mem_size, global_work_offset, _3D);
549     test_error_ret(
550         err, "test_work_group_suggested_local_size_3D for evens failed.", -1);
551     log_info("test_work_group_suggested_local_size_3D evens passed\n");
552 
553     // primes
554     start = max_work_items + 1;
555     end = max_work_items + max_work_items / 4;
556     incr = primes_increment;
557     err = do_test_work_group_suggested_local_size(
558         device, context, queue, is_not_prime, start, end, incr,
559         max_local_mem_size, global_work_offset, _3D);
560     test_error_ret(
561         err, "test_work_group_suggested_local_size_3D for primes failed.", -1);
562     log_info("test_work_group_suggested_local_size_3D primes passed\n");
563 
564     global_work_offset[0] = 10;
565     global_work_offset[1] = 10;
566     global_work_offset[2] = 10;
567 
568     // odds
569     start = 1;
570     end = max_work_items / 2;
571     incr = basic_increment;
572     err = do_test_work_group_suggested_local_size(
573         device, context, queue, is_not_odd, start, end, incr,
574         max_local_mem_size, global_work_offset, _3D);
575     test_error_ret(err,
576                    "test_work_group_suggested_local_size_3D for odds with "
577                    "global_work_offset failed.",
578                    -1);
579     log_info("test_work_group_suggested_local_size_3D odds with "
580              "global_work_offset passed\n");
581 
582     // evens
583     start = 2;
584     end = max_work_items / 2;
585     incr = basic_increment;
586     err = do_test_work_group_suggested_local_size(
587         device, context, queue, is_not_even, start, end, incr,
588         max_local_mem_size, global_work_offset, _3D);
589     test_error_ret(err,
590                    "test_work_group_suggested_local_size_3D for evens with "
591                    "global_work_offset failed.",
592                    -1);
593     log_info("test_work_group_suggested_local_size_3D evens with "
594              "global_work_offset passed\n");
595 
596     // primes
597     start = max_work_items + 1;
598     end = max_work_items + max_work_items / 4;
599     incr = primes_increment;
600     err = do_test_work_group_suggested_local_size(
601         device, context, queue, is_not_prime, start, end, incr,
602         max_local_mem_size, global_work_offset, _3D);
603     test_error_ret(err,
604                    "test_work_group_suggested_local_size_3D for primes with "
605                    "global_work_offset failed.",
606                    -1);
607     log_info("test_work_group_suggested_local_size_3D primes with "
608              "global_work_offset passed\n");
609 
610     return err;
611 }
612