• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
17 #define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
18 
19 #include <vector>
20 #include <limits>
21 #include <algorithm>
22 
23 // Common for all OpenCL C++ tests
24 #include "../common.hpp"
25 // Common for tests of work-group functions
26 #include "common.hpp"
27 
28 // -----------------------------------------------------------------------------------
29 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
30 // -----------------------------------------------------------------------------------
31 #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
generate_wg_broadcast_1D_kernel_code()32 std::string generate_wg_broadcast_1D_kernel_code()
33 {
34     return
35         "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
36         "{\n"
37         "    ulong tid = get_global_id(0);\n"
38         "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
39         "    output[tid] = result;\n"
40         "}\n";
41 }
generate_wg_broadcast_2D_kernel_code()42 std::string generate_wg_broadcast_2D_kernel_code()
43 {
44     return
45         "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
46         "{\n"
47         "    ulong tid_x = get_global_id(0);\n"
48         "    ulong tid_y = get_global_id(1);\n"
49         "    size_t x = get_group_id(0) % get_local_size(0);\n"
50         "    size_t y = get_group_id(1) % get_local_size(1);\n"
51         "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
52         "    uint result = work_group_broadcast(input[idx], x, y);\n"
53         "    output[idx] = result;\n"
54         "}\n";
55 }
generate_wg_broadcast_3D_kernel_code()56 std::string generate_wg_broadcast_3D_kernel_code()
57 {
58     return
59         "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
60         "{\n"
61         "    ulong tid_x = get_global_id(0);\n"
62         "    ulong tid_y = get_global_id(1);\n"
63         "    ulong tid_z = get_global_id(2);\n"
64         "    size_t x = get_group_id(0) % get_local_size(0);\n"
65         "    size_t y = get_group_id(1) % get_local_size(1);\n"
66         "    size_t z = get_group_id(2) % get_local_size(2);\n"
67         "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
68         "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
69         "    output[idx] = result;\n"
70         "}\n";
71 }
72 #else
generate_wg_broadcast_1D_kernel_code()73 std::string generate_wg_broadcast_1D_kernel_code()
74 {
75     return "#include <opencl_memory>\n"
76            "#include <opencl_work_item>\n"
77            "#include <opencl_work_group>\n"
78            "using namespace cl;\n"
79            "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
80            "{\n"
81            "    ulong tid = get_global_id(0);\n"
82            "    uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
83            "    output[tid] = result;\n"
84            "}\n";
85 }
generate_wg_broadcast_2D_kernel_code()86 std::string generate_wg_broadcast_2D_kernel_code()
87 {
88     return "#include <opencl_memory>\n"
89            "#include <opencl_work_item>\n"
90            "#include <opencl_work_group>\n"
91            "using namespace cl;\n"
92            "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
93            "{\n"
94            "    ulong tid_x = get_global_id(0);\n"
95            "    ulong tid_y = get_global_id(1);\n"
96            "    size_t x = get_group_id(0) % get_local_size(0);\n"
97            "    size_t y = get_group_id(1) % get_local_size(1);\n"
98            "    size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
99            "    uint result = work_group_broadcast(input[idx], x, y);\n"
100            "    output[idx] = result;\n"
101            "}\n";
102 }
generate_wg_broadcast_3D_kernel_code()103 std::string generate_wg_broadcast_3D_kernel_code()
104 {
105     return "#include <opencl_memory>\n"
106            "#include <opencl_work_item>\n"
107            "#include <opencl_work_group>\n"
108            "using namespace cl;\n"
109            "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
110            "{\n"
111            "    ulong tid_x = get_global_id(0);\n"
112            "    ulong tid_y = get_global_id(1);\n"
113            "    ulong tid_z = get_global_id(2);\n"
114            "    size_t x = get_group_id(0) % get_local_size(0);\n"
115            "    size_t y = get_group_id(1) % get_local_size(1);\n"
116            "    size_t z = get_group_id(2) % get_local_size(2);\n"
117            "    ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
118            "    uint result = work_group_broadcast(input[idx], x, y, z);\n"
119            "    output[idx] = result;\n"
120            "}\n";
121 }
122 #endif
123 
124 int
verify_wg_broadcast_1D(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t n,size_t wg_size)125 verify_wg_broadcast_1D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t n, size_t wg_size)
126 {
127     size_t i, j;
128     size_t group_id;
129 
130     for (i=0,group_id=0; i<n; i+=wg_size,group_id++)
131     {
132         int local_size = (n-i) > wg_size ? wg_size : (n-i);
133         cl_uint broadcast_result = in[i + (group_id % local_size)];
134         for (j=0; j<local_size; j++)
135         {
136             if ( broadcast_result != out[i+j] )
137             {
138                 log_info("work_group_broadcast: Error at %lu: expected = %u, got = %u\n", i+j, broadcast_result, out[i+j]);
139                 return -1;
140             }
141         }
142     }
143 
144     return CL_SUCCESS;
145 }
146 
147 int
verify_wg_broadcast_2D(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t nx,size_t ny,size_t wg_size_x,size_t wg_size_y)148 verify_wg_broadcast_2D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
149                        size_t nx, size_t ny,
150                        size_t wg_size_x, size_t wg_size_y)
151 {
152     size_t i, j, _i, _j;
153     size_t group_id_x, group_id_y;
154 
155     for (i=0,group_id_y=0; i<ny; i+=wg_size_y,group_id_y++)
156     {
157         size_t y = group_id_y % wg_size_y;
158         size_t local_size_y = (ny-i) > wg_size_y ? wg_size_y : (ny-i);
159         for (_i=0; _i < local_size_y; _i++)
160         {
161             for (j=0,group_id_x=0; j<nx; j+=wg_size_x,group_id_x++)
162             {
163                 size_t x = group_id_x % wg_size_x;
164                 size_t local_size_x = (nx-j) > wg_size_x ? wg_size_x : (nx-j);
165                 cl_uint broadcast_result = in[(i + y) * nx + (j + x)];
166                 for (_j=0; _j < local_size_x; _j++)
167                 {
168                     size_t indx = (i + _i) * nx + (j + _j);
169                     if ( broadcast_result != out[indx] )
170                     {
171                         log_info("%lu\n", indx);
172                         log_info("%lu\n", ((i + y) * nx + (j + x)));
173                          log_info("%lu\n", out.size());
174                         log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n", j+_j, i+_i, broadcast_result, out[indx]);
175                         return -1;
176                     }
177                 }
178             }
179         }
180     }
181 
182     return CL_SUCCESS;
183 }
184 
185 int
verify_wg_broadcast_3D(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t nx,size_t ny,size_t nz,size_t wg_size_x,size_t wg_size_y,size_t wg_size_z)186 verify_wg_broadcast_3D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
187                        size_t nx, size_t ny, size_t nz,
188                        size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
189 {
190     size_t i, j, k, _i, _j, _k;
191     size_t group_id_x, group_id_y, group_id_z;
192 
193     for (i=0,group_id_z=0; i<nz; i+=wg_size_z,group_id_z++)
194     {
195         size_t z = group_id_z % wg_size_z;
196         size_t local_size_z = (nz-i) > wg_size_z ? wg_size_z : (nz-i);
197         for (_i=0; _i < local_size_z; _i++)
198         {
199             for (j=0,group_id_y=0; j<ny; j+=wg_size_y,group_id_y++)
200             {
201                 size_t y = group_id_y % wg_size_y;
202                 size_t local_size_y = (ny-j) > wg_size_y ? wg_size_y : (ny-j);
203                 for (_j=0; _j < local_size_y; _j++)
204                 {
205                     for (k=0,group_id_x=0; k<nx; k+=wg_size_x,group_id_x++)
206                     {
207                         size_t x = group_id_x % wg_size_x;
208                         size_t local_size_x = (nx-k) > wg_size_x ? wg_size_x : (nx-k);
209                         cl_uint broadcast_result = in[(i + z) * ny * nz + (j + y) * nx + (k + x)];
210                         for (_k=0; _k < local_size_x; _k++)
211                         {
212                             size_t indx = (i + _i) * ny * nx + (j + _j) * nx + (k + _k);
213                             if ( broadcast_result != out[indx] )
214                             {
215                                 log_info(
216                                     "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n",
217                                     k+_k, j+_j, i+_i,
218                                     broadcast_result, out[indx]);
219                                 return -1;
220                             }
221                         }
222                     }
223                 }
224             }
225         }
226     }
227     return CL_SUCCESS;
228 }
229 
generate_input_wg_broadcast(size_t count,size_t wg_size)230 std::vector<cl_uint> generate_input_wg_broadcast(size_t count, size_t wg_size)
231 {
232     std::vector<cl_uint> input(count, cl_uint(0));
233     size_t j = wg_size;
234     for(size_t i = 0; i < count; i++)
235     {
236         input[i] = static_cast<cl_uint>(j);
237         j--;
238         if(j == 0)
239         {
240             j = wg_size;
241         }
242     }
243     return input;
244 }
245 
generate_output_wg_broadcast(size_t count,size_t wg_size)246 std::vector<cl_uint> generate_output_wg_broadcast(size_t count, size_t wg_size)
247 {
248     (void) wg_size;
249     return std::vector<cl_uint>(count, cl_uint(1));
250 }
251 
work_group_broadcast(cl_device_id device,cl_context context,cl_command_queue queue,size_t count,size_t dim)252 int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim)
253 {
254     cl_mem buffers[2];
255     cl_program program;
256     cl_kernel kernel;
257     size_t flat_wg_size;
258     size_t wg_size[] = { 1, 1, 1};
259     size_t work_size[] = { 1, 1, 1};
260     int err;
261 
262     // Get kernel source code
263     std::string code_str;
264     if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code();
265     else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code();
266     else code_str = generate_wg_broadcast_1D_kernel_code();
267 
268 // -----------------------------------------------------------------------------------
269 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
270 // -----------------------------------------------------------------------------------
271 // Only OpenCL C++ to SPIR-V compilation
272 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
273     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
274     RETURN_ON_ERROR(err)
275     return err;
276 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
277 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
278     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false);
279     RETURN_ON_ERROR(err)
280 #else
281     err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
282     RETURN_ON_ERROR(err)
283 #endif
284 
285     // Get max flat workgroup size
286     err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL);
287     RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
288 
289     // Set local work size
290     wg_size[0] = flat_wg_size;
291     if(dim > 2)
292     {
293         if (flat_wg_size >=512)
294         {
295             wg_size[0] = wg_size[1] = wg_size[2] = 8;
296         }
297         else if (flat_wg_size >= 64)
298         {
299             wg_size[0] = wg_size[1] = wg_size[2] = 4;
300         }
301         else if (flat_wg_size >= 8)
302         {
303             wg_size[0] = wg_size[1] = wg_size[2] = 2;
304         }
305         else
306         {
307             wg_size[0] = wg_size[1] = wg_size[2] = 1;
308         }
309     }
310     else if(dim > 1)
311     {
312         if (flat_wg_size >= 256)
313         {
314             wg_size[0] = wg_size[1] = 16;
315         }
316         else if (flat_wg_size >=64)
317         {
318             wg_size[0] = wg_size[1] = 8;
319         }
320         else if (flat_wg_size >= 16)
321         {
322             wg_size[0] = wg_size[1] = 4;
323         }
324         else
325         {
326             wg_size[0] = wg_size[1] = 1;
327         }
328     }
329 
330     // Calculate flat local work size
331     flat_wg_size = wg_size[0];
332     if(dim > 1) flat_wg_size *= wg_size[1];
333     if(dim > 2) flat_wg_size *= wg_size[2];
334 
335     // Calculate global work size
336     size_t flat_work_size = count;
337     // 3D
338     if(dim > 2)
339     {
340         size_t wg_number = static_cast<size_t>(
341             std::ceil(static_cast<double>(count / 3) / (wg_size[0] * wg_size[1] * wg_size[2]))
342         );
343         work_size[0] = wg_number * wg_size[0];
344         work_size[1] = wg_number * wg_size[1];
345         work_size[2] = wg_number * wg_size[2];
346         flat_work_size = work_size[0] * work_size[1] * work_size[2];
347     }
348     // 2D
349     else if(dim > 1)
350     {
351         size_t wg_number = static_cast<size_t>(
352             std::ceil(static_cast<double>(count / 2) / (wg_size[0] * wg_size[1]))
353         );
354         work_size[0] = wg_number * wg_size[0];
355         work_size[1] = wg_number * wg_size[1];
356         flat_work_size = work_size[0] * work_size[1];
357     }
358     // 1D
359     else
360     {
361         size_t wg_number = static_cast<size_t>(
362             std::ceil(static_cast<double>(count) / wg_size[0])
363         );
364         flat_work_size = wg_number * wg_size[0];
365         work_size[0] = flat_work_size;
366     }
367 
368     std::vector<cl_uint> input = generate_input_wg_broadcast(flat_work_size, flat_wg_size);
369     std::vector<cl_uint> output = generate_output_wg_broadcast(flat_work_size, flat_wg_size);
370 
371     buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
372     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
373 
374     buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
375     RETURN_ON_CL_ERROR(err, "clCreateBuffer");
376 
377     err = clEnqueueWriteBuffer(
378         queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
379         static_cast<void *>(input.data()), 0, NULL, NULL
380     );
381     RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
382 
383     err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
384     err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
385     RETURN_ON_CL_ERROR(err, "clSetKernelArg");
386 
387     err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL);
388     RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
389 
390     err = clEnqueueReadBuffer(
391         queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
392         static_cast<void *>(output.data()), 0, NULL, NULL
393     );
394     RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
395 
396     int result = CL_SUCCESS;
397     // 3D
398     if(dim > 2)
399     {
400         result = verify_wg_broadcast_3D(
401             input, output,
402             work_size[0], work_size[1], work_size[2],
403             wg_size[0], wg_size[1], wg_size[2]
404         );
405     }
406     // 2D
407     else if(dim > 1)
408     {
409         result = verify_wg_broadcast_2D(
410             input, output,
411             work_size[0], work_size[1],
412             wg_size[0], wg_size[1]
413         );
414     }
415     // 1D
416     else
417     {
418         result = verify_wg_broadcast_1D(
419             input, output,
420             work_size[0],
421             wg_size[0]
422         );
423     }
424 
425     RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", dim);
426     log_info("work_group_broadcast_%luD passed\n", dim);
427 
428     clReleaseMemObject(buffers[0]);
429     clReleaseMemObject(buffers[1]);
430     clReleaseKernel(kernel);
431     clReleaseProgram(program);
432     return err;
433 }
434 
AUTO_TEST_CASE(test_work_group_broadcast)435 AUTO_TEST_CASE(test_work_group_broadcast)
436 (cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
437 {
438     int error = CL_SUCCESS;
439     int local_error = CL_SUCCESS;
440 
441     local_error = work_group_broadcast(device, context, queue, n_elems, 1);
442     CHECK_ERROR(local_error)
443     error |= local_error;
444 
445     local_error = work_group_broadcast(device, context, queue, n_elems, 2);
446     CHECK_ERROR(local_error)
447     error |= local_error;
448 
449     local_error = work_group_broadcast(device, context, queue, n_elems, 3);
450     CHECK_ERROR(local_error)
451     error |= local_error;
452 
453     if(error != CL_SUCCESS)
454         return -1;
455     return CL_SUCCESS;
456 }
457 
458 #endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
459