1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
17 #define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
18
19 #include <vector>
20 #include <limits>
21 #include <algorithm>
22
23 // Common for all OpenCL C++ tests
24 #include "../common.hpp"
25 // Common for tests of work-group functions
26 #include "common.hpp"
27
28 // -----------------------------------------------------------------------------------
29 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
30 // -----------------------------------------------------------------------------------
31 #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
generate_wg_broadcast_1D_kernel_code()32 std::string generate_wg_broadcast_1D_kernel_code()
33 {
34 return
35 "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
36 "{\n"
37 " ulong tid = get_global_id(0);\n"
38 " uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
39 " output[tid] = result;\n"
40 "}\n";
41 }
generate_wg_broadcast_2D_kernel_code()42 std::string generate_wg_broadcast_2D_kernel_code()
43 {
44 return
45 "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
46 "{\n"
47 " ulong tid_x = get_global_id(0);\n"
48 " ulong tid_y = get_global_id(1);\n"
49 " size_t x = get_group_id(0) % get_local_size(0);\n"
50 " size_t y = get_group_id(1) % get_local_size(1);\n"
51 " size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
52 " uint result = work_group_broadcast(input[idx], x, y);\n"
53 " output[idx] = result;\n"
54 "}\n";
55 }
generate_wg_broadcast_3D_kernel_code()56 std::string generate_wg_broadcast_3D_kernel_code()
57 {
58 return
59 "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n"
60 "{\n"
61 " ulong tid_x = get_global_id(0);\n"
62 " ulong tid_y = get_global_id(1);\n"
63 " ulong tid_z = get_global_id(2);\n"
64 " size_t x = get_group_id(0) % get_local_size(0);\n"
65 " size_t y = get_group_id(1) % get_local_size(1);\n"
66 " size_t z = get_group_id(2) % get_local_size(2);\n"
67 " ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
68 " uint result = work_group_broadcast(input[idx], x, y, z);\n"
69 " output[idx] = result;\n"
70 "}\n";
71 }
72 #else
generate_wg_broadcast_1D_kernel_code()73 std::string generate_wg_broadcast_1D_kernel_code()
74 {
75 return "#include <opencl_memory>\n"
76 "#include <opencl_work_item>\n"
77 "#include <opencl_work_group>\n"
78 "using namespace cl;\n"
79 "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
80 "{\n"
81 " ulong tid = get_global_id(0);\n"
82 " uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n"
83 " output[tid] = result;\n"
84 "}\n";
85 }
generate_wg_broadcast_2D_kernel_code()86 std::string generate_wg_broadcast_2D_kernel_code()
87 {
88 return "#include <opencl_memory>\n"
89 "#include <opencl_work_item>\n"
90 "#include <opencl_work_group>\n"
91 "using namespace cl;\n"
92 "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
93 "{\n"
94 " ulong tid_x = get_global_id(0);\n"
95 " ulong tid_y = get_global_id(1);\n"
96 " size_t x = get_group_id(0) % get_local_size(0);\n"
97 " size_t y = get_group_id(1) % get_local_size(1);\n"
98 " size_t idx = (tid_y * get_global_size(0)) + tid_x;\n"
99 " uint result = work_group_broadcast(input[idx], x, y);\n"
100 " output[idx] = result;\n"
101 "}\n";
102 }
generate_wg_broadcast_3D_kernel_code()103 std::string generate_wg_broadcast_3D_kernel_code()
104 {
105 return "#include <opencl_memory>\n"
106 "#include <opencl_work_item>\n"
107 "#include <opencl_work_group>\n"
108 "using namespace cl;\n"
109 "__kernel void test_wg_broadcast(global_ptr<uint[]> input, global_ptr<uint[]> output)\n"
110 "{\n"
111 " ulong tid_x = get_global_id(0);\n"
112 " ulong tid_y = get_global_id(1);\n"
113 " ulong tid_z = get_global_id(2);\n"
114 " size_t x = get_group_id(0) % get_local_size(0);\n"
115 " size_t y = get_group_id(1) % get_local_size(1);\n"
116 " size_t z = get_group_id(2) % get_local_size(2);\n"
117 " ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n"
118 " uint result = work_group_broadcast(input[idx], x, y, z);\n"
119 " output[idx] = result;\n"
120 "}\n";
121 }
122 #endif
123
124 int
verify_wg_broadcast_1D(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t n,size_t wg_size)125 verify_wg_broadcast_1D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out, size_t n, size_t wg_size)
126 {
127 size_t i, j;
128 size_t group_id;
129
130 for (i=0,group_id=0; i<n; i+=wg_size,group_id++)
131 {
132 int local_size = (n-i) > wg_size ? wg_size : (n-i);
133 cl_uint broadcast_result = in[i + (group_id % local_size)];
134 for (j=0; j<local_size; j++)
135 {
136 if ( broadcast_result != out[i+j] )
137 {
138 log_info("work_group_broadcast: Error at %lu: expected = %u, got = %u\n", i+j, broadcast_result, out[i+j]);
139 return -1;
140 }
141 }
142 }
143
144 return CL_SUCCESS;
145 }
146
147 int
verify_wg_broadcast_2D(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t nx,size_t ny,size_t wg_size_x,size_t wg_size_y)148 verify_wg_broadcast_2D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
149 size_t nx, size_t ny,
150 size_t wg_size_x, size_t wg_size_y)
151 {
152 size_t i, j, _i, _j;
153 size_t group_id_x, group_id_y;
154
155 for (i=0,group_id_y=0; i<ny; i+=wg_size_y,group_id_y++)
156 {
157 size_t y = group_id_y % wg_size_y;
158 size_t local_size_y = (ny-i) > wg_size_y ? wg_size_y : (ny-i);
159 for (_i=0; _i < local_size_y; _i++)
160 {
161 for (j=0,group_id_x=0; j<nx; j+=wg_size_x,group_id_x++)
162 {
163 size_t x = group_id_x % wg_size_x;
164 size_t local_size_x = (nx-j) > wg_size_x ? wg_size_x : (nx-j);
165 cl_uint broadcast_result = in[(i + y) * nx + (j + x)];
166 for (_j=0; _j < local_size_x; _j++)
167 {
168 size_t indx = (i + _i) * nx + (j + _j);
169 if ( broadcast_result != out[indx] )
170 {
171 log_info("%lu\n", indx);
172 log_info("%lu\n", ((i + y) * nx + (j + x)));
173 log_info("%lu\n", out.size());
174 log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n", j+_j, i+_i, broadcast_result, out[indx]);
175 return -1;
176 }
177 }
178 }
179 }
180 }
181
182 return CL_SUCCESS;
183 }
184
185 int
verify_wg_broadcast_3D(const std::vector<cl_uint> & in,const std::vector<cl_uint> & out,size_t nx,size_t ny,size_t nz,size_t wg_size_x,size_t wg_size_y,size_t wg_size_z)186 verify_wg_broadcast_3D(const std::vector<cl_uint> &in, const std::vector<cl_uint> &out,
187 size_t nx, size_t ny, size_t nz,
188 size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
189 {
190 size_t i, j, k, _i, _j, _k;
191 size_t group_id_x, group_id_y, group_id_z;
192
193 for (i=0,group_id_z=0; i<nz; i+=wg_size_z,group_id_z++)
194 {
195 size_t z = group_id_z % wg_size_z;
196 size_t local_size_z = (nz-i) > wg_size_z ? wg_size_z : (nz-i);
197 for (_i=0; _i < local_size_z; _i++)
198 {
199 for (j=0,group_id_y=0; j<ny; j+=wg_size_y,group_id_y++)
200 {
201 size_t y = group_id_y % wg_size_y;
202 size_t local_size_y = (ny-j) > wg_size_y ? wg_size_y : (ny-j);
203 for (_j=0; _j < local_size_y; _j++)
204 {
205 for (k=0,group_id_x=0; k<nx; k+=wg_size_x,group_id_x++)
206 {
207 size_t x = group_id_x % wg_size_x;
208 size_t local_size_x = (nx-k) > wg_size_x ? wg_size_x : (nx-k);
209 cl_uint broadcast_result = in[(i + z) * ny * nz + (j + y) * nx + (k + x)];
210 for (_k=0; _k < local_size_x; _k++)
211 {
212 size_t indx = (i + _i) * ny * nx + (j + _j) * nx + (k + _k);
213 if ( broadcast_result != out[indx] )
214 {
215 log_info(
216 "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n",
217 k+_k, j+_j, i+_i,
218 broadcast_result, out[indx]);
219 return -1;
220 }
221 }
222 }
223 }
224 }
225 }
226 }
227 return CL_SUCCESS;
228 }
229
generate_input_wg_broadcast(size_t count,size_t wg_size)230 std::vector<cl_uint> generate_input_wg_broadcast(size_t count, size_t wg_size)
231 {
232 std::vector<cl_uint> input(count, cl_uint(0));
233 size_t j = wg_size;
234 for(size_t i = 0; i < count; i++)
235 {
236 input[i] = static_cast<cl_uint>(j);
237 j--;
238 if(j == 0)
239 {
240 j = wg_size;
241 }
242 }
243 return input;
244 }
245
generate_output_wg_broadcast(size_t count,size_t wg_size)246 std::vector<cl_uint> generate_output_wg_broadcast(size_t count, size_t wg_size)
247 {
248 (void) wg_size;
249 return std::vector<cl_uint>(count, cl_uint(1));
250 }
251
work_group_broadcast(cl_device_id device,cl_context context,cl_command_queue queue,size_t count,size_t dim)252 int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim)
253 {
254 cl_mem buffers[2];
255 cl_program program;
256 cl_kernel kernel;
257 size_t flat_wg_size;
258 size_t wg_size[] = { 1, 1, 1};
259 size_t work_size[] = { 1, 1, 1};
260 int err;
261
262 // Get kernel source code
263 std::string code_str;
264 if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code();
265 else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code();
266 else code_str = generate_wg_broadcast_1D_kernel_code();
267
268 // -----------------------------------------------------------------------------------
269 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
270 // -----------------------------------------------------------------------------------
271 // Only OpenCL C++ to SPIR-V compilation
272 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
273 err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
274 RETURN_ON_ERROR(err)
275 return err;
276 // Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
277 #elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
278 err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false);
279 RETURN_ON_ERROR(err)
280 #else
281 err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast");
282 RETURN_ON_ERROR(err)
283 #endif
284
285 // Get max flat workgroup size
286 err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL);
287 RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
288
289 // Set local work size
290 wg_size[0] = flat_wg_size;
291 if(dim > 2)
292 {
293 if (flat_wg_size >=512)
294 {
295 wg_size[0] = wg_size[1] = wg_size[2] = 8;
296 }
297 else if (flat_wg_size >= 64)
298 {
299 wg_size[0] = wg_size[1] = wg_size[2] = 4;
300 }
301 else if (flat_wg_size >= 8)
302 {
303 wg_size[0] = wg_size[1] = wg_size[2] = 2;
304 }
305 else
306 {
307 wg_size[0] = wg_size[1] = wg_size[2] = 1;
308 }
309 }
310 else if(dim > 1)
311 {
312 if (flat_wg_size >= 256)
313 {
314 wg_size[0] = wg_size[1] = 16;
315 }
316 else if (flat_wg_size >=64)
317 {
318 wg_size[0] = wg_size[1] = 8;
319 }
320 else if (flat_wg_size >= 16)
321 {
322 wg_size[0] = wg_size[1] = 4;
323 }
324 else
325 {
326 wg_size[0] = wg_size[1] = 1;
327 }
328 }
329
330 // Calculate flat local work size
331 flat_wg_size = wg_size[0];
332 if(dim > 1) flat_wg_size *= wg_size[1];
333 if(dim > 2) flat_wg_size *= wg_size[2];
334
335 // Calculate global work size
336 size_t flat_work_size = count;
337 // 3D
338 if(dim > 2)
339 {
340 size_t wg_number = static_cast<size_t>(
341 std::ceil(static_cast<double>(count / 3) / (wg_size[0] * wg_size[1] * wg_size[2]))
342 );
343 work_size[0] = wg_number * wg_size[0];
344 work_size[1] = wg_number * wg_size[1];
345 work_size[2] = wg_number * wg_size[2];
346 flat_work_size = work_size[0] * work_size[1] * work_size[2];
347 }
348 // 2D
349 else if(dim > 1)
350 {
351 size_t wg_number = static_cast<size_t>(
352 std::ceil(static_cast<double>(count / 2) / (wg_size[0] * wg_size[1]))
353 );
354 work_size[0] = wg_number * wg_size[0];
355 work_size[1] = wg_number * wg_size[1];
356 flat_work_size = work_size[0] * work_size[1];
357 }
358 // 1D
359 else
360 {
361 size_t wg_number = static_cast<size_t>(
362 std::ceil(static_cast<double>(count) / wg_size[0])
363 );
364 flat_work_size = wg_number * wg_size[0];
365 work_size[0] = flat_work_size;
366 }
367
368 std::vector<cl_uint> input = generate_input_wg_broadcast(flat_work_size, flat_wg_size);
369 std::vector<cl_uint> output = generate_output_wg_broadcast(flat_work_size, flat_wg_size);
370
371 buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err);
372 RETURN_ON_CL_ERROR(err, "clCreateBuffer");
373
374 buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err);
375 RETURN_ON_CL_ERROR(err, "clCreateBuffer");
376
377 err = clEnqueueWriteBuffer(
378 queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(),
379 static_cast<void *>(input.data()), 0, NULL, NULL
380 );
381 RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer");
382
383 err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]);
384 err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]);
385 RETURN_ON_CL_ERROR(err, "clSetKernelArg");
386
387 err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL);
388 RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel");
389
390 err = clEnqueueReadBuffer(
391 queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(),
392 static_cast<void *>(output.data()), 0, NULL, NULL
393 );
394 RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer");
395
396 int result = CL_SUCCESS;
397 // 3D
398 if(dim > 2)
399 {
400 result = verify_wg_broadcast_3D(
401 input, output,
402 work_size[0], work_size[1], work_size[2],
403 wg_size[0], wg_size[1], wg_size[2]
404 );
405 }
406 // 2D
407 else if(dim > 1)
408 {
409 result = verify_wg_broadcast_2D(
410 input, output,
411 work_size[0], work_size[1],
412 wg_size[0], wg_size[1]
413 );
414 }
415 // 1D
416 else
417 {
418 result = verify_wg_broadcast_1D(
419 input, output,
420 work_size[0],
421 wg_size[0]
422 );
423 }
424
425 RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", dim);
426 log_info("work_group_broadcast_%luD passed\n", dim);
427
428 clReleaseMemObject(buffers[0]);
429 clReleaseMemObject(buffers[1]);
430 clReleaseKernel(kernel);
431 clReleaseProgram(program);
432 return err;
433 }
434
AUTO_TEST_CASE(test_work_group_broadcast)435 AUTO_TEST_CASE(test_work_group_broadcast)
436 (cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
437 {
438 int error = CL_SUCCESS;
439 int local_error = CL_SUCCESS;
440
441 local_error = work_group_broadcast(device, context, queue, n_elems, 1);
442 CHECK_ERROR(local_error)
443 error |= local_error;
444
445 local_error = work_group_broadcast(device, context, queue, n_elems, 2);
446 CHECK_ERROR(local_error)
447 error |= local_error;
448
449 local_error = work_group_broadcast(device, context, queue, n_elems, 3);
450 CHECK_ERROR(local_error)
451 error |= local_error;
452
453 if(error != CL_SUCCESS)
454 return -1;
455 return CL_SUCCESS;
456 }
457
458 #endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP
459