1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27
28 #ifdef CL_VERSION_2_0
29
30 static int gNestingLevel = 4;
31 extern int gWimpyMode;
32
33 static const char* enqueue_nested_blocks_single[] =
34 {
35 NL, "void block_fn(__global int* res, int level)"
36 NL, "{"
37 NL, " size_t tid = get_global_id(0);"
38 NL, " queue_t def_q = get_default_queue();"
39 NL, " ndrange_t ndrange = ndrange_1D(3);"
40 NL, " if(--level < 0) return;"
41 NL, ""
42 NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level); };"
43 NL, ""
44 NL, " // Only 1 work-item enqueues block"
45 NL, " if(tid == 1)"
46 NL, " {"
47 NL, " res[tid]++;"
48 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
49 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
50 NL, " }"
51 NL, "}"
52 NL, ""
53 NL, "kernel void enqueue_nested_blocks_single(__global int* res, int level)"
54 NL, "{"
55 NL, " block_fn(res, level);"
56 NL, "}"
57 NL
58 };
59
60 static const char* enqueue_nested_blocks_some_eq[] =
61 {
62 NL, "void block_fn(int level, __global int* res)"
63 NL, "{"
64 NL, " size_t tid = get_global_id(0);"
65 NL, " queue_t def_q = get_default_queue();"
66 NL, " ndrange_t ndrange = ndrange_1D(10);"
67 NL, " if(--level < 0) return;"
68 NL, ""
69 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
70 NL, ""
71 NL, " // Some work-items enqueues nested blocks with the same level"
72 NL, " if(tid < (get_global_size(0) >> 1))"
73 NL, " {"
74 NL, " atomic_inc(&res[tid]);"
75 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
76 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
77 NL, " }"
78 NL, "}"
79 NL, ""
80 NL, "kernel void enqueue_nested_blocks_some_eq(__global int* res, int level)"
81 NL, "{"
82 NL, " block_fn(level, res);"
83 NL, "}"
84 NL
85 };
86
87 static const char* enqueue_nested_blocks_some_diff[] =
88 {
89 NL, "void block_fn(int level, __global int* res)"
90 NL, "{"
91 NL, " size_t tid = get_global_id(0);"
92 NL, " queue_t def_q = get_default_queue();"
93 NL, " ndrange_t ndrange = ndrange_1D(10);"
94 NL, " if(--level < 0) return;"
95 NL, ""
96 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
97 NL, ""
98 NL, " // Some work-items enqueues nested blocks with different levels"
99 NL, " if(tid % 2)"
100 NL, " {"
101 NL, " atomic_inc(&res[tid]);"
102 NL, " if(level >= tid)"
103 NL, " {"
104 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
105 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
106 NL, " }"
107 NL, " }"
108 NL, "}"
109 NL, ""
110 NL, "kernel void enqueue_nested_blocks_some_diff(__global int* res, int level)"
111 NL, "{"
112 NL, " block_fn(level, res);"
113 NL, "}"
114 NL
115 };
116
117 static const char* enqueue_nested_blocks_all_eq[] =
118 {
119 NL, "void block_fn(int level, __global int* res)"
120 NL, "{"
121 NL, " size_t tid = get_global_id(0);"
122 NL, " queue_t def_q = get_default_queue();"
123 NL, " ndrange_t ndrange = ndrange_1D(4);"
124 NL, " if(--level < 0) return;"
125 NL, ""
126 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
127 NL, ""
128 NL, " // All work-items enqueues nested blocks with the same level"
129 NL, " atomic_inc(&res[tid]);"
130 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
131 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
132 NL, "}"
133 NL, ""
134 NL, "kernel void enqueue_nested_blocks_all_eq(__global int* res, int level)"
135 NL, "{"
136 NL, " block_fn(level, res);"
137 NL, "}"
138 NL
139 };
140
141 static const char* enqueue_nested_blocks_all_diff[] =
142 {
143 NL, "void block_fn(int level, __global int* res)"
144 NL, "{"
145 NL, " size_t tid = get_global_id(0);"
146 NL, " queue_t def_q = get_default_queue();"
147 NL, " ndrange_t ndrange = ndrange_1D(10);"
148 NL, " if(--level < 0) return;"
149 NL, ""
150 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
151 NL, ""
152 NL, " // All work-items enqueues nested blocks with different levels"
153 NL, " atomic_inc(&res[tid]);"
154 NL, " if(level >= tid)"
155 NL, " {"
156 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
157 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
158 NL, " }"
159 NL, "}"
160 NL, ""
161 NL, "kernel void enqueue_nested_blocks_all_diff(__global int* res, int level)"
162 NL, "{"
163 NL, " block_fn(level, res);"
164 NL, "}"
165 NL
166 };
167
check_single(cl_int * results,cl_int len,cl_int nesting_level)168 static int check_single(cl_int* results, cl_int len, cl_int nesting_level)
169 {
170 int i, fail = -1;
171 const cl_uint tid = 1;
172
173 for(i = 0; i < len; ++i)
174 {
175 if(i != tid && results[i] != 0) { fail = i; break; }
176 if(i == tid && results[i] != nesting_level) { fail = i; break; }
177 }
178 return fail;
179 }
180
generate_reference_some_eq(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)181 void generate_reference_some_eq(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
182 {
183 size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
184 if(--nesting_level < 0) return;
185
186 for (size_t tid = 0; tid < globalWorkSize; ++tid)
187 {
188 if (tid < (globalWorkSize >> 1))
189 {
190 ++referenceResults[tid];
191 generate_reference_some_eq(referenceResults, len, nesting_level);
192 }
193 }
194 }
195
check_some_eq(cl_int * results,cl_int len,cl_int nesting_level)196 static int check_some_eq(cl_int* results, cl_int len, cl_int nesting_level)
197 {
198 int i, fail = -1;
199 std::vector<cl_int> referenceResults(len, 0);
200 generate_reference_some_eq(referenceResults, len, nesting_level);
201
202 for(i = 0; i < len; ++i)
203 {
204 if (results[i] != referenceResults[i]) { fail = i; break; }
205 }
206
207 return fail;
208 }
209
generate_reference_some_diff(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)210 void generate_reference_some_diff(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
211 {
212 size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
213 if(--nesting_level < 0) return;
214
215 for (size_t tid = 0; tid < globalWorkSize; ++tid)
216 {
217 if (tid % 2)
218 {
219 ++referenceResults[tid];
220 if (nesting_level >= tid)
221 {
222 generate_reference_some_diff(referenceResults, len, nesting_level);
223 }
224 }
225 }
226 }
227
check_some_diff(cl_int * results,cl_int len,cl_int nesting_level)228 static int check_some_diff(cl_int* results, cl_int len, cl_int nesting_level)
229 {
230 int i, fail = -1;
231 std::vector<cl_int> referenceResults(len, 0);
232 generate_reference_some_diff(referenceResults, len, nesting_level);
233
234 for(i = 0; i < len; ++i)
235 {
236 if (results[i] != referenceResults[i]) { fail = i; break; }
237 }
238
239 return fail;
240 }
241
generate_reference_all_eq(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)242 void generate_reference_all_eq(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
243 {
244 size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 4;
245 if(--nesting_level < 0) return;
246
247 for (size_t tid = 0; tid < globalWorkSize; ++tid)
248 {
249 ++referenceResults[tid];
250 generate_reference_all_eq(referenceResults, len, nesting_level);
251 }
252 }
253
check_all_eq(cl_int * results,cl_int len,cl_int nesting_level)254 static int check_all_eq(cl_int* results, cl_int len, cl_int nesting_level)
255 {
256 int i, fail = -1;
257 std::vector<cl_int> referenceResults(len, 0);
258 generate_reference_all_eq(referenceResults, len, nesting_level);
259
260 for(i = 0; i < len; ++i)
261 {
262 if (results[i] != referenceResults[i]) { fail = i; break; }
263 }
264
265 return fail;
266 }
267
generate_reference_all_diff(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)268 void generate_reference_all_diff(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
269 {
270 size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
271 if(--nesting_level < 0) return;
272
273 for (size_t tid = 0; tid < globalWorkSize; ++tid)
274 {
275 ++referenceResults[tid];
276 if (nesting_level >= tid)
277 {
278 generate_reference_all_diff(referenceResults, len, nesting_level);
279 }
280 }
281 }
282
check_all_diff(cl_int * results,cl_int len,cl_int nesting_level)283 static int check_all_diff(cl_int* results, cl_int len, cl_int nesting_level)
284 {
285 int i, fail = -1;
286 std::vector<cl_int> referenceResults(len, 0);
287 generate_reference_all_diff(referenceResults, len, nesting_level);
288
289 for(i = 0; i < len; ++i)
290 {
291 if (results[i] != referenceResults[i]) { fail = i; break; }
292 }
293
294 return fail;
295 }
296
297 static const kernel_src_check sources_nested_blocks[] =
298 {
299 { KERNEL(enqueue_nested_blocks_single), check_single },
300 { KERNEL(enqueue_nested_blocks_some_eq), check_some_eq },
301 { KERNEL(enqueue_nested_blocks_some_diff), check_some_diff },
302 { KERNEL(enqueue_nested_blocks_all_eq), check_all_eq },
303 { KERNEL(enqueue_nested_blocks_all_diff), check_all_diff }
304 };
305
test_enqueue_nested_blocks(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)306 int test_enqueue_nested_blocks(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
307 {
308 cl_uint i, k;
309 cl_int err_ret, res = 0;
310 clCommandQueueWrapper dev_queue;
311 const size_t MAX_GLOBAL_WORK_SIZE = MAX_GWS / 4;
312 cl_int kernel_results[MAX_GLOBAL_WORK_SIZE] = {0};
313
314 if(gWimpyMode)
315 {
316 gNestingLevel = 2;
317 vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
318 vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
319 }
320
321 size_t ret_len;
322 cl_uint max_queues = 1;
323 cl_uint maxQueueSize = 0;
324 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
325 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
326
327 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
328 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
329
330 cl_queue_properties queue_prop_def[] =
331 {
332 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
333 CL_QUEUE_SIZE, maxQueueSize,
334 0
335 };
336
337 dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
338 test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
339
340 kernel_arg args[] =
341 {
342 { sizeof(cl_int), &gNestingLevel }
343 };
344
345 size_t failCnt = 0;
346 for(k = 0; k < arr_size(sources_nested_blocks); ++k)
347 {
348 if (!gKernelName.empty() && gKernelName != sources_nested_blocks[k].src.kernel_name)
349 continue;
350
351 log_info("Running '%s' kernel (%d of %d) ...\n", sources_nested_blocks[k].src.kernel_name, k + 1, arr_size(sources_nested_blocks));
352 for(i = 0; i < MAX_GLOBAL_WORK_SIZE; ++i) kernel_results[i] = 0;
353
354 err_ret = run_n_kernel_args(context, queue, sources_nested_blocks[k].src.lines, sources_nested_blocks[k].src.num_lines, sources_nested_blocks[k].src.kernel_name, 0, MAX_GLOBAL_WORK_SIZE, kernel_results, sizeof(kernel_results), arr_size(args), args);
355 if(check_error(err_ret, "'%s' kernel execution failed", sources_nested_blocks[k].src.kernel_name)) { res = -1; continue ; }
356
357 //check results
358 int fail = sources_nested_blocks[k].check(kernel_results, MAX_GLOBAL_WORK_SIZE, gNestingLevel);
359
360 if(check_error(err_ret, "'%s' kernel execution failed", sources_nested_blocks[k].src.kernel_name)) { ++failCnt; res = -1; continue; }
361 else if(fail >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_nested_blocks[k].src.kernel_name, fail, kernel_results[fail])) { ++failCnt; res = -1; continue; }
362 else log_info("'%s' kernel is OK.\n", sources_nested_blocks[k].src.kernel_name);
363 }
364
365 if (failCnt > 0)
366 {
367 log_error("ERROR: %d of %d kernels failed.\n", failCnt, arr_size(sources_nested_blocks));
368 }
369
370 return res;
371 }
372
373 #endif
374
375