1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30 static const char* enqueue_simple_block[] =
31 {
32 NL, "void block_fn(size_t tid, int mul, __global int* res)"
33 NL, "{"
34 NL, " res[tid] = mul * 7 - 21;"
35 NL, "}"
36 NL, ""
37 NL, "kernel void enqueue_simple_block(__global int* res)"
38 NL, "{"
39 NL, " int multiplier = 3;"
40 NL, " size_t tid = get_global_id(0);"
41 NL, ""
42 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
43 NL, ""
44 NL, " res[tid] = -1;"
45 NL, " queue_t def_q = get_default_queue();"
46 NL, " ndrange_t ndrange = ndrange_1D(1);"
47 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
48 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
49 NL, "}"
50 NL
51 };
52
53 static const char* enqueue_block_with_local_arg1[] =
54 {
55 NL, "#define LOCAL_MEM_SIZE 10"
56 NL, ""
57 NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)"
58 NL, "{"
59 NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
60 NL, " {"
61 NL, " tmp[i] = mul * 7 - 21;"
62 NL, " res[tid] += tmp[i];"
63 NL, " }"
64 NL, " res[tid] += 2;"
65 NL, "}"
66 NL, ""
67 NL, "kernel void enqueue_block_with_local_arg1(__global int* res)"
68 NL, "{"
69 NL, " int multiplier = 3;"
70 NL, " size_t tid = get_global_id(0);"
71 NL, ""
72 NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };"
73 NL, ""
74 NL, " res[tid] = -2;"
75 NL, " queue_t def_q = get_default_queue();"
76 NL, " ndrange_t ndrange = ndrange_1D(1);"
77 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));"
78 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
79 NL, "}"
80 NL
81 };
82
83 static const char* enqueue_block_with_local_arg2[] =
84 {
85 NL, "#define LOCAL_MEM_SIZE 10"
86 NL, ""
87 NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)"
88 NL, "{"
89 NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
90 NL, " {"
91 NL, " tmp1[i] = mul * 7 - 21;"
92 NL, " tmp2[i].x = (float)(mul * 7 - 21);"
93 NL, " tmp2[i].y = (float)(mul * 7 - 21);"
94 NL, " tmp2[i].z = (float)(mul * 7 - 21);"
95 NL, " tmp2[i].w = (float)(mul * 7 - 21);"
96 NL, ""
97 NL, " res[tid] += tmp1[i];"
98 NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);"
99 NL, " }"
100 NL, " res[tid] += 2;"
101 NL, "}"
102 NL, ""
103 NL, "kernel void enqueue_block_with_local_arg2(__global int* res)"
104 NL, "{"
105 NL, " int multiplier = 3;"
106 NL, " size_t tid = get_global_id(0);"
107 NL, ""
108 NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)"
109 NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };"
110 NL, ""
111 NL, " res[tid] = -2;"
112 NL, " queue_t def_q = get_default_queue();"
113 NL, " ndrange_t ndrange = ndrange_1D(1);"
114 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));"
115 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
116 NL, "}"
117 NL
118 };
119
120 static const char* enqueue_block_with_wait_list[] =
121 {
122 NL, "#define BLOCK_SUBMITTED 1"
123 NL, "#define BLOCK_COMPLETED 2"
124 NL, "#define CHECK_SUCCESS 0"
125 NL, ""
126 NL, "kernel void enqueue_block_with_wait_list(__global int* res)"
127 NL, "{"
128 NL, " size_t tid = get_global_id(0);"
129 NL, ""
130 NL, " clk_event_t user_evt = create_user_event();"
131 NL, ""
132 NL, " res[tid] = BLOCK_SUBMITTED;"
133 NL, " queue_t def_q = get_default_queue();"
134 NL, " ndrange_t ndrange = ndrange_1D(1);"
135 NL, " clk_event_t block_evt;"
136 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,"
137 NL, " ^{"
138 NL, " res[tid] = BLOCK_COMPLETED;"
139 NL, " });"
140 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
141 NL, ""
142 NL, " retain_event(block_evt);"
143 NL, " release_event(block_evt);"
144 NL, ""
145 NL, " //check block is not started"
146 NL, " if(res[tid] == BLOCK_SUBMITTED)"
147 NL, " {"
148 NL, " clk_event_t my_evt;"
149 NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
150 NL, " ^{"
151 NL, " //check block is completed"
152 NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
153 NL, " });"
154 NL, " release_event(my_evt);"
155 NL, " }"
156 NL, ""
157 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
158 NL, ""
159 NL, " release_event(user_evt);"
160 NL, " release_event(block_evt);"
161 NL, "}"
162 NL
163 };
164
165 static const char* enqueue_block_with_wait_list_and_local_arg[] =
166 {
167 NL, "#define LOCAL_MEM_SIZE 10"
168 NL, "#define BLOCK_COMPLETED 1"
169 NL, "#define BLOCK_SUBMITTED 2"
170 NL, "#define BLOCK_STARTED 3"
171 NL, "#define CHECK_SUCCESS 0"
172 NL, ""
173 NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)"
174 NL, "{"
175 NL, " res[tid] = BLOCK_STARTED;"
176 NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
177 NL, " {"
178 NL, " tmp[i] = mul * 7 - 21;"
179 NL, " res[tid] += tmp[i];"
180 NL, " }"
181 NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;"
182 NL, "}"
183 NL, ""
184 NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)"
185 NL, "{"
186 NL, " int multiplier = 3;"
187 NL, " size_t tid = get_global_id(0);"
188 NL, " clk_event_t user_evt = create_user_event();"
189 NL, ""
190 NL, " res[tid] = BLOCK_SUBMITTED;"
191 NL, " queue_t def_q = get_default_queue();"
192 NL, " ndrange_t ndrange = ndrange_1D(1);"
193 NL, " clk_event_t block_evt;"
194 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, "
195 NL, " ^(__local void* buf) {"
196 NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);"
197 NL, " }, LOCAL_MEM_SIZE*sizeof(int));"
198 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
199 NL, ""
200 NL, " retain_event(block_evt);"
201 NL, " release_event(block_evt);"
202 NL, ""
203 NL, " //check block is not started"
204 NL, " if(res[tid] == BLOCK_SUBMITTED)"
205 NL, " {"
206 NL, " clk_event_t my_evt;"
207 NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
208 NL, " ^{"
209 NL, " //check block is completed"
210 NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
211 NL, " });"
212 NL, " release_event(my_evt);"
213 NL, " }"
214 NL, ""
215 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
216 NL, ""
217 NL, " release_event(user_evt);"
218 NL, " release_event(block_evt);"
219 NL, "}"
220 NL
221 };
222
223 static const char* enqueue_block_get_kernel_work_group_size[] =
224 {
225 NL, "void block_fn(size_t tid, int mul, __global int* res)"
226 NL, "{"
227 NL, " res[tid] = mul * 7 - 21;"
228 NL, "}"
229 NL, ""
230 NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)"
231 NL, "{"
232 NL, " int multiplier = 3;"
233 NL, " size_t tid = get_global_id(0);"
234 NL, ""
235 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
236 NL, ""
237 NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);"
238 NL, " if (local_work_size <= 0){ res[tid] = -1; return; }"
239 NL, " size_t global_work_size = local_work_size * 4;"
240 NL, ""
241 NL, " res[tid] = -1;"
242 NL, " queue_t q1 = get_default_queue();"
243 NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
244 NL, ""
245 NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
246 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
247 NL, "}"
248 };
249
250 static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] =
251 {
252 NL, "void block_fn(size_t tid, int mul, __global int* res)"
253 NL, "{"
254 NL, " res[tid] = mul * 7 - 21;"
255 NL, "}"
256 NL, ""
257 NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)"
258 NL, "{"
259 NL, " int multiplier = 3;"
260 NL, " size_t tid = get_global_id(0);"
261 NL, ""
262 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
263 NL, ""
264 NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);"
265 NL, " if (local_work_size <= 0){ res[tid] = -1; return; }"
266 NL, " size_t global_work_size = local_work_size * 4;"
267 NL, ""
268 NL, " res[tid] = -1;"
269 NL, " queue_t q1 = get_default_queue();"
270 NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
271 NL, ""
272 NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
273 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
274 NL, "}"
275 };
276
277 static const char* enqueue_block_capture_event_profiling_info_after_execution[] =
278 {
279 NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
280 NL, ""
281 NL, "__global ulong value[MAX_GWS*2] = {0};"
282 NL, ""
283 NL, "void block_fn(size_t tid, __global int* res)"
284 NL, "{"
285 NL, " res[tid] = -2;"
286 NL, "}"
287 NL, ""
288 NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)"
289 NL, "{"
290 NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
291 NL, ""
292 NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;"
293 NL, " else res[tid] = -4;"
294 NL, " release_event(evt);"
295 NL, "}"
296 NL, ""
297 NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)"
298 NL, "{"
299 NL, " size_t tid = get_global_id(0);"
300 NL, ""
301 NL, " res[tid] = -1;"
302 NL, " queue_t def_q = get_default_queue();"
303 NL, " ndrange_t ndrange = ndrange_1D(1);"
304 NL, " clk_event_t block_evt1;"
305 NL, ""
306 NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };"
307 NL, ""
308 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);"
309 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
310 NL, ""
311 NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };"
312 NL, ""
313 NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);"
314 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
315 NL, "}"
316 NL
317 };
318
319 static const char* enqueue_block_capture_event_profiling_info_before_execution[] =
320 {
321 NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
322 NL, ""
323 NL, "__global ulong value[MAX_GWS*2] = {0};"
324 NL, ""
325 NL, "void block_fn(size_t tid, __global int* res)"
326 NL, "{"
327 NL, " res[tid] = -2;"
328 NL, "}"
329 NL, ""
330 NL, "void check_res(size_t tid, const ulong *value, __global int* res)"
331 NL, "{"
332 NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;"
333 NL, " else res[tid] = -4;"
334 NL, "}"
335 NL, ""
336 NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)"
337 NL, "{"
338 NL, " int multiplier = 3;"
339 NL, " size_t tid = get_global_id(0);"
340 NL, " clk_event_t user_evt = create_user_event();"
341 NL, ""
342 NL, " res[tid] = -1;"
343 NL, " queue_t def_q = get_default_queue();"
344 NL, " ndrange_t ndrange = ndrange_1D(1);"
345 NL, " clk_event_t block_evt1;"
346 NL, " clk_event_t block_evt2;"
347 NL, ""
348 NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };"
349 NL, ""
350 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);"
351 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
352 NL, ""
353 NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
354 NL, ""
355 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
356 NL, ""
357 NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };"
358 NL, ""
359 NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);"
360 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
361 NL, ""
362 NL, " release_event(user_evt);"
363 NL, " release_event(block_evt1);"
364 NL, " release_event(block_evt2);"
365 NL, "}"
366 NL
367 };
368
369 static const char* enqueue_block_with_barrier[] =
370 {
371 NL, "void block_fn(size_t tid, int mul, __global int* res)"
372 NL, "{"
373 NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);"
374 NL, " res[tid] = mul * 7 -21;"
375 NL, "}"
376 NL, ""
377 NL, "void loop_fn(size_t tid, int n, __global int* res)"
378 NL, "{"
379 NL, " while(n > 0)"
380 NL, " {"
381 NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
382 NL, " res[tid] = 0;"
383 NL, " --n;"
384 NL, " }"
385 NL, "}"
386 NL, ""
387 NL, "kernel void enqueue_block_with_barrier(__global int* res)"
388 NL, "{"
389 NL, " int multiplier = 3;"
390 NL, " size_t tid = get_global_id(0);"
391 NL, " queue_t def_q = get_default_queue();"
392 NL, " res[tid] = -1;"
393 NL, " size_t n = 256;"
394 NL, ""
395 NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
396 NL, ""
397 NL, " ndrange_t ndrange = ndrange_1D(n);"
398 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
399 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
400 NL, ""
401 NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };"
402 NL, ""
403 NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);"
404 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
405 NL, "}"
406 NL
407 };
408
409 static const char* enqueue_marker_with_block_event[] =
410 {
411 NL, "#define BLOCK_COMPLETED 1"
412 NL, "#define BLOCK_SUBMITTED 2"
413 NL, "#define CHECK_SUCCESS 0"
414 NL, ""
415 NL, "kernel void enqueue_marker_with_block_event(__global int* res)"
416 NL, "{"
417 NL, " size_t tid = get_global_id(0);"
418 NL, ""
419 NL, " clk_event_t user_evt = create_user_event();"
420 NL, ""
421 NL, " res[tid] = BLOCK_SUBMITTED;"
422 NL, " queue_t def_q = get_default_queue();"
423 NL, " ndrange_t ndrange = ndrange_1D(1);"
424 NL, ""
425 NL, " clk_event_t block_evt1;"
426 NL, " clk_event_t marker_evt;"
427 NL, ""
428 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,"
429 NL, " ^{"
430 NL, " res[tid] = BLOCK_COMPLETED;"
431 NL, " });"
432 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
433 NL, ""
434 NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);"
435 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
436 NL, ""
437 NL, " retain_event(marker_evt);"
438 NL, " release_event(marker_evt);"
439 NL, ""
440 NL, " //check block is not started"
441 NL, " if(res[tid] == BLOCK_SUBMITTED)"
442 NL, " {"
443 NL, " clk_event_t my_evt;"
444 NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
445 NL, " ^{"
446 NL, " //check block is completed"
447 NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
448 NL, " });"
449 NL, " release_event(my_evt);"
450 NL, " }"
451 NL, ""
452 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
453 NL, ""
454 NL, " release_event(block_evt1);"
455 NL, " release_event(marker_evt);"
456 NL, " release_event(user_evt);"
457 NL, "}"
458 NL
459 };
460
461 static const char* enqueue_marker_with_user_event[] =
462 {
463 NL, "#define BLOCK_COMPLETED 1"
464 NL, "#define BLOCK_SUBMITTED 2"
465 NL, "#define CHECK_SUCCESS 0"
466 NL, ""
467 NL, "kernel void enqueue_marker_with_user_event(__global int* res)"
468 NL, "{"
469 NL, " size_t tid = get_global_id(0);"
470 NL, " uint multiplier = 7;"
471 NL, ""
472 NL, " clk_event_t user_evt = create_user_event();"
473 NL, ""
474 NL, " res[tid] = BLOCK_SUBMITTED;"
475 NL, " queue_t def_q = get_default_queue();"
476 NL, " ndrange_t ndrange = ndrange_1D(1);"
477 NL, ""
478 NL, " clk_event_t marker_evt;"
479 NL, " clk_event_t block_evt;"
480 NL, ""
481 NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);"
482 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
483 NL, ""
484 NL, " retain_event(marker_evt);"
485 NL, " release_event(marker_evt);"
486 NL, ""
487 NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, "
488 NL, " ^{"
489 NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;"
490 NL, " });"
491 NL, ""
492 NL, " //check block is not started"
493 NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }"
494 NL, ""
495 NL, " set_user_event_status(user_evt, CL_COMPLETE);"
496 NL, ""
497 NL, " release_event(block_evt);"
498 NL, " release_event(marker_evt);"
499 NL, " release_event(user_evt);"
500 NL, "}"
501 NL
502 };
503
504 static const char* enqueue_marker_with_mixed_events[] =
505 {
506 NL, "#define BLOCK_COMPLETED 1"
507 NL, "#define BLOCK_SUBMITTED 2"
508 NL, "#define CHECK_SUCCESS 0"
509 NL, ""
510 NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)"
511 NL, "{"
512 NL, " size_t tid = get_global_id(0);"
513 NL, ""
514 NL, " clk_event_t mix_ev[2];"
515 NL, " mix_ev[0] = create_user_event();"
516 NL, ""
517 NL, " res[tid] = BLOCK_SUBMITTED;"
518 NL, " queue_t def_q = get_default_queue();"
519 NL, " ndrange_t ndrange = ndrange_1D(1);"
520 NL, ""
521 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],"
522 NL, " ^{"
523 NL, " res[tid] = BLOCK_COMPLETED;"
524 NL, " });"
525 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
526 NL, ""
527 NL, " clk_event_t marker_evt;"
528 NL, ""
529 NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);"
530 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
531 NL, ""
532 NL, " retain_event(marker_evt);"
533 NL, " release_event(marker_evt);"
534 NL, ""
535 NL, " //check block is not started"
536 NL, " if(res[tid] == BLOCK_SUBMITTED)"
537 NL, " {"
538 NL, " clk_event_t my_evt;"
539 NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
540 NL, " ^{"
541 NL, " //check block is completed"
542 NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
543 NL, " });"
544 NL, " release_event(my_evt);"
545 NL, " }"
546 NL, ""
547 NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);"
548 NL, ""
549 NL, " release_event(mix_ev[1]);"
550 NL, " release_event(marker_evt);"
551 NL, " release_event(mix_ev[0]);"
552 NL, "}"
553 NL
554 };
555
556 static const char* enqueue_block_with_mixed_events[] =
557 {
558 NL, "kernel void enqueue_block_with_mixed_events(__global int* res)"
559 NL, "{"
560 NL, " int enq_res;"
561 NL, " size_t tid = get_global_id(0);"
562 NL, " clk_event_t mix_ev[3];"
563 NL, " mix_ev[0] = create_user_event();"
564 NL, " queue_t def_q = get_default_queue();"
565 NL, " ndrange_t ndrange = ndrange_1D(1);"
566 NL, " res[tid] = -2;"
567 NL, ""
568 NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });"
569 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
570 NL, ""
571 NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);"
572 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
573 NL, ""
574 NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });"
575 NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }"
576 NL, ""
577 NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);"
578 NL, ""
579 NL, " release_event(mix_ev[0]);"
580 NL, " release_event(mix_ev[1]);"
581 NL, " release_event(mix_ev[2]);"
582 NL, "}"
583 NL
584 };
585
586 static const kernel_src sources_enqueue_block[] =
587 {
588 KERNEL(enqueue_simple_block),
589 // Block with local mem
590 KERNEL(enqueue_block_with_local_arg1),
591 KERNEL(enqueue_block_with_local_arg2),
592 KERNEL(enqueue_block_with_wait_list),
593 KERNEL(enqueue_block_with_wait_list_and_local_arg),
594 // WG size built-ins
595 KERNEL(enqueue_block_get_kernel_work_group_size),
596 KERNEL(enqueue_block_get_kernel_preferred_work_group_size_multiple),
597 // Event profiling info
598 KERNEL(enqueue_block_capture_event_profiling_info_after_execution),
599 KERNEL(enqueue_block_capture_event_profiling_info_before_execution),
600 // Marker
601 KERNEL(enqueue_marker_with_block_event),
602 KERNEL(enqueue_marker_with_user_event),
603 // Mixed events
604 KERNEL(enqueue_marker_with_mixed_events),
605 KERNEL(enqueue_block_with_mixed_events),
606 // Barrier
607 KERNEL(enqueue_block_with_barrier),
608
609 };
610 static const size_t num_kernels_enqueue_block = arr_size(sources_enqueue_block);
611
check_kernel_results(cl_int * results,cl_int len)612 static int check_kernel_results(cl_int* results, cl_int len)
613 {
614 for(cl_int i = 0; i < len; ++i)
615 {
616 if(results[i] != 0) return i;
617 }
618 return -1;
619 }
620
test_enqueue_block(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)621 int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
622 {
623 cl_uint i;
624 cl_int n, err_ret, res = 0;
625 clCommandQueueWrapper dev_queue;
626 cl_int kernel_results[MAX_GWS] = {0};
627
628 size_t ret_len;
629 cl_uint max_queues = 1;
630 cl_uint maxQueueSize = 0;
631 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
632 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
633
634 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
635 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
636
637 size_t max_local_size = 1;
638 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
639 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
640
641 cl_queue_properties queue_prop_def[] =
642 {
643 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT|CL_QUEUE_PROFILING_ENABLE,
644 CL_QUEUE_SIZE, maxQueueSize,
645 0
646 };
647
648 dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
649 test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
650
651 size_t global_size = MAX_GWS;
652 size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
653 if(gWimpyMode)
654 {
655 global_size = 4;
656 local_size = 2;
657 }
658
659 size_t failCnt = 0;
660 for(i = 0; i < num_kernels_enqueue_block; ++i)
661 {
662 if (!gKernelName.empty() && gKernelName != sources_enqueue_block[i].kernel_name)
663 continue;
664
665 log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block[i].kernel_name, i + 1, num_kernels_enqueue_block);
666 err_ret = run_n_kernel_args(context, queue, sources_enqueue_block[i].lines, sources_enqueue_block[i].num_lines, sources_enqueue_block[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), 0, NULL);
667 if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block[i].kernel_name)) { ++failCnt; res = -1; }
668 else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_enqueue_block[i].kernel_name, n, kernel_results[n])) res = -1;
669 else log_info("'%s' kernel is OK.\n", sources_enqueue_block[i].kernel_name);
670 }
671
672 if (failCnt > 0)
673 {
674 log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_kernels_enqueue_block);
675 }
676
677 return res;
678 }
679
680
681
682 #endif
683
684
685