• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20 
21 #include <vector>
22 
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26 
27 
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30 static const char* enqueue_simple_block[] =
31 {
32     NL, "void block_fn(size_t tid, int mul, __global int* res)"
33     NL, "{"
34     NL, "  res[tid] = mul * 7 - 21;"
35     NL, "}"
36     NL, ""
37     NL, "kernel void enqueue_simple_block(__global int* res)"
38     NL, "{"
39     NL, "  int multiplier = 3;"
40     NL, "  size_t tid = get_global_id(0);"
41     NL, ""
42     NL, "  void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
43     NL, ""
44     NL, "  res[tid] = -1;"
45     NL, "  queue_t def_q = get_default_queue();"
46     NL, "  ndrange_t ndrange = ndrange_1D(1);"
47     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
48     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
49     NL, "}"
50     NL
51 };
52 
53 static const char* enqueue_block_with_local_arg1[] =
54 {
55     NL, "#define LOCAL_MEM_SIZE 10"
56     NL, ""
57     NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)"
58     NL, "{"
59     NL, "  for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
60     NL, "  {"
61     NL, "    tmp[i] = mul * 7 - 21;"
62     NL, "    res[tid] += tmp[i];"
63     NL, "  }"
64     NL, "  res[tid] += 2;"
65     NL, "}"
66     NL, ""
67     NL, "kernel void enqueue_block_with_local_arg1(__global int* res)"
68     NL, "{"
69     NL, "  int multiplier = 3;"
70     NL, "  size_t tid = get_global_id(0);"
71     NL, ""
72     NL, "  void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };"
73     NL, ""
74     NL, "  res[tid] = -2;"
75     NL, "  queue_t def_q = get_default_queue();"
76     NL, "  ndrange_t ndrange = ndrange_1D(1);"
77     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));"
78     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
79     NL, "}"
80     NL
81 };
82 
83 static const char* enqueue_block_with_local_arg2[] =
84 {
85     NL, "#define LOCAL_MEM_SIZE 10"
86     NL, ""
87     NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)"
88     NL, "{"
89     NL, "  for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
90     NL, "  {"
91     NL, "    tmp1[i]   = mul * 7 - 21;"
92     NL, "    tmp2[i].x = (float)(mul * 7 - 21);"
93     NL, "    tmp2[i].y = (float)(mul * 7 - 21);"
94     NL, "    tmp2[i].z = (float)(mul * 7 - 21);"
95     NL, "    tmp2[i].w = (float)(mul * 7 - 21);"
96     NL, ""
97     NL, "    res[tid] += tmp1[i];"
98     NL, "    res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);"
99     NL, "  }"
100     NL, "  res[tid] += 2;"
101     NL, "}"
102     NL, ""
103     NL, "kernel void enqueue_block_with_local_arg2(__global int* res)"
104     NL, "{"
105     NL, "  int multiplier = 3;"
106     NL, "  size_t tid = get_global_id(0);"
107     NL, ""
108     NL, "  void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)"
109     NL, "    { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };"
110     NL, ""
111     NL, "  res[tid] = -2;"
112     NL, "  queue_t def_q = get_default_queue();"
113     NL, "  ndrange_t ndrange = ndrange_1D(1);"
114     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));"
115     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
116     NL, "}"
117     NL
118 };
119 
120 static const char* enqueue_block_with_wait_list[] =
121 {
122     NL, "#define BLOCK_SUBMITTED 1"
123     NL, "#define BLOCK_COMPLETED 2"
124     NL, "#define CHECK_SUCCESS   0"
125     NL, ""
126     NL, "kernel void enqueue_block_with_wait_list(__global int* res)"
127     NL, "{"
128     NL, "  size_t tid = get_global_id(0);"
129     NL, ""
130     NL, "  clk_event_t user_evt = create_user_event();"
131     NL, ""
132     NL, "  res[tid] = BLOCK_SUBMITTED;"
133     NL, "  queue_t def_q = get_default_queue();"
134     NL, "  ndrange_t ndrange = ndrange_1D(1);"
135     NL, "  clk_event_t block_evt;"
136     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,"
137     NL, "  ^{"
138     NL, "      res[tid] = BLOCK_COMPLETED;"
139     NL, "   });"
140     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
141     NL, ""
142     NL, "  retain_event(block_evt);"
143     NL, "  release_event(block_evt);"
144     NL, ""
145     NL, "  //check block is not started"
146     NL, "  if(res[tid] == BLOCK_SUBMITTED)"
147     NL, "  {"
148     NL, "    clk_event_t my_evt;"
149     NL, "    enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
150     NL, "    ^{"
151     NL, "       //check block is completed"
152     NL, "       if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
153     NL, "     });"
154     NL, "    release_event(my_evt);"
155     NL, "  }"
156     NL, ""
157     NL, "  set_user_event_status(user_evt, CL_COMPLETE);"
158     NL, ""
159     NL, "  release_event(user_evt);"
160     NL, "  release_event(block_evt);"
161     NL, "}"
162     NL
163 };
164 
165 static const char* enqueue_block_with_wait_list_and_local_arg[] =
166 {
167     NL, "#define LOCAL_MEM_SIZE 10"
168     NL, "#define BLOCK_COMPLETED 1"
169     NL, "#define BLOCK_SUBMITTED 2"
170     NL, "#define BLOCK_STARTED   3"
171     NL, "#define CHECK_SUCCESS   0"
172     NL, ""
173     NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)"
174     NL, "{"
175     NL, "  res[tid] = BLOCK_STARTED;"
176     NL, "  for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
177     NL, "  {"
178     NL, "    tmp[i] = mul * 7 - 21;"
179     NL, "    res[tid] += tmp[i];"
180     NL, "  }"
181     NL, "  if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;"
182     NL, "}"
183     NL, ""
184     NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)"
185     NL, "{"
186     NL, "  int multiplier = 3;"
187     NL, "  size_t tid = get_global_id(0);"
188     NL, "  clk_event_t user_evt = create_user_event();"
189     NL, ""
190     NL, "  res[tid] = BLOCK_SUBMITTED;"
191     NL, "  queue_t def_q = get_default_queue();"
192     NL, "  ndrange_t ndrange = ndrange_1D(1);"
193     NL, "  clk_event_t block_evt;"
194     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, "
195     NL, "    ^(__local void* buf) {"
196     NL, "       block_fn_local_arg(tid, multiplier, res, (__local int*)buf);"
197     NL, "     }, LOCAL_MEM_SIZE*sizeof(int));"
198     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
199     NL, ""
200     NL, "  retain_event(block_evt);"
201     NL, "  release_event(block_evt);"
202     NL, ""
203     NL, "  //check block is not started"
204     NL, "  if(res[tid] == BLOCK_SUBMITTED)"
205     NL, "  {"
206     NL, "    clk_event_t my_evt;"
207     NL, "    enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
208     NL, "    ^{"
209     NL, "       //check block is completed"
210     NL, "       if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
211     NL, "     });"
212     NL, "    release_event(my_evt);"
213     NL, "  }"
214     NL, ""
215     NL, "  set_user_event_status(user_evt, CL_COMPLETE);"
216     NL, ""
217     NL, "  release_event(user_evt);"
218     NL, "  release_event(block_evt);"
219     NL, "}"
220     NL
221 };
222 
223 static const char* enqueue_block_get_kernel_work_group_size[] =
224 {
225     NL, "void block_fn(size_t tid, int mul, __global int* res)"
226     NL, "{"
227     NL, "  res[tid] = mul * 7 - 21;"
228     NL, "}"
229     NL, ""
230     NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)"
231     NL, "{"
232     NL, "    int multiplier = 3;"
233     NL, "    size_t tid = get_global_id(0);"
234     NL, ""
235     NL, "    void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
236     NL, ""
237     NL, "    size_t local_work_size = get_kernel_work_group_size(kernelBlock);"
238     NL, "    if (local_work_size <= 0){ res[tid] = -1; return; }"
239     NL, "    size_t global_work_size = local_work_size * 4;"
240     NL, ""
241     NL, "    res[tid] = -1;"
242     NL, "    queue_t q1 = get_default_queue();"
243     NL, "    ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
244     NL, ""
245     NL, "    int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
246     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
247     NL, "}"
248 };
249 
250 static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] =
251 {
252     NL, "void block_fn(size_t tid, int mul, __global int* res)"
253     NL, "{"
254     NL, "  res[tid] = mul * 7 - 21;"
255     NL, "}"
256     NL, ""
257     NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)"
258     NL, "{"
259     NL, "    int multiplier = 3;"
260     NL, "    size_t tid = get_global_id(0);"
261     NL, ""
262     NL, "    void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
263     NL, ""
264     NL, "    size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);"
265     NL, "    if (local_work_size <= 0){ res[tid] = -1; return; }"
266     NL, "    size_t global_work_size = local_work_size * 4;"
267     NL, ""
268     NL, "    res[tid] = -1;"
269     NL, "    queue_t q1 = get_default_queue();"
270     NL, "    ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
271     NL, ""
272     NL, "    int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
273     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
274     NL, "}"
275 };
276 
277 static const char* enqueue_block_capture_event_profiling_info_after_execution[] =
278 {
279     NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
280     NL, ""
281     NL, "__global ulong value[MAX_GWS*2] = {0};"
282     NL, ""
283     NL, "void block_fn(size_t tid, __global int* res)"
284     NL, "{"
285     NL, "    res[tid] = -2;"
286     NL, "}"
287     NL, ""
288     NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)"
289     NL, "{"
290     NL, "    capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
291     NL, ""
292     NL, "    if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] =  0;"
293     NL, "    else                                        res[tid] = -4;"
294     NL, "    release_event(evt);"
295     NL, "}"
296     NL, ""
297     NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)"
298     NL, "{"
299     NL, "    size_t tid = get_global_id(0);"
300     NL, ""
301     NL, "    res[tid] = -1;"
302     NL, "    queue_t def_q = get_default_queue();"
303     NL, "    ndrange_t ndrange = ndrange_1D(1);"
304     NL, "    clk_event_t block_evt1;"
305     NL, ""
306     NL, "    void (^kernelBlock)(void)  = ^{ block_fn (tid, res);                   };"
307     NL, ""
308     NL, "    int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);"
309     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
310     NL, ""
311     NL, "    void (^checkBlock) (void)  = ^{ check_res(tid, block_evt1, res);      };"
312     NL, ""
313     NL, "    enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);"
314     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
315     NL, "}"
316     NL
317 };
318 
319 static const char* enqueue_block_capture_event_profiling_info_before_execution[] =
320 {
321     NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
322     NL, ""
323     NL, "__global ulong value[MAX_GWS*2] = {0};"
324     NL, ""
325     NL, "void block_fn(size_t tid, __global int* res)"
326     NL, "{"
327     NL, "    res[tid] = -2;"
328     NL, "}"
329     NL, ""
330     NL, "void check_res(size_t tid, const ulong *value, __global int* res)"
331     NL, "{"
332     NL, "    if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] =  0;"
333     NL, "    else                                        res[tid] = -4;"
334     NL, "}"
335     NL, ""
336     NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)"
337     NL, "{"
338     NL, "    int multiplier = 3;"
339     NL, "    size_t tid = get_global_id(0);"
340     NL, "    clk_event_t user_evt = create_user_event();"
341     NL, ""
342     NL, "    res[tid] = -1;"
343     NL, "    queue_t def_q = get_default_queue();"
344     NL, "    ndrange_t ndrange = ndrange_1D(1);"
345     NL, "    clk_event_t block_evt1;"
346     NL, "    clk_event_t block_evt2;"
347     NL, ""
348     NL, "    void (^kernelBlock)(void)  = ^{ block_fn (tid, res);                   };"
349     NL, ""
350     NL, "    int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);"
351     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
352     NL, ""
353     NL, "    capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
354     NL, ""
355     NL, "    set_user_event_status(user_evt, CL_COMPLETE);"
356     NL, ""
357     NL, "    void (^checkBlock) (void)  = ^{ check_res(tid, &value, res);      };"
358     NL, ""
359     NL, "    enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);"
360     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
361     NL, ""
362     NL, "    release_event(user_evt);"
363     NL, "    release_event(block_evt1);"
364     NL, "    release_event(block_evt2);"
365     NL, "}"
366     NL
367 };
368 
369 static const char* enqueue_block_with_barrier[] =
370 {
371     NL, "void block_fn(size_t tid, int mul, __global int* res)"
372     NL, "{"
373     NL, "  if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);"
374     NL, "  res[tid] = mul * 7 -21;"
375     NL, "}"
376     NL, ""
377     NL, "void loop_fn(size_t tid, int n, __global int* res)"
378     NL, "{"
379     NL, "  while(n > 0)"
380     NL, "  {"
381     NL, "    barrier(CLK_GLOBAL_MEM_FENCE);"
382     NL, "    res[tid] = 0;"
383     NL, "    --n;"
384     NL, "  }"
385     NL, "}"
386     NL, ""
387     NL, "kernel void enqueue_block_with_barrier(__global int* res)"
388     NL, "{"
389     NL, "  int multiplier = 3;"
390     NL, "  size_t tid = get_global_id(0);"
391     NL, "  queue_t def_q = get_default_queue();"
392     NL, "  res[tid] = -1;"
393     NL, "  size_t n = 256;"
394     NL, ""
395     NL, "  void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
396     NL, ""
397     NL, "  ndrange_t ndrange = ndrange_1D(n);"
398     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
399     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
400     NL, ""
401     NL, "  void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };"
402     NL, ""
403     NL, "  enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);"
404     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
405     NL, "}"
406     NL
407 };
408 
409 static const char* enqueue_marker_with_block_event[] =
410 {
411     NL, "#define BLOCK_COMPLETED 1"
412     NL, "#define BLOCK_SUBMITTED 2"
413     NL, "#define CHECK_SUCCESS   0"
414     NL, ""
415     NL, "kernel void enqueue_marker_with_block_event(__global int* res)"
416     NL, "{"
417     NL, "  size_t tid = get_global_id(0);"
418     NL, ""
419     NL, "  clk_event_t user_evt = create_user_event();"
420     NL, ""
421     NL, "  res[tid] = BLOCK_SUBMITTED;"
422     NL, "  queue_t def_q = get_default_queue();"
423     NL, "  ndrange_t ndrange = ndrange_1D(1);"
424     NL, ""
425     NL, "  clk_event_t block_evt1;"
426     NL, "  clk_event_t marker_evt;"
427     NL, ""
428     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,"
429     NL, "  ^{"
430     NL, "     res[tid] = BLOCK_COMPLETED;"
431     NL, "   });"
432     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
433     NL, ""
434     NL, "  enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);"
435     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
436     NL, ""
437     NL, "  retain_event(marker_evt);"
438     NL, "  release_event(marker_evt);"
439     NL, ""
440     NL, "  //check block is not started"
441     NL, "  if(res[tid] == BLOCK_SUBMITTED)"
442     NL, "  {"
443     NL, "    clk_event_t my_evt;"
444     NL, "    enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
445     NL, "    ^{"
446     NL, "       //check block is completed"
447     NL, "       if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
448     NL, "     });"
449     NL, "    release_event(my_evt);"
450     NL, "  }"
451     NL, ""
452     NL, "  set_user_event_status(user_evt, CL_COMPLETE);"
453     NL, ""
454     NL, "  release_event(block_evt1);"
455     NL, "  release_event(marker_evt);"
456     NL, "  release_event(user_evt);"
457     NL, "}"
458     NL
459 };
460 
461 static const char* enqueue_marker_with_user_event[] =
462 {
463     NL, "#define BLOCK_COMPLETED 1"
464     NL, "#define BLOCK_SUBMITTED 2"
465     NL, "#define CHECK_SUCCESS   0"
466     NL, ""
467     NL, "kernel void enqueue_marker_with_user_event(__global int* res)"
468     NL, "{"
469     NL, "  size_t tid = get_global_id(0);"
470     NL, "  uint multiplier = 7;"
471     NL, ""
472     NL, "  clk_event_t user_evt = create_user_event();"
473     NL, ""
474     NL, "  res[tid] = BLOCK_SUBMITTED;"
475     NL, "  queue_t def_q = get_default_queue();"
476     NL, "  ndrange_t ndrange = ndrange_1D(1);"
477     NL, ""
478     NL, "  clk_event_t marker_evt;"
479     NL, "  clk_event_t block_evt;"
480     NL, ""
481     NL, "  int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);"
482     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
483     NL, ""
484     NL, "  retain_event(marker_evt);"
485     NL, "  release_event(marker_evt);"
486     NL, ""
487     NL, "  enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, "
488     NL, "  ^{"
489     NL, "     if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;"
490     NL, "   });"
491     NL, ""
492     NL, "  //check block is not started"
493     NL, "  if(res[tid] != BLOCK_SUBMITTED)  { res[tid] = -2; return; }"
494     NL, ""
495     NL, "  set_user_event_status(user_evt, CL_COMPLETE);"
496     NL, ""
497     NL, "  release_event(block_evt);"
498     NL, "  release_event(marker_evt);"
499     NL, "  release_event(user_evt);"
500     NL, "}"
501     NL
502 };
503 
504 static const char* enqueue_marker_with_mixed_events[] =
505 {
506     NL, "#define BLOCK_COMPLETED 1"
507     NL, "#define BLOCK_SUBMITTED 2"
508     NL, "#define CHECK_SUCCESS   0"
509     NL, ""
510     NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)"
511     NL, "{"
512     NL, "  size_t tid = get_global_id(0);"
513     NL, ""
514     NL, "  clk_event_t mix_ev[2];"
515     NL, "  mix_ev[0] = create_user_event();"
516     NL, ""
517     NL, "  res[tid] = BLOCK_SUBMITTED;"
518     NL, "  queue_t def_q = get_default_queue();"
519     NL, "  ndrange_t ndrange = ndrange_1D(1);"
520     NL, ""
521     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],"
522     NL, "  ^{"
523     NL, "     res[tid] = BLOCK_COMPLETED;"
524     NL, "   });"
525     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
526     NL, ""
527     NL, "  clk_event_t marker_evt;"
528     NL, ""
529     NL, "  enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);"
530     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
531     NL, ""
532     NL, "  retain_event(marker_evt);"
533     NL, "  release_event(marker_evt);"
534     NL, ""
535     NL, "  //check block is not started"
536     NL, "  if(res[tid] == BLOCK_SUBMITTED)"
537     NL, "  {"
538     NL, "    clk_event_t my_evt;"
539     NL, "    enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
540     NL, "    ^{"
541     NL, "       //check block is completed"
542     NL, "       if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
543     NL, "     });"
544     NL, "    release_event(my_evt);"
545     NL, "  }"
546     NL, ""
547     NL, "  set_user_event_status(mix_ev[0], CL_COMPLETE);"
548     NL, ""
549     NL, "  release_event(mix_ev[1]);"
550     NL, "  release_event(marker_evt);"
551     NL, "  release_event(mix_ev[0]);"
552     NL, "}"
553     NL
554 };
555 
556 static const char* enqueue_block_with_mixed_events[] =
557 {
558     NL, "kernel void enqueue_block_with_mixed_events(__global int* res)"
559     NL, "{"
560     NL, "  int enq_res;"
561     NL, "  size_t tid = get_global_id(0);"
562     NL, "  clk_event_t mix_ev[3];"
563     NL, "  mix_ev[0] = create_user_event();"
564     NL, "  queue_t def_q = get_default_queue();"
565     NL, "  ndrange_t ndrange = ndrange_1D(1);"
566     NL, "  res[tid] = -2;"
567     NL, ""
568     NL, "  enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });"
569     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
570     NL, ""
571     NL, "  enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);"
572     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
573     NL, ""
574     NL, "  enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });"
575     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }"
576     NL, ""
577     NL, "  set_user_event_status(mix_ev[0], CL_COMPLETE);"
578     NL, ""
579     NL, "  release_event(mix_ev[0]);"
580     NL, "  release_event(mix_ev[1]);"
581     NL, "  release_event(mix_ev[2]);"
582     NL, "}"
583     NL
584 };
585 
586 static const kernel_src sources_enqueue_block[] =
587 {
588     KERNEL(enqueue_simple_block),
589     // Block with local mem
590     KERNEL(enqueue_block_with_local_arg1),
591     KERNEL(enqueue_block_with_local_arg2),
592     KERNEL(enqueue_block_with_wait_list),
593     KERNEL(enqueue_block_with_wait_list_and_local_arg),
594     // WG size built-ins
595     KERNEL(enqueue_block_get_kernel_work_group_size),
596     KERNEL(enqueue_block_get_kernel_preferred_work_group_size_multiple),
597     // Event profiling info
598     KERNEL(enqueue_block_capture_event_profiling_info_after_execution),
599     KERNEL(enqueue_block_capture_event_profiling_info_before_execution),
600     // Marker
601     KERNEL(enqueue_marker_with_block_event),
602     KERNEL(enqueue_marker_with_user_event),
603     // Mixed events
604     KERNEL(enqueue_marker_with_mixed_events),
605     KERNEL(enqueue_block_with_mixed_events),
606     // Barrier
607     KERNEL(enqueue_block_with_barrier),
608 
609 };
610 static const size_t num_kernels_enqueue_block = arr_size(sources_enqueue_block);
611 
check_kernel_results(cl_int * results,cl_int len)612 static int check_kernel_results(cl_int* results, cl_int len)
613 {
614     for(cl_int i = 0; i < len; ++i)
615     {
616         if(results[i] != 0) return i;
617     }
618     return -1;
619 }
620 
test_enqueue_block(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)621 int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
622 {
623     cl_uint i;
624     cl_int n, err_ret, res = 0;
625     clCommandQueueWrapper dev_queue;
626     cl_int kernel_results[MAX_GWS] = {0};
627 
628     size_t ret_len;
629     cl_uint max_queues = 1;
630     cl_uint maxQueueSize = 0;
631     err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
632     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
633 
634     err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
635     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
636 
637     size_t max_local_size = 1;
638     err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
639     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
640 
641     cl_queue_properties queue_prop_def[] =
642     {
643         CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT|CL_QUEUE_PROFILING_ENABLE,
644         CL_QUEUE_SIZE, maxQueueSize,
645         0
646     };
647 
648     dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
649     test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
650 
651     size_t global_size = MAX_GWS;
652     size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size;
653     if(gWimpyMode)
654     {
655         global_size = 4;
656         local_size = 2;
657     }
658 
659     size_t failCnt = 0;
660     for(i = 0; i < num_kernels_enqueue_block; ++i)
661     {
662         if (!gKernelName.empty() && gKernelName != sources_enqueue_block[i].kernel_name)
663             continue;
664 
665         log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block[i].kernel_name, i + 1, num_kernels_enqueue_block);
666         err_ret = run_n_kernel_args(context, queue, sources_enqueue_block[i].lines, sources_enqueue_block[i].num_lines, sources_enqueue_block[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), 0, NULL);
667         if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block[i].kernel_name)) { ++failCnt; res = -1; }
668         else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_enqueue_block[i].kernel_name, n, kernel_results[n])) res = -1;
669         else log_info("'%s' kernel is OK.\n", sources_enqueue_block[i].kernel_name);
670     }
671 
672     if (failCnt > 0)
673     {
674       log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_kernels_enqueue_block);
675     }
676 
677     return res;
678 }
679 
680 
681 
682 #endif
683 
684 
685