• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20 
21 #include <vector>
22 
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26 
27 
28 #ifdef CL_VERSION_2_0
29 
30 static int gNestingLevel = 4;
31 extern int gWimpyMode;
32 
33 static const char* enqueue_nested_blocks_single[] =
34 {
35     NL, "void block_fn(__global int* res, int level)"
36     NL, "{"
37     NL, "  size_t tid = get_global_id(0);"
38     NL, "  queue_t def_q = get_default_queue();"
39     NL, "  ndrange_t ndrange = ndrange_1D(3);"
40     NL, "  if(--level < 0) return;"
41     NL, ""
42     NL, "  void (^kernelBlock)(void) = ^{ block_fn(res, level); };"
43     NL, ""
44     NL, "  // Only 1 work-item enqueues block"
45     NL, "  if(tid == 1)"
46     NL, "  {"
47     NL, "    res[tid]++;"
48     NL, "    int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
49     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
50     NL, "  }"
51     NL, "}"
52     NL, ""
53     NL, "kernel void enqueue_nested_blocks_single(__global int* res, int level)"
54     NL, "{"
55     NL, "  block_fn(res, level);"
56     NL, "}"
57     NL
58 };
59 
60 static const char* enqueue_nested_blocks_some_eq[] =
61 {
62     NL, "void block_fn(int level, __global int* res)"
63     NL, "{"
64     NL, "  size_t tid = get_global_id(0);"
65     NL, "  queue_t def_q = get_default_queue();"
66     NL, "  ndrange_t ndrange = ndrange_1D(10);"
67     NL, "  if(--level < 0) return;"
68     NL, ""
69     NL, "  void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
70     NL, ""
71     NL, "  // Some work-items enqueues nested blocks with the same level"
72     NL, "  if(tid < (get_global_size(0) >> 1))"
73     NL, "  {"
74     NL, "    atomic_inc(&res[tid]);"
75     NL, "    int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
76     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
77     NL, "  }"
78     NL, "}"
79     NL, ""
80     NL, "kernel void enqueue_nested_blocks_some_eq(__global int* res, int level)"
81     NL, "{"
82     NL, "  block_fn(level, res);"
83     NL, "}"
84     NL
85 };
86 
87 static const char* enqueue_nested_blocks_some_diff[] =
88 {
89     NL, "void block_fn(int level, __global int* res)"
90     NL, "{"
91     NL, "  size_t tid = get_global_id(0);"
92     NL, "  queue_t def_q = get_default_queue();"
93     NL, "  ndrange_t ndrange = ndrange_1D(10);"
94     NL, "  if(--level < 0) return;"
95     NL, ""
96     NL, "  void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
97     NL, ""
98     NL, "  // Some work-items enqueues nested blocks with different levels"
99     NL, "  if(tid % 2)"
100     NL, "  {"
101     NL, "    atomic_inc(&res[tid]);"
102     NL, "    if(level >= tid)"
103     NL, "    {"
104     NL, "      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
105     NL, "      if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
106     NL, "    }"
107     NL, "  }"
108     NL, "}"
109     NL, ""
110     NL, "kernel void enqueue_nested_blocks_some_diff(__global int* res, int level)"
111     NL, "{"
112     NL, "  block_fn(level, res);"
113     NL, "}"
114     NL
115 };
116 
117 static const char* enqueue_nested_blocks_all_eq[] =
118 {
119     NL, "void block_fn(int level, __global int* res)"
120     NL, "{"
121     NL, "  size_t tid = get_global_id(0);"
122     NL, "  queue_t def_q = get_default_queue();"
123     NL, "  ndrange_t ndrange = ndrange_1D(4);"
124     NL, "  if(--level < 0) return;"
125     NL, ""
126     NL, "  void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
127     NL, ""
128     NL, "  // All work-items enqueues nested blocks with the same level"
129     NL, "  atomic_inc(&res[tid]);"
130     NL, "  int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
131     NL, "  if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
132     NL, "}"
133     NL, ""
134     NL, "kernel void enqueue_nested_blocks_all_eq(__global int* res, int level)"
135     NL, "{"
136     NL, "  block_fn(level, res);"
137     NL, "}"
138     NL
139 };
140 
141 static const char* enqueue_nested_blocks_all_diff[] =
142 {
143     NL, "void block_fn(int level, __global int* res)"
144     NL, "{"
145     NL, "  size_t tid = get_global_id(0);"
146     NL, "  queue_t def_q = get_default_queue();"
147     NL, "  ndrange_t ndrange = ndrange_1D(10);"
148     NL, "  if(--level < 0) return;"
149     NL, ""
150     NL, "  void (^kernelBlock)(void) = ^{ block_fn(level, res); };"
151     NL, ""
152     NL, "  // All work-items enqueues nested blocks with different levels"
153     NL, "  atomic_inc(&res[tid]);"
154     NL, "  if(level >= tid)"
155     NL, "  {"
156     NL, "    int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
157     NL, "    if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
158     NL, "  }"
159     NL, "}"
160     NL, ""
161     NL, "kernel void enqueue_nested_blocks_all_diff(__global int* res, int level)"
162     NL, "{"
163     NL, "  block_fn(level, res);"
164     NL, "}"
165     NL
166 };
167 
check_single(cl_int * results,cl_int len,cl_int nesting_level)168 static int check_single(cl_int* results, cl_int len, cl_int nesting_level)
169 {
170     int i, fail = -1;
171     const cl_uint tid = 1;
172 
173     for(i = 0; i < len; ++i)
174     {
175         if(i != tid && results[i] != 0) { fail = i; break; }
176         if(i == tid && results[i] != nesting_level) { fail = i; break; }
177     }
178     return fail;
179 }
180 
generate_reference_some_eq(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)181 void generate_reference_some_eq(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
182 {
183     size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
184     if(--nesting_level < 0) return;
185 
186     for (size_t tid = 0; tid < globalWorkSize; ++tid)
187     {
188         if (tid < (globalWorkSize >> 1))
189         {
190             ++referenceResults[tid];
191             generate_reference_some_eq(referenceResults, len, nesting_level);
192         }
193     }
194 }
195 
check_some_eq(cl_int * results,cl_int len,cl_int nesting_level)196 static int check_some_eq(cl_int* results, cl_int len, cl_int nesting_level)
197 {
198     int i, fail = -1;
199     std::vector<cl_int> referenceResults(len, 0);
200     generate_reference_some_eq(referenceResults, len, nesting_level);
201 
202     for(i = 0; i < len; ++i)
203     {
204         if (results[i] != referenceResults[i]) { fail = i; break; }
205     }
206 
207     return fail;
208 }
209 
generate_reference_some_diff(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)210 void generate_reference_some_diff(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
211 {
212     size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
213     if(--nesting_level < 0) return;
214 
215     for (size_t tid = 0; tid < globalWorkSize; ++tid)
216     {
217         if (tid % 2)
218         {
219             ++referenceResults[tid];
220             if (nesting_level >= tid)
221             {
222                 generate_reference_some_diff(referenceResults, len, nesting_level);
223             }
224         }
225     }
226 }
227 
check_some_diff(cl_int * results,cl_int len,cl_int nesting_level)228 static int check_some_diff(cl_int* results, cl_int len, cl_int nesting_level)
229 {
230     int i, fail = -1;
231     std::vector<cl_int> referenceResults(len, 0);
232     generate_reference_some_diff(referenceResults, len, nesting_level);
233 
234     for(i = 0; i < len; ++i)
235     {
236         if (results[i] != referenceResults[i]) { fail = i; break; }
237     }
238 
239     return fail;
240 }
241 
generate_reference_all_eq(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)242 void generate_reference_all_eq(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
243 {
244     size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 4;
245     if(--nesting_level < 0) return;
246 
247     for (size_t tid = 0; tid < globalWorkSize; ++tid)
248     {
249         ++referenceResults[tid];
250         generate_reference_all_eq(referenceResults, len, nesting_level);
251     }
252 }
253 
check_all_eq(cl_int * results,cl_int len,cl_int nesting_level)254 static int check_all_eq(cl_int* results, cl_int len, cl_int nesting_level)
255 {
256     int i, fail = -1;
257     std::vector<cl_int> referenceResults(len, 0);
258     generate_reference_all_eq(referenceResults, len, nesting_level);
259 
260     for(i = 0; i < len; ++i)
261     {
262         if (results[i] != referenceResults[i]) { fail = i; break; }
263     }
264 
265     return fail;
266 }
267 
generate_reference_all_diff(std::vector<cl_int> & referenceResults,cl_int len,cl_int nesting_level)268 void generate_reference_all_diff(std::vector<cl_int> &referenceResults, cl_int len, cl_int nesting_level)
269 {
270     size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10;
271     if(--nesting_level < 0) return;
272 
273     for (size_t tid = 0; tid < globalWorkSize; ++tid)
274     {
275         ++referenceResults[tid];
276         if (nesting_level >= tid)
277         {
278             generate_reference_all_diff(referenceResults, len, nesting_level);
279         }
280     }
281 }
282 
check_all_diff(cl_int * results,cl_int len,cl_int nesting_level)283 static int check_all_diff(cl_int* results, cl_int len, cl_int nesting_level)
284 {
285     int i, fail = -1;
286     std::vector<cl_int> referenceResults(len, 0);
287     generate_reference_all_diff(referenceResults, len, nesting_level);
288 
289     for(i = 0; i < len; ++i)
290     {
291         if (results[i] != referenceResults[i]) { fail = i; break; }
292     }
293 
294     return fail;
295 }
296 
297 static const kernel_src_check sources_nested_blocks[] =
298 {
299     { KERNEL(enqueue_nested_blocks_single), check_single },
300     { KERNEL(enqueue_nested_blocks_some_eq), check_some_eq },
301     { KERNEL(enqueue_nested_blocks_some_diff), check_some_diff },
302     { KERNEL(enqueue_nested_blocks_all_eq), check_all_eq },
303     { KERNEL(enqueue_nested_blocks_all_diff), check_all_diff }
304 };
305 
test_enqueue_nested_blocks(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)306 int test_enqueue_nested_blocks(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
307 {
308     cl_uint i, k;
309     cl_int err_ret, res = 0;
310     clCommandQueueWrapper dev_queue;
311     const size_t MAX_GLOBAL_WORK_SIZE = MAX_GWS / 4;
312     cl_int kernel_results[MAX_GLOBAL_WORK_SIZE] = {0};
313 
314     if(gWimpyMode)
315     {
316         gNestingLevel = 2;
317         vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
318         vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
319     }
320 
321     size_t ret_len;
322     cl_uint max_queues = 1;
323     cl_uint maxQueueSize = 0;
324     err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
325     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
326 
327     err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
328     test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
329 
330     cl_queue_properties queue_prop_def[] =
331     {
332         CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
333         CL_QUEUE_SIZE, maxQueueSize,
334         0
335     };
336 
337     dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
338     test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
339 
340     kernel_arg args[] =
341     {
342         { sizeof(cl_int), &gNestingLevel }
343     };
344 
345     size_t failCnt = 0;
346     for(k = 0; k < arr_size(sources_nested_blocks); ++k)
347     {
348         if (!gKernelName.empty() && gKernelName != sources_nested_blocks[k].src.kernel_name)
349             continue;
350 
351         log_info("Running '%s' kernel (%d of %d) ...\n", sources_nested_blocks[k].src.kernel_name, k + 1, arr_size(sources_nested_blocks));
352         for(i = 0; i < MAX_GLOBAL_WORK_SIZE; ++i) kernel_results[i] = 0;
353 
354         err_ret = run_n_kernel_args(context, queue, sources_nested_blocks[k].src.lines, sources_nested_blocks[k].src.num_lines, sources_nested_blocks[k].src.kernel_name, 0, MAX_GLOBAL_WORK_SIZE, kernel_results, sizeof(kernel_results), arr_size(args), args);
355         if(check_error(err_ret, "'%s' kernel execution failed", sources_nested_blocks[k].src.kernel_name)) { res = -1; continue ; }
356 
357         //check results
358         int fail = sources_nested_blocks[k].check(kernel_results, MAX_GLOBAL_WORK_SIZE, gNestingLevel);
359 
360         if(check_error(err_ret, "'%s' kernel execution failed", sources_nested_blocks[k].src.kernel_name)) { ++failCnt; res = -1; continue; }
361         else if(fail >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_nested_blocks[k].src.kernel_name, fail, kernel_results[fail])) { ++failCnt; res = -1; continue; }
362         else log_info("'%s' kernel is OK.\n", sources_nested_blocks[k].src.kernel_name);
363     }
364 
365     if (failCnt > 0)
366     {
367         log_error("ERROR: %d of %d kernels failed.\n", failCnt, arr_size(sources_nested_blocks));
368     }
369 
370     return res;
371 }
372 
373 #endif
374 
375