• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef SUBHELPERS_H
17 #define SUBHELPERS_H
18 
19 #include "testHarness.h"
20 #include "kernelHelpers.h"
21 #include "typeWrappers.h"
22 
23 #include <limits>
24 #include <vector>
25 
26 class subgroupsAPI {
27 public:
subgroupsAPI(cl_platform_id platform,bool useCoreSubgroups)28     subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups)
29     {
30         static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
31                           == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
32                       "Enums have to be the same");
33         static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
34                           == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
35                       "Enums have to be the same");
36         if (useCoreSubgroups)
37         {
38             _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
39             clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
40         }
41         else
42         {
43             _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn)
44                 clGetExtensionFunctionAddressForPlatform(
45                     platform, "clGetKernelSubGroupInfoKHR");
46             clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR";
47         }
48     }
clGetKernelSubGroupInfo_ptr()49     clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr()
50     {
51         return _clGetKernelSubGroupInfo_ptr;
52     }
53     const char *clGetKernelSubGroupInfo_name;
54 
55 private:
56     clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
57 };
58 
59 // Some template helpers
60 template <typename Ty> struct TypeName;
61 template <> struct TypeName<cl_half>
62 {
63     static const char *val() { return "half"; }
64 };
65 template <> struct TypeName<cl_uint>
66 {
67     static const char *val() { return "uint"; }
68 };
69 template <> struct TypeName<cl_int>
70 {
71     static const char *val() { return "int"; }
72 };
73 template <> struct TypeName<cl_ulong>
74 {
75     static const char *val() { return "ulong"; }
76 };
77 template <> struct TypeName<cl_long>
78 {
79     static const char *val() { return "long"; }
80 };
81 template <> struct TypeName<float>
82 {
83     static const char *val() { return "float"; }
84 };
85 template <> struct TypeName<double>
86 {
87     static const char *val() { return "double"; }
88 };
89 
90 template <typename Ty> struct TypeDef;
91 template <> struct TypeDef<cl_half>
92 {
93     static const char *val() { return "typedef half Type;\n"; }
94 };
95 template <> struct TypeDef<cl_uint>
96 {
97     static const char *val() { return "typedef uint Type;\n"; }
98 };
99 template <> struct TypeDef<cl_int>
100 {
101     static const char *val() { return "typedef int Type;\n"; }
102 };
103 template <> struct TypeDef<cl_ulong>
104 {
105     static const char *val() { return "typedef ulong Type;\n"; }
106 };
107 template <> struct TypeDef<cl_long>
108 {
109     static const char *val() { return "typedef long Type;\n"; }
110 };
111 template <> struct TypeDef<float>
112 {
113     static const char *val() { return "typedef float Type;\n"; }
114 };
115 template <> struct TypeDef<double>
116 {
117     static const char *val() { return "typedef double Type;\n"; }
118 };
119 
120 template <typename Ty, int Which> struct TypeIdentity;
121 // template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return
122 // (cl_half)0.0; } }; template <> struct TypeIdentity<cl_half,0> { static
123 // cl_half val() { return -(cl_half)65536.0; } }; template <> struct
124 // TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; }
125 // };
126 
127 template <> struct TypeIdentity<cl_uint, 0>
128 {
129     static cl_uint val() { return (cl_uint)0; }
130 };
131 template <> struct TypeIdentity<cl_uint, 1>
132 {
133     static cl_uint val() { return (cl_uint)0; }
134 };
135 template <> struct TypeIdentity<cl_uint, 2>
136 {
137     static cl_uint val() { return (cl_uint)0xffffffff; }
138 };
139 
140 template <> struct TypeIdentity<cl_int, 0>
141 {
142     static cl_int val() { return (cl_int)0; }
143 };
144 template <> struct TypeIdentity<cl_int, 1>
145 {
146     static cl_int val() { return (cl_int)0x80000000; }
147 };
148 template <> struct TypeIdentity<cl_int, 2>
149 {
150     static cl_int val() { return (cl_int)0x7fffffff; }
151 };
152 
153 template <> struct TypeIdentity<cl_ulong, 0>
154 {
155     static cl_ulong val() { return (cl_ulong)0; }
156 };
157 template <> struct TypeIdentity<cl_ulong, 1>
158 {
159     static cl_ulong val() { return (cl_ulong)0; }
160 };
161 template <> struct TypeIdentity<cl_ulong, 2>
162 {
163     static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; }
164 };
165 
166 template <> struct TypeIdentity<cl_long, 0>
167 {
168     static cl_long val() { return (cl_long)0; }
169 };
170 template <> struct TypeIdentity<cl_long, 1>
171 {
172     static cl_long val() { return (cl_long)0x8000000000000000ULL; }
173 };
174 template <> struct TypeIdentity<cl_long, 2>
175 {
176     static cl_long val() { return (cl_long)0x7fffffffffffffffULL; }
177 };
178 
179 
180 template <> struct TypeIdentity<float, 0>
181 {
182     static float val() { return 0.F; }
183 };
184 template <> struct TypeIdentity<float, 1>
185 {
186     static float val() { return -std::numeric_limits<float>::infinity(); }
187 };
188 template <> struct TypeIdentity<float, 2>
189 {
190     static float val() { return std::numeric_limits<float>::infinity(); }
191 };
192 
193 template <> struct TypeIdentity<double, 0>
194 {
195     static double val() { return 0.L; }
196 };
197 
198 template <> struct TypeIdentity<double, 1>
199 {
200     static double val() { return -std::numeric_limits<double>::infinity(); }
201 };
202 template <> struct TypeIdentity<double, 2>
203 {
204     static double val() { return std::numeric_limits<double>::infinity(); }
205 };
206 
207 template <typename Ty> struct TypeCheck;
208 template <> struct TypeCheck<cl_uint>
209 {
210     static bool val(cl_device_id) { return true; }
211 };
212 template <> struct TypeCheck<cl_int>
213 {
214     static bool val(cl_device_id) { return true; }
215 };
216 
217 static bool int64_ok(cl_device_id device)
218 {
219     char profile[128];
220     int error;
221 
222     error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
223                             (void *)&profile, NULL);
224     if (error)
225     {
226         log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
227         return false;
228     }
229 
230     if (strcmp(profile, "EMBEDDED_PROFILE") == 0)
231         return is_extension_available(device, "cles_khr_int64");
232 
233     return true;
234 }
235 
236 template <> struct TypeCheck<cl_ulong>
237 {
238     static bool val(cl_device_id device) { return int64_ok(device); }
239 };
240 template <> struct TypeCheck<cl_long>
241 {
242     static bool val(cl_device_id device) { return int64_ok(device); }
243 };
244 template <> struct TypeCheck<cl_float>
245 {
246     static bool val(cl_device_id) { return true; }
247 };
248 template <> struct TypeCheck<cl_half>
249 {
250     static bool val(cl_device_id device)
251     {
252         return is_extension_available(device, "cl_khr_fp16");
253     }
254 };
255 template <> struct TypeCheck<double>
256 {
257     static bool val(cl_device_id device)
258     {
259         int error;
260         cl_device_fp_config c;
261         error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
262                                 (void *)&c, NULL);
263         if (error)
264         {
265             log_info(
266                 "clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
267             return false;
268         }
269         return c != 0;
270     }
271 };
272 
273 
274 // Run a test kernel to compute the result of a built-in on an input
275 static int run_kernel(cl_context context, cl_command_queue queue,
276                       cl_kernel kernel, size_t global, size_t local,
277                       void *idata, size_t isize, void *mdata, size_t msize,
278                       void *odata, size_t osize, size_t tsize = 0)
279 {
280     clMemWrapper in;
281     clMemWrapper xy;
282     clMemWrapper out;
283     clMemWrapper tmp;
284     int error;
285 
286     in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error);
287     test_error(error, "clCreateBuffer failed");
288 
289     xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error);
290     test_error(error, "clCreateBuffer failed");
291 
292     out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
293     test_error(error, "clCreateBuffer failed");
294 
295     if (tsize)
296     {
297         tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
298                              tsize, NULL, &error);
299         test_error(error, "clCreateBuffer failed");
300     }
301 
302     error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in);
303     test_error(error, "clSetKernelArg failed");
304 
305     error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy);
306     test_error(error, "clSetKernelArg failed");
307 
308     error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
309     test_error(error, "clSetKernelArg failed");
310 
311     if (tsize)
312     {
313         error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
314         test_error(error, "clSetKernelArg failed");
315     }
316 
317     error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL,
318                                  NULL);
319     test_error(error, "clEnqueueWriteBuffer failed");
320 
321     error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
322                                    NULL, NULL);
323     test_error(error, "clEnqueueNDRangeKernel failed");
324 
325     error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
326                                 NULL);
327     test_error(error, "clEnqueueReadBuffer failed");
328 
329     error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL,
330                                 NULL);
331     test_error(error, "clEnqueueReadBuffer failed");
332 
333     error = clFinish(queue);
334     test_error(error, "clFinish failed");
335 
336     return error;
337 }
338 
339 // Driver for testing a single built in function
340 template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE,
341           size_t TSIZE = 0>
342 struct test
343 {
344     static int run(cl_device_id device, cl_context context,
345                    cl_command_queue queue, int num_elements, const char *kname,
346                    const char *src, int dynscl, bool useCoreSubgroups)
347     {
348         size_t tmp;
349         int error;
350         int subgroup_size, num_subgroups;
351         size_t realSize;
352         size_t global;
353         size_t local;
354         clProgramWrapper program;
355         clKernelWrapper kernel;
356         cl_platform_id platform;
357         cl_int sgmap[2 * GSIZE];
358         Ty mapin[LSIZE];
359         Ty mapout[LSIZE];
360 
361         // Make sure a test of type Ty is supported by the device
362         if (!TypeCheck<Ty>::val(device)) return 0;
363 
364         error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
365                                 (void *)&platform, NULL);
366         test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
367         std::stringstream kernel_sstr;
368         if (useCoreSubgroups)
369         {
370             kernel_sstr
371                 << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
372         }
373         kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
374                        "M[I].y = get_sub_group_id();\n";
375         kernel_sstr << TypeDef<Ty>::val();
376         kernel_sstr << src;
377         const std::string &kernel_str = kernel_sstr.str();
378         const char *kernel_src = kernel_str.c_str();
379 
380         error = create_single_kernel_helper_with_build_options(
381             context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0");
382         if (error != 0) return error;
383 
384         // Determine some local dimensions to use for the test.
385         global = GSIZE;
386         error = get_max_common_work_group_size(context, kernel, GSIZE, &local);
387         test_error(error, "get_max_common_work_group_size failed");
388 
389         // Limit it a bit so we have muliple work groups
390         // Ideally this will still be large enough to give us multiple subgroups
391         if (local > LSIZE) local = LSIZE;
392 
393         // Get the sub group info
394         subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
395         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
396             subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
397         if (clGetKernelSubGroupInfo_ptr == NULL)
398         {
399             log_error("ERROR: %s function not available",
400                       subgroupsApiSet.clGetKernelSubGroupInfo_name);
401             return TEST_FAIL;
402         }
403         error = clGetKernelSubGroupInfo_ptr(
404             kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
405             sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
406         if (error != CL_SUCCESS)
407         {
408             log_error("ERROR: %s function error for "
409                       "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
410                       subgroupsApiSet.clGetKernelSubGroupInfo_name);
411             return TEST_FAIL;
412         }
413 
414         subgroup_size = (int)tmp;
415 
416         error = clGetKernelSubGroupInfo_ptr(
417             kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
418             sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
419         if (error != CL_SUCCESS)
420         {
421             log_error("ERROR: %s function error for "
422                       "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
423                       subgroupsApiSet.clGetKernelSubGroupInfo_name);
424             return TEST_FAIL;
425         }
426 
427         num_subgroups = (int)tmp;
428         // Make sure the number of sub groups is what we expect
429         if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
430         {
431             log_error("ERROR: unexpected number of subgroups (%d) returned\n",
432                       num_subgroups);
433             return TEST_FAIL;
434         }
435 
436         std::vector<Ty> idata;
437         std::vector<Ty> odata;
438         size_t input_array_size = GSIZE;
439         size_t output_array_size = GSIZE;
440 
441         if (dynscl != 0)
442         {
443             input_array_size =
444                 (int)global / (int)local * num_subgroups * dynscl;
445             output_array_size = (int)global / (int)local * dynscl;
446         }
447 
448         idata.resize(input_array_size);
449         odata.resize(output_array_size);
450 
451         // Run the kernel once on zeroes to get the map
452         memset(&idata[0], 0, input_array_size * sizeof(Ty));
453         error = run_kernel(context, queue, kernel, global, local, &idata[0],
454                            input_array_size * sizeof(Ty), sgmap,
455                            global * sizeof(cl_int) * 2, &odata[0],
456                            output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
457         if (error) return error;
458 
459         // Generate the desired input for the kernel
460         Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local,
461                  (int)global / (int)local);
462 
463         error = run_kernel(context, queue, kernel, global, local, &idata[0],
464                            input_array_size * sizeof(Ty), sgmap,
465                            global * sizeof(cl_int) * 2, &odata[0],
466                            output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
467         if (error) return error;
468 
469 
470         // Check the result
471         return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap,
472                         subgroup_size, (int)local, (int)global / (int)local);
473     }
474 };
475 
476 #endif
477