1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef SUBHELPERS_H
17 #define SUBHELPERS_H
18
19 #include "testHarness.h"
20 #include "kernelHelpers.h"
21 #include "typeWrappers.h"
22 #include "imageHelpers.h"
23
24 #include <limits>
25 #include <vector>
26 #include <type_traits>
27
28 #define NR_OF_ACTIVE_WORK_ITEMS 4
29
30 extern MTdata gMTdata;
31
32 struct WorkGroupParams
33 {
34 WorkGroupParams(size_t gws, size_t lws,
35 const std::vector<std::string> &req_ext = {},
36 const std::vector<uint32_t> &all_wim = {})
global_workgroup_sizeWorkGroupParams37 : global_workgroup_size(gws), local_workgroup_size(lws),
38 required_extensions(req_ext), all_work_item_masks(all_wim)
39 {
40 subgroup_size = 0;
41 work_items_mask = 0;
42 use_core_subgroups = true;
43 dynsc = 0;
44 }
45 size_t global_workgroup_size;
46 size_t local_workgroup_size;
47 size_t subgroup_size;
48 uint32_t work_items_mask;
49 int dynsc;
50 bool use_core_subgroups;
51 std::vector<std::string> required_extensions;
52 std::vector<uint32_t> all_work_item_masks;
53 };
54
55 enum class SubgroupsBroadcastOp
56 {
57 broadcast,
58 broadcast_first,
59 non_uniform_broadcast
60 };
61
62 enum class NonUniformVoteOp
63 {
64 elect,
65 all,
66 any,
67 all_equal
68 };
69
70 enum class BallotOp
71 {
72 ballot,
73 inverse_ballot,
74 ballot_bit_extract,
75 ballot_bit_count,
76 ballot_inclusive_scan,
77 ballot_exclusive_scan,
78 ballot_find_lsb,
79 ballot_find_msb,
80 eq_mask,
81 ge_mask,
82 gt_mask,
83 le_mask,
84 lt_mask,
85 };
86
87 enum class ShuffleOp
88 {
89 shuffle,
90 shuffle_up,
91 shuffle_down,
92 shuffle_xor
93 };
94
95 enum class ArithmeticOp
96 {
97 add_,
98 max_,
99 min_,
100 mul_,
101 and_,
102 or_,
103 xor_,
104 logical_and,
105 logical_or,
106 logical_xor
107 };
108
operation_names(ArithmeticOp operation)109 static const char *const operation_names(ArithmeticOp operation)
110 {
111 switch (operation)
112 {
113 case ArithmeticOp::add_: return "add";
114 case ArithmeticOp::max_: return "max";
115 case ArithmeticOp::min_: return "min";
116 case ArithmeticOp::mul_: return "mul";
117 case ArithmeticOp::and_: return "and";
118 case ArithmeticOp::or_: return "or";
119 case ArithmeticOp::xor_: return "xor";
120 case ArithmeticOp::logical_and: return "logical_and";
121 case ArithmeticOp::logical_or: return "logical_or";
122 case ArithmeticOp::logical_xor: return "logical_xor";
123 default: log_error("Unknown operation request"); break;
124 }
125 return "";
126 }
127
operation_names(BallotOp operation)128 static const char *const operation_names(BallotOp operation)
129 {
130 switch (operation)
131 {
132 case BallotOp::ballot: return "ballot";
133 case BallotOp::inverse_ballot: return "inverse_ballot";
134 case BallotOp::ballot_bit_extract: return "bit_extract";
135 case BallotOp::ballot_bit_count: return "bit_count";
136 case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
137 case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
138 case BallotOp::ballot_find_lsb: return "find_lsb";
139 case BallotOp::ballot_find_msb: return "find_msb";
140 case BallotOp::eq_mask: return "eq";
141 case BallotOp::ge_mask: return "ge";
142 case BallotOp::gt_mask: return "gt";
143 case BallotOp::le_mask: return "le";
144 case BallotOp::lt_mask: return "lt";
145 default: log_error("Unknown operation request"); break;
146 }
147 return "";
148 }
149
operation_names(ShuffleOp operation)150 static const char *const operation_names(ShuffleOp operation)
151 {
152 switch (operation)
153 {
154 case ShuffleOp::shuffle: return "shuffle";
155 case ShuffleOp::shuffle_up: return "shuffle_up";
156 case ShuffleOp::shuffle_down: return "shuffle_down";
157 case ShuffleOp::shuffle_xor: return "shuffle_xor";
158 default: log_error("Unknown operation request"); break;
159 }
160 return "";
161 }
162
operation_names(NonUniformVoteOp operation)163 static const char *const operation_names(NonUniformVoteOp operation)
164 {
165 switch (operation)
166 {
167 case NonUniformVoteOp::all: return "all";
168 case NonUniformVoteOp::all_equal: return "all_equal";
169 case NonUniformVoteOp::any: return "any";
170 case NonUniformVoteOp::elect: return "elect";
171 default: log_error("Unknown operation request"); break;
172 }
173 return "";
174 }
175
operation_names(SubgroupsBroadcastOp operation)176 static const char *const operation_names(SubgroupsBroadcastOp operation)
177 {
178 switch (operation)
179 {
180 case SubgroupsBroadcastOp::broadcast: return "broadcast";
181 case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
182 case SubgroupsBroadcastOp::non_uniform_broadcast:
183 return "non_uniform_broadcast";
184 default: log_error("Unknown operation request"); break;
185 }
186 return "";
187 }
188
189 class subgroupsAPI {
190 public:
subgroupsAPI(cl_platform_id platform,bool use_core_subgroups)191 subgroupsAPI(cl_platform_id platform, bool use_core_subgroups)
192 {
193 static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
194 == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,
195 "Enums have to be the same");
196 static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
197 == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR,
198 "Enums have to be the same");
199 if (use_core_subgroups)
200 {
201 _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo;
202 clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo";
203 }
204 else
205 {
206 _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn)
207 clGetExtensionFunctionAddressForPlatform(
208 platform, "clGetKernelSubGroupInfoKHR");
209 clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR";
210 }
211 }
clGetKernelSubGroupInfo_ptr()212 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr()
213 {
214 return _clGetKernelSubGroupInfo_ptr;
215 }
216 const char *clGetKernelSubGroupInfo_name;
217
218 private:
219 clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr;
220 };
221
222 // Need to defined custom type for vector size = 3 and half type. This is
223 // because of 3-component types are otherwise indistinguishable from the
224 // 4-component types, and because the half type is indistinguishable from some
225 // other 16-bit type (ushort)
226 namespace subgroups {
227 struct cl_char3
228 {
229 ::cl_char3 data;
230 };
231 struct cl_uchar3
232 {
233 ::cl_uchar3 data;
234 };
235 struct cl_short3
236 {
237 ::cl_short3 data;
238 };
239 struct cl_ushort3
240 {
241 ::cl_ushort3 data;
242 };
243 struct cl_int3
244 {
245 ::cl_int3 data;
246 };
247 struct cl_uint3
248 {
249 ::cl_uint3 data;
250 };
251 struct cl_long3
252 {
253 ::cl_long3 data;
254 };
255 struct cl_ulong3
256 {
257 ::cl_ulong3 data;
258 };
259 struct cl_float3
260 {
261 ::cl_float3 data;
262 };
263 struct cl_double3
264 {
265 ::cl_double3 data;
266 };
267 struct cl_half
268 {
269 ::cl_half data;
270 };
271 struct cl_half2
272 {
273 ::cl_half2 data;
274 };
275 struct cl_half3
276 {
277 ::cl_half3 data;
278 };
279 struct cl_half4
280 {
281 ::cl_half4 data;
282 };
283 struct cl_half8
284 {
285 ::cl_half8 data;
286 };
287 struct cl_half16
288 {
289 ::cl_half16 data;
290 };
291 }
292
int64_ok(cl_device_id device)293 static bool int64_ok(cl_device_id device)
294 {
295 char profile[128];
296 int error;
297
298 error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
299 (void *)&profile, NULL);
300 if (error)
301 {
302 log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n");
303 return false;
304 }
305
306 if (strcmp(profile, "EMBEDDED_PROFILE") == 0)
307 return is_extension_available(device, "cles_khr_int64");
308
309 return true;
310 }
311
double_ok(cl_device_id device)312 static bool double_ok(cl_device_id device)
313 {
314 int error;
315 cl_device_fp_config c;
316 error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c),
317 (void *)&c, NULL);
318 if (error)
319 {
320 log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n");
321 return false;
322 }
323 return c != 0;
324 }
325
half_ok(cl_device_id device)326 static bool half_ok(cl_device_id device)
327 {
328 int error;
329 cl_device_fp_config c;
330 error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG, sizeof(c),
331 (void *)&c, NULL);
332 if (error)
333 {
334 log_info("clGetDeviceInfo failed with CL_DEVICE_HALF_FP_CONFIG\n");
335 return false;
336 }
337 return c != 0;
338 }
339
340 template <typename Ty> struct CommonTypeManager
341 {
342
nameCommonTypeManager343 static const char *name() { return ""; }
add_typedefCommonTypeManager344 static const char *add_typedef() { return "\n"; }
345 typedef std::false_type is_vector_type;
346 typedef std::false_type is_sb_vector_size3;
347 typedef std::false_type is_sb_vector_type;
348 typedef std::false_type is_sb_scalar_type;
type_supportedCommonTypeManager349 static const bool type_supported(cl_device_id) { return true; }
identify_limitsCommonTypeManager350 static const Ty identify_limits(ArithmeticOp operation)
351 {
352 switch (operation)
353 {
354 case ArithmeticOp::add_: return (Ty)0;
355 case ArithmeticOp::max_: return (std::numeric_limits<Ty>::min)();
356 case ArithmeticOp::min_: return (std::numeric_limits<Ty>::max)();
357 case ArithmeticOp::mul_: return (Ty)1;
358 case ArithmeticOp::and_: return (Ty)~0;
359 case ArithmeticOp::or_: return (Ty)0;
360 case ArithmeticOp::xor_: return (Ty)0;
361 default: log_error("Unknown operation request"); break;
362 }
363 return 0;
364 }
365 };
366
367 template <typename> struct TypeManager;
368
369 template <> struct TypeManager<cl_int> : public CommonTypeManager<cl_int>
370 {
371 static const char *name() { return "int"; }
372 static const char *add_typedef() { return "typedef int Type;\n"; }
373 static cl_int identify_limits(ArithmeticOp operation)
374 {
375 switch (operation)
376 {
377 case ArithmeticOp::add_: return (cl_int)0;
378 case ArithmeticOp::max_:
379 return (std::numeric_limits<cl_int>::min)();
380 case ArithmeticOp::min_:
381 return (std::numeric_limits<cl_int>::max)();
382 case ArithmeticOp::mul_: return (cl_int)1;
383 case ArithmeticOp::and_: return (cl_int)~0;
384 case ArithmeticOp::or_: return (cl_int)0;
385 case ArithmeticOp::xor_: return (cl_int)0;
386 case ArithmeticOp::logical_and: return (cl_int)1;
387 case ArithmeticOp::logical_or: return (cl_int)0;
388 case ArithmeticOp::logical_xor: return (cl_int)0;
389 default: log_error("Unknown operation request"); break;
390 }
391 return 0;
392 }
393 };
394 template <> struct TypeManager<cl_int2> : public CommonTypeManager<cl_int2>
395 {
396 static const char *name() { return "int2"; }
397 static const char *add_typedef() { return "typedef int2 Type;\n"; }
398 typedef std::true_type is_vector_type;
399 using scalar_type = cl_int;
400 };
401 template <>
402 struct TypeManager<subgroups::cl_int3>
403 : public CommonTypeManager<subgroups::cl_int3>
404 {
405 static const char *name() { return "int3"; }
406 static const char *add_typedef() { return "typedef int3 Type;\n"; }
407 typedef std::true_type is_sb_vector_size3;
408 using scalar_type = cl_int;
409 };
410 template <> struct TypeManager<cl_int4> : public CommonTypeManager<cl_int4>
411 {
412 static const char *name() { return "int4"; }
413 static const char *add_typedef() { return "typedef int4 Type;\n"; }
414 using scalar_type = cl_int;
415 typedef std::true_type is_vector_type;
416 };
417 template <> struct TypeManager<cl_int8> : public CommonTypeManager<cl_int8>
418 {
419 static const char *name() { return "int8"; }
420 static const char *add_typedef() { return "typedef int8 Type;\n"; }
421 using scalar_type = cl_int;
422 typedef std::true_type is_vector_type;
423 };
424 template <> struct TypeManager<cl_int16> : public CommonTypeManager<cl_int16>
425 {
426 static const char *name() { return "int16"; }
427 static const char *add_typedef() { return "typedef int16 Type;\n"; }
428 using scalar_type = cl_int;
429 typedef std::true_type is_vector_type;
430 };
431 // cl_uint
432 template <> struct TypeManager<cl_uint> : public CommonTypeManager<cl_uint>
433 {
434 static const char *name() { return "uint"; }
435 static const char *add_typedef() { return "typedef uint Type;\n"; }
436 };
437 template <> struct TypeManager<cl_uint2> : public CommonTypeManager<cl_uint2>
438 {
439 static const char *name() { return "uint2"; }
440 static const char *add_typedef() { return "typedef uint2 Type;\n"; }
441 using scalar_type = cl_uint;
442 typedef std::true_type is_vector_type;
443 };
444 template <>
445 struct TypeManager<subgroups::cl_uint3>
446 : public CommonTypeManager<subgroups::cl_uint3>
447 {
448 static const char *name() { return "uint3"; }
449 static const char *add_typedef() { return "typedef uint3 Type;\n"; }
450 typedef std::true_type is_sb_vector_size3;
451 using scalar_type = cl_uint;
452 };
453 template <> struct TypeManager<cl_uint4> : public CommonTypeManager<cl_uint4>
454 {
455 static const char *name() { return "uint4"; }
456 static const char *add_typedef() { return "typedef uint4 Type;\n"; }
457 using scalar_type = cl_uint;
458 typedef std::true_type is_vector_type;
459 };
460 template <> struct TypeManager<cl_uint8> : public CommonTypeManager<cl_uint8>
461 {
462 static const char *name() { return "uint8"; }
463 static const char *add_typedef() { return "typedef uint8 Type;\n"; }
464 using scalar_type = cl_uint;
465 typedef std::true_type is_vector_type;
466 };
467 template <> struct TypeManager<cl_uint16> : public CommonTypeManager<cl_uint16>
468 {
469 static const char *name() { return "uint16"; }
470 static const char *add_typedef() { return "typedef uint16 Type;\n"; }
471 using scalar_type = cl_uint;
472 typedef std::true_type is_vector_type;
473 };
474 // cl_short
475 template <> struct TypeManager<cl_short> : public CommonTypeManager<cl_short>
476 {
477 static const char *name() { return "short"; }
478 static const char *add_typedef() { return "typedef short Type;\n"; }
479 };
480 template <> struct TypeManager<cl_short2> : public CommonTypeManager<cl_short2>
481 {
482 static const char *name() { return "short2"; }
483 static const char *add_typedef() { return "typedef short2 Type;\n"; }
484 using scalar_type = cl_short;
485 typedef std::true_type is_vector_type;
486 };
487 template <>
488 struct TypeManager<subgroups::cl_short3>
489 : public CommonTypeManager<subgroups::cl_short3>
490 {
491 static const char *name() { return "short3"; }
492 static const char *add_typedef() { return "typedef short3 Type;\n"; }
493 typedef std::true_type is_sb_vector_size3;
494 using scalar_type = cl_short;
495 };
496 template <> struct TypeManager<cl_short4> : public CommonTypeManager<cl_short4>
497 {
498 static const char *name() { return "short4"; }
499 static const char *add_typedef() { return "typedef short4 Type;\n"; }
500 using scalar_type = cl_short;
501 typedef std::true_type is_vector_type;
502 };
503 template <> struct TypeManager<cl_short8> : public CommonTypeManager<cl_short8>
504 {
505 static const char *name() { return "short8"; }
506 static const char *add_typedef() { return "typedef short8 Type;\n"; }
507 using scalar_type = cl_short;
508 typedef std::true_type is_vector_type;
509 };
510 template <>
511 struct TypeManager<cl_short16> : public CommonTypeManager<cl_short16>
512 {
513 static const char *name() { return "short16"; }
514 static const char *add_typedef() { return "typedef short16 Type;\n"; }
515 using scalar_type = cl_short;
516 typedef std::true_type is_vector_type;
517 };
518 // cl_ushort
519 template <> struct TypeManager<cl_ushort> : public CommonTypeManager<cl_ushort>
520 {
521 static const char *name() { return "ushort"; }
522 static const char *add_typedef() { return "typedef ushort Type;\n"; }
523 };
524 template <>
525 struct TypeManager<cl_ushort2> : public CommonTypeManager<cl_ushort2>
526 {
527 static const char *name() { return "ushort2"; }
528 static const char *add_typedef() { return "typedef ushort2 Type;\n"; }
529 using scalar_type = cl_ushort;
530 typedef std::true_type is_vector_type;
531 };
532 template <>
533 struct TypeManager<subgroups::cl_ushort3>
534 : public CommonTypeManager<subgroups::cl_ushort3>
535 {
536 static const char *name() { return "ushort3"; }
537 static const char *add_typedef() { return "typedef ushort3 Type;\n"; }
538 typedef std::true_type is_sb_vector_size3;
539 using scalar_type = cl_ushort;
540 };
541 template <>
542 struct TypeManager<cl_ushort4> : public CommonTypeManager<cl_ushort4>
543 {
544 static const char *name() { return "ushort4"; }
545 static const char *add_typedef() { return "typedef ushort4 Type;\n"; }
546 using scalar_type = cl_ushort;
547 typedef std::true_type is_vector_type;
548 };
549 template <>
550 struct TypeManager<cl_ushort8> : public CommonTypeManager<cl_ushort8>
551 {
552 static const char *name() { return "ushort8"; }
553 static const char *add_typedef() { return "typedef ushort8 Type;\n"; }
554 using scalar_type = cl_ushort;
555 typedef std::true_type is_vector_type;
556 };
557 template <>
558 struct TypeManager<cl_ushort16> : public CommonTypeManager<cl_ushort16>
559 {
560 static const char *name() { return "ushort16"; }
561 static const char *add_typedef() { return "typedef ushort16 Type;\n"; }
562 using scalar_type = cl_ushort;
563 typedef std::true_type is_vector_type;
564 };
565 // cl_char
566 template <> struct TypeManager<cl_char> : public CommonTypeManager<cl_char>
567 {
568 static const char *name() { return "char"; }
569 static const char *add_typedef() { return "typedef char Type;\n"; }
570 };
571 template <> struct TypeManager<cl_char2> : public CommonTypeManager<cl_char2>
572 {
573 static const char *name() { return "char2"; }
574 static const char *add_typedef() { return "typedef char2 Type;\n"; }
575 using scalar_type = cl_char;
576 typedef std::true_type is_vector_type;
577 };
578 template <>
579 struct TypeManager<subgroups::cl_char3>
580 : public CommonTypeManager<subgroups::cl_char3>
581 {
582 static const char *name() { return "char3"; }
583 static const char *add_typedef() { return "typedef char3 Type;\n"; }
584 typedef std::true_type is_sb_vector_size3;
585 using scalar_type = cl_char;
586 };
587 template <> struct TypeManager<cl_char4> : public CommonTypeManager<cl_char4>
588 {
589 static const char *name() { return "char4"; }
590 static const char *add_typedef() { return "typedef char4 Type;\n"; }
591 using scalar_type = cl_char;
592 typedef std::true_type is_vector_type;
593 };
594 template <> struct TypeManager<cl_char8> : public CommonTypeManager<cl_char8>
595 {
596 static const char *name() { return "char8"; }
597 static const char *add_typedef() { return "typedef char8 Type;\n"; }
598 using scalar_type = cl_char;
599 typedef std::true_type is_vector_type;
600 };
601 template <> struct TypeManager<cl_char16> : public CommonTypeManager<cl_char16>
602 {
603 static const char *name() { return "char16"; }
604 static const char *add_typedef() { return "typedef char16 Type;\n"; }
605 using scalar_type = cl_char;
606 typedef std::true_type is_vector_type;
607 };
608 // cl_uchar
609 template <> struct TypeManager<cl_uchar> : public CommonTypeManager<cl_uchar>
610 {
611 static const char *name() { return "uchar"; }
612 static const char *add_typedef() { return "typedef uchar Type;\n"; }
613 };
614 template <> struct TypeManager<cl_uchar2> : public CommonTypeManager<cl_uchar2>
615 {
616 static const char *name() { return "uchar2"; }
617 static const char *add_typedef() { return "typedef uchar2 Type;\n"; }
618 using scalar_type = cl_uchar;
619 typedef std::true_type is_vector_type;
620 };
621 template <>
622 struct TypeManager<subgroups::cl_uchar3>
623 : public CommonTypeManager<subgroups::cl_char3>
624 {
625 static const char *name() { return "uchar3"; }
626 static const char *add_typedef() { return "typedef uchar3 Type;\n"; }
627 typedef std::true_type is_sb_vector_size3;
628 using scalar_type = cl_uchar;
629 };
630 template <> struct TypeManager<cl_uchar4> : public CommonTypeManager<cl_uchar4>
631 {
632 static const char *name() { return "uchar4"; }
633 static const char *add_typedef() { return "typedef uchar4 Type;\n"; }
634 using scalar_type = cl_uchar;
635 typedef std::true_type is_vector_type;
636 };
637 template <> struct TypeManager<cl_uchar8> : public CommonTypeManager<cl_uchar8>
638 {
639 static const char *name() { return "uchar8"; }
640 static const char *add_typedef() { return "typedef uchar8 Type;\n"; }
641 using scalar_type = cl_uchar;
642 typedef std::true_type is_vector_type;
643 };
644 template <>
645 struct TypeManager<cl_uchar16> : public CommonTypeManager<cl_uchar16>
646 {
647 static const char *name() { return "uchar16"; }
648 static const char *add_typedef() { return "typedef uchar16 Type;\n"; }
649 using scalar_type = cl_uchar;
650 typedef std::true_type is_vector_type;
651 };
652 // cl_long
653 template <> struct TypeManager<cl_long> : public CommonTypeManager<cl_long>
654 {
655 static const char *name() { return "long"; }
656 static const char *add_typedef() { return "typedef long Type;\n"; }
657 static const bool type_supported(cl_device_id device)
658 {
659 return int64_ok(device);
660 }
661 };
662 template <> struct TypeManager<cl_long2> : public CommonTypeManager<cl_long2>
663 {
664 static const char *name() { return "long2"; }
665 static const char *add_typedef() { return "typedef long2 Type;\n"; }
666 using scalar_type = cl_long;
667 typedef std::true_type is_vector_type;
668 static const bool type_supported(cl_device_id device)
669 {
670 return int64_ok(device);
671 }
672 };
673 template <>
674 struct TypeManager<subgroups::cl_long3>
675 : public CommonTypeManager<subgroups::cl_long3>
676 {
677 static const char *name() { return "long3"; }
678 static const char *add_typedef() { return "typedef long3 Type;\n"; }
679 typedef std::true_type is_sb_vector_size3;
680 using scalar_type = cl_long;
681 static const bool type_supported(cl_device_id device)
682 {
683 return int64_ok(device);
684 }
685 };
686 template <> struct TypeManager<cl_long4> : public CommonTypeManager<cl_long4>
687 {
688 static const char *name() { return "long4"; }
689 static const char *add_typedef() { return "typedef long4 Type;\n"; }
690 using scalar_type = cl_long;
691 typedef std::true_type is_vector_type;
692 static const bool type_supported(cl_device_id device)
693 {
694 return int64_ok(device);
695 }
696 };
697 template <> struct TypeManager<cl_long8> : public CommonTypeManager<cl_long8>
698 {
699 static const char *name() { return "long8"; }
700 static const char *add_typedef() { return "typedef long8 Type;\n"; }
701 using scalar_type = cl_long;
702 typedef std::true_type is_vector_type;
703 static const bool type_supported(cl_device_id device)
704 {
705 return int64_ok(device);
706 }
707 };
708 template <> struct TypeManager<cl_long16> : public CommonTypeManager<cl_long16>
709 {
710 static const char *name() { return "long16"; }
711 static const char *add_typedef() { return "typedef long16 Type;\n"; }
712 using scalar_type = cl_long;
713 typedef std::true_type is_vector_type;
714 static const bool type_supported(cl_device_id device)
715 {
716 return int64_ok(device);
717 }
718 };
719 // cl_ulong
720 template <> struct TypeManager<cl_ulong> : public CommonTypeManager<cl_ulong>
721 {
722 static const char *name() { return "ulong"; }
723 static const char *add_typedef() { return "typedef ulong Type;\n"; }
724 static const bool type_supported(cl_device_id device)
725 {
726 return int64_ok(device);
727 }
728 };
729 template <> struct TypeManager<cl_ulong2> : public CommonTypeManager<cl_ulong2>
730 {
731 static const char *name() { return "ulong2"; }
732 static const char *add_typedef() { return "typedef ulong2 Type;\n"; }
733 using scalar_type = cl_ulong;
734 typedef std::true_type is_vector_type;
735 static const bool type_supported(cl_device_id device)
736 {
737 return int64_ok(device);
738 }
739 };
740 template <>
741 struct TypeManager<subgroups::cl_ulong3>
742 : public CommonTypeManager<subgroups::cl_ulong3>
743 {
744 static const char *name() { return "ulong3"; }
745 static const char *add_typedef() { return "typedef ulong3 Type;\n"; }
746 typedef std::true_type is_sb_vector_size3;
747 using scalar_type = cl_ulong;
748 static const bool type_supported(cl_device_id device)
749 {
750 return int64_ok(device);
751 }
752 };
753 template <> struct TypeManager<cl_ulong4> : public CommonTypeManager<cl_ulong4>
754 {
755 static const char *name() { return "ulong4"; }
756 static const char *add_typedef() { return "typedef ulong4 Type;\n"; }
757 using scalar_type = cl_ulong;
758 typedef std::true_type is_vector_type;
759 static const bool type_supported(cl_device_id device)
760 {
761 return int64_ok(device);
762 }
763 };
764 template <> struct TypeManager<cl_ulong8> : public CommonTypeManager<cl_ulong8>
765 {
766 static const char *name() { return "ulong8"; }
767 static const char *add_typedef() { return "typedef ulong8 Type;\n"; }
768 using scalar_type = cl_ulong;
769 typedef std::true_type is_vector_type;
770 static const bool type_supported(cl_device_id device)
771 {
772 return int64_ok(device);
773 }
774 };
775 template <>
776 struct TypeManager<cl_ulong16> : public CommonTypeManager<cl_ulong16>
777 {
778 static const char *name() { return "ulong16"; }
779 static const char *add_typedef() { return "typedef ulong16 Type;\n"; }
780 using scalar_type = cl_ulong;
781 typedef std::true_type is_vector_type;
782 static const bool type_supported(cl_device_id device)
783 {
784 return int64_ok(device);
785 }
786 };
787
788 // cl_float
789 template <> struct TypeManager<cl_float> : public CommonTypeManager<cl_float>
790 {
791 static const char *name() { return "float"; }
792 static const char *add_typedef() { return "typedef float Type;\n"; }
793 static cl_float identify_limits(ArithmeticOp operation)
794 {
795 switch (operation)
796 {
797 case ArithmeticOp::add_: return 0.0f;
798 case ArithmeticOp::max_:
799 return -std::numeric_limits<float>::infinity();
800 case ArithmeticOp::min_:
801 return std::numeric_limits<float>::infinity();
802 case ArithmeticOp::mul_: return (cl_float)1;
803 default: log_error("Unknown operation request"); break;
804 }
805 return 0;
806 }
807 };
808 template <> struct TypeManager<cl_float2> : public CommonTypeManager<cl_float2>
809 {
810 static const char *name() { return "float2"; }
811 static const char *add_typedef() { return "typedef float2 Type;\n"; }
812 using scalar_type = cl_float;
813 typedef std::true_type is_vector_type;
814 };
815 template <>
816 struct TypeManager<subgroups::cl_float3>
817 : public CommonTypeManager<subgroups::cl_float3>
818 {
819 static const char *name() { return "float3"; }
820 static const char *add_typedef() { return "typedef float3 Type;\n"; }
821 typedef std::true_type is_sb_vector_size3;
822 using scalar_type = cl_float;
823 };
824 template <> struct TypeManager<cl_float4> : public CommonTypeManager<cl_float4>
825 {
826 static const char *name() { return "float4"; }
827 static const char *add_typedef() { return "typedef float4 Type;\n"; }
828 using scalar_type = cl_float;
829 typedef std::true_type is_vector_type;
830 };
831 template <> struct TypeManager<cl_float8> : public CommonTypeManager<cl_float8>
832 {
833 static const char *name() { return "float8"; }
834 static const char *add_typedef() { return "typedef float8 Type;\n"; }
835 using scalar_type = cl_float;
836 typedef std::true_type is_vector_type;
837 };
838 template <>
839 struct TypeManager<cl_float16> : public CommonTypeManager<cl_float16>
840 {
841 static const char *name() { return "float16"; }
842 static const char *add_typedef() { return "typedef float16 Type;\n"; }
843 using scalar_type = cl_float;
844 typedef std::true_type is_vector_type;
845 };
846
847 // cl_double
848 template <> struct TypeManager<cl_double> : public CommonTypeManager<cl_double>
849 {
850 static const char *name() { return "double"; }
851 static const char *add_typedef() { return "typedef double Type;\n"; }
852 static cl_double identify_limits(ArithmeticOp operation)
853 {
854 switch (operation)
855 {
856 case ArithmeticOp::add_: return 0.0;
857 case ArithmeticOp::max_:
858 return -std::numeric_limits<double>::infinity();
859 case ArithmeticOp::min_:
860 return std::numeric_limits<double>::infinity();
861 case ArithmeticOp::mul_: return (cl_double)1;
862 default: log_error("Unknown operation request"); break;
863 }
864 return 0;
865 }
866 static const bool type_supported(cl_device_id device)
867 {
868 return double_ok(device);
869 }
870 };
871 template <>
872 struct TypeManager<cl_double2> : public CommonTypeManager<cl_double2>
873 {
874 static const char *name() { return "double2"; }
875 static const char *add_typedef() { return "typedef double2 Type;\n"; }
876 using scalar_type = cl_double;
877 typedef std::true_type is_vector_type;
878 static const bool type_supported(cl_device_id device)
879 {
880 return double_ok(device);
881 }
882 };
883 template <>
884 struct TypeManager<subgroups::cl_double3>
885 : public CommonTypeManager<subgroups::cl_double3>
886 {
887 static const char *name() { return "double3"; }
888 static const char *add_typedef() { return "typedef double3 Type;\n"; }
889 typedef std::true_type is_sb_vector_size3;
890 using scalar_type = cl_double;
891 static const bool type_supported(cl_device_id device)
892 {
893 return double_ok(device);
894 }
895 };
896 template <>
897 struct TypeManager<cl_double4> : public CommonTypeManager<cl_double4>
898 {
899 static const char *name() { return "double4"; }
900 static const char *add_typedef() { return "typedef double4 Type;\n"; }
901 using scalar_type = cl_double;
902 typedef std::true_type is_vector_type;
903 static const bool type_supported(cl_device_id device)
904 {
905 return double_ok(device);
906 }
907 };
908 template <>
909 struct TypeManager<cl_double8> : public CommonTypeManager<cl_double8>
910 {
911 static const char *name() { return "double8"; }
912 static const char *add_typedef() { return "typedef double8 Type;\n"; }
913 using scalar_type = cl_double;
914 typedef std::true_type is_vector_type;
915 static const bool type_supported(cl_device_id device)
916 {
917 return double_ok(device);
918 }
919 };
920 template <>
921 struct TypeManager<cl_double16> : public CommonTypeManager<cl_double16>
922 {
923 static const char *name() { return "double16"; }
924 static const char *add_typedef() { return "typedef double16 Type;\n"; }
925 using scalar_type = cl_double;
926 typedef std::true_type is_vector_type;
927 static const bool type_supported(cl_device_id device)
928 {
929 return double_ok(device);
930 }
931 };
932
933 // cl_half
934 template <>
935 struct TypeManager<subgroups::cl_half>
936 : public CommonTypeManager<subgroups::cl_half>
937 {
938 static const char *name() { return "half"; }
939 static const char *add_typedef() { return "typedef half Type;\n"; }
940 typedef std::true_type is_sb_scalar_type;
941 static subgroups::cl_half identify_limits(ArithmeticOp operation)
942 {
943 switch (operation)
944 {
945 case ArithmeticOp::add_: return { 0x0000 };
946 case ArithmeticOp::max_: return { 0xfc00 };
947 case ArithmeticOp::min_: return { 0x7c00 };
948 case ArithmeticOp::mul_: return { 0x3c00 };
949 default: log_error("Unknown operation request"); break;
950 }
951 return { 0 };
952 }
953 static const bool type_supported(cl_device_id device)
954 {
955 return half_ok(device);
956 }
957 };
958 template <>
959 struct TypeManager<subgroups::cl_half2>
960 : public CommonTypeManager<subgroups::cl_half2>
961 {
962 static const char *name() { return "half2"; }
963 static const char *add_typedef() { return "typedef half2 Type;\n"; }
964 using scalar_type = subgroups::cl_half;
965 typedef std::true_type is_sb_vector_type;
966 static const bool type_supported(cl_device_id device)
967 {
968 return half_ok(device);
969 }
970 };
971 template <>
972 struct TypeManager<subgroups::cl_half3>
973 : public CommonTypeManager<subgroups::cl_half3>
974 {
975 static const char *name() { return "half3"; }
976 static const char *add_typedef() { return "typedef half3 Type;\n"; }
977 typedef std::true_type is_sb_vector_size3;
978 using scalar_type = subgroups::cl_half;
979
980 static const bool type_supported(cl_device_id device)
981 {
982 return half_ok(device);
983 }
984 };
985 template <>
986 struct TypeManager<subgroups::cl_half4>
987 : public CommonTypeManager<subgroups::cl_half4>
988 {
989 static const char *name() { return "half4"; }
990 static const char *add_typedef() { return "typedef half4 Type;\n"; }
991 using scalar_type = subgroups::cl_half;
992 typedef std::true_type is_sb_vector_type;
993 static const bool type_supported(cl_device_id device)
994 {
995 return half_ok(device);
996 }
997 };
998 template <>
999 struct TypeManager<subgroups::cl_half8>
1000 : public CommonTypeManager<subgroups::cl_half8>
1001 {
1002 static const char *name() { return "half8"; }
1003 static const char *add_typedef() { return "typedef half8 Type;\n"; }
1004 using scalar_type = subgroups::cl_half;
1005 typedef std::true_type is_sb_vector_type;
1006
1007 static const bool type_supported(cl_device_id device)
1008 {
1009 return half_ok(device);
1010 }
1011 };
1012 template <>
1013 struct TypeManager<subgroups::cl_half16>
1014 : public CommonTypeManager<subgroups::cl_half16>
1015 {
1016 static const char *name() { return "half16"; }
1017 static const char *add_typedef() { return "typedef half16 Type;\n"; }
1018 using scalar_type = subgroups::cl_half;
1019 typedef std::true_type is_sb_vector_type;
1020 static const bool type_supported(cl_device_id device)
1021 {
1022 return half_ok(device);
1023 }
1024 };
1025
1026 // set scalar value to vector of halfs
1027 template <typename Ty, int N = 0>
1028 typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value>::type
1029 set_value(Ty &lhs, const cl_ulong &rhs)
1030 {
1031 const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1032 for (auto i = 0; i < size; ++i)
1033 {
1034 lhs.data.s[i] = rhs;
1035 }
1036 }
1037
1038
1039 // set scalar value to vector
1040 template <typename Ty>
1041 typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
1042 set_value(Ty &lhs, const cl_ulong &rhs)
1043 {
1044 const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1045 for (auto i = 0; i < size; ++i)
1046 {
1047 lhs.s[i] = rhs;
1048 }
1049 }
1050
1051 // set vector to vector value
1052 template <typename Ty>
1053 typename std::enable_if<TypeManager<Ty>::is_vector_type::value>::type
1054 set_value(Ty &lhs, const Ty &rhs)
1055 {
1056 lhs = rhs;
1057 }
1058
1059 // set scalar value to vector size 3
1060 template <typename Ty, int N = 0>
1061 typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value>::type
1062 set_value(Ty &lhs, const cl_ulong &rhs)
1063 {
1064 for (auto i = 0; i < 3; ++i)
1065 {
1066 lhs.data.s[i] = rhs;
1067 }
1068 }
1069
1070 // set scalar value to scalar
1071 template <typename Ty>
1072 typename std::enable_if<std::is_scalar<Ty>::value>::type
1073 set_value(Ty &lhs, const cl_ulong &rhs)
1074 {
1075 lhs = static_cast<Ty>(rhs);
1076 }
1077
1078 // set scalar value to half scalar
1079 template <typename Ty>
1080 typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value>::type
1081 set_value(Ty &lhs, const cl_ulong &rhs)
1082 {
1083 lhs.data = rhs;
1084 }
1085
1086 // compare for common vectors
1087 template <typename Ty>
1088 typename std::enable_if<TypeManager<Ty>::is_vector_type::value, bool>::type
1089 compare(const Ty &lhs, const Ty &rhs)
1090 {
1091 const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1092 for (auto i = 0; i < size; ++i)
1093 {
1094 if (lhs.s[i] != rhs.s[i])
1095 {
1096 return false;
1097 }
1098 }
1099 return true;
1100 }
1101
1102 // compare for vectors 3
1103 template <typename Ty>
1104 typename std::enable_if<TypeManager<Ty>::is_sb_vector_size3::value, bool>::type
1105 compare(const Ty &lhs, const Ty &rhs)
1106 {
1107 for (auto i = 0; i < 3; ++i)
1108 {
1109 if (lhs.data.s[i] != rhs.data.s[i])
1110 {
1111 return false;
1112 }
1113 }
1114 return true;
1115 }
1116
1117 // compare for half vectors
1118 template <typename Ty>
1119 typename std::enable_if<TypeManager<Ty>::is_sb_vector_type::value, bool>::type
1120 compare(const Ty &lhs, const Ty &rhs)
1121 {
1122 const int size = sizeof(Ty) / sizeof(typename TypeManager<Ty>::scalar_type);
1123 for (auto i = 0; i < size; ++i)
1124 {
1125 if (lhs.data.s[i] != rhs.data.s[i])
1126 {
1127 return false;
1128 }
1129 }
1130 return true;
1131 }
1132
1133 // compare for scalars
1134 template <typename Ty>
1135 typename std::enable_if<std::is_scalar<Ty>::value, bool>::type
1136 compare(const Ty &lhs, const Ty &rhs)
1137 {
1138 return lhs == rhs;
1139 }
1140
1141 // compare for scalar halfs
1142 template <typename Ty>
1143 typename std::enable_if<TypeManager<Ty>::is_sb_scalar_type::value, bool>::type
1144 compare(const Ty &lhs, const Ty &rhs)
1145 {
1146 return lhs.data == rhs.data;
1147 }
1148
1149 template <typename Ty> inline bool compare_ordered(const Ty &lhs, const Ty &rhs)
1150 {
1151 return lhs == rhs;
1152 }
1153
1154 template <>
1155 inline bool compare_ordered(const subgroups::cl_half &lhs,
1156 const subgroups::cl_half &rhs)
1157 {
1158 return cl_half_to_float(lhs.data) == cl_half_to_float(rhs.data);
1159 }
1160
1161 template <typename Ty>
1162 inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs)
1163 {
1164 return cl_half_to_float(lhs.data) == rhs;
1165 }
1166
1167 // Run a test kernel to compute the result of a built-in on an input
1168 static int run_kernel(cl_context context, cl_command_queue queue,
1169 cl_kernel kernel, size_t global, size_t local,
1170 void *idata, size_t isize, void *mdata, size_t msize,
1171 void *odata, size_t osize, size_t tsize = 0)
1172 {
1173 clMemWrapper in;
1174 clMemWrapper xy;
1175 clMemWrapper out;
1176 clMemWrapper tmp;
1177 int error;
1178
1179 in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error);
1180 test_error(error, "clCreateBuffer failed");
1181
1182 xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error);
1183 test_error(error, "clCreateBuffer failed");
1184
1185 out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error);
1186 test_error(error, "clCreateBuffer failed");
1187
1188 if (tsize)
1189 {
1190 tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS,
1191 tsize, NULL, &error);
1192 test_error(error, "clCreateBuffer failed");
1193 }
1194
1195 error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in);
1196 test_error(error, "clSetKernelArg failed");
1197
1198 error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy);
1199 test_error(error, "clSetKernelArg failed");
1200
1201 error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out);
1202 test_error(error, "clSetKernelArg failed");
1203
1204 if (tsize)
1205 {
1206 error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp);
1207 test_error(error, "clSetKernelArg failed");
1208 }
1209
1210 error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL,
1211 NULL);
1212 test_error(error, "clEnqueueWriteBuffer failed");
1213
1214 error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
1215 NULL);
1216 test_error(error, "clEnqueueWriteBuffer failed");
1217 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
1218 NULL, NULL);
1219 test_error(error, "clEnqueueNDRangeKernel failed");
1220
1221 error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL,
1222 NULL);
1223 test_error(error, "clEnqueueReadBuffer failed");
1224
1225 error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL,
1226 NULL);
1227 test_error(error, "clEnqueueReadBuffer failed");
1228
1229 error = clFinish(queue);
1230 test_error(error, "clFinish failed");
1231
1232 return error;
1233 }
1234
1235 // Driver for testing a single built in function
1236 template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
1237 {
1238 static int mrun(cl_device_id device, cl_context context,
1239 cl_command_queue queue, int num_elements, const char *kname,
1240 const char *src, WorkGroupParams test_params)
1241 {
1242 int error = TEST_PASS;
1243 for (auto &mask : test_params.all_work_item_masks)
1244 {
1245 test_params.work_items_mask = mask;
1246 error |= run(device, context, queue, num_elements, kname, src,
1247 test_params);
1248 }
1249 return error;
1250 };
1251 static int run(cl_device_id device, cl_context context,
1252 cl_command_queue queue, int num_elements, const char *kname,
1253 const char *src, WorkGroupParams test_params)
1254 {
1255 size_t tmp;
1256 int error;
1257 int subgroup_size, num_subgroups;
1258 size_t realSize;
1259 size_t global = test_params.global_workgroup_size;
1260 size_t local = test_params.local_workgroup_size;
1261 clProgramWrapper program;
1262 clKernelWrapper kernel;
1263 cl_platform_id platform;
1264 std::vector<cl_int> sgmap;
1265 sgmap.resize(4 * global);
1266 std::vector<Ty> mapin;
1267 mapin.resize(local);
1268 std::vector<Ty> mapout;
1269 mapout.resize(local);
1270 std::stringstream kernel_sstr;
1271 if (test_params.work_items_mask != 0)
1272 {
1273 kernel_sstr << "#define WORK_ITEMS_MASK ";
1274 kernel_sstr << "0x" << std::hex << test_params.work_items_mask
1275 << "\n";
1276 }
1277
1278
1279 kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS ";
1280 kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n";
1281 // Make sure a test of type Ty is supported by the device
1282 if (!TypeManager<Ty>::type_supported(device))
1283 {
1284 log_info("Data type not supported : %s\n", TypeManager<Ty>::name());
1285 return 0;
1286 }
1287 else
1288 {
1289 if (strstr(TypeManager<Ty>::name(), "double"))
1290 {
1291 kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
1292 }
1293 else if (strstr(TypeManager<Ty>::name(), "half"))
1294 {
1295 kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n";
1296 }
1297 }
1298
1299 for (std::string extension : test_params.required_extensions)
1300 {
1301 if (!is_extension_available(device, extension.c_str()))
1302 {
1303 log_info("The extension %s not supported on this device. SKIP "
1304 "testing - kernel %s data type %s\n",
1305 extension.c_str(), kname, TypeManager<Ty>::name());
1306 return TEST_PASS;
1307 }
1308 kernel_sstr << "#pragma OPENCL EXTENSION " + extension
1309 + ": enable\n";
1310 }
1311
1312 error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
1313 (void *)&platform, NULL);
1314 test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");
1315 if (test_params.use_core_subgroups)
1316 {
1317 kernel_sstr
1318 << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
1319 }
1320 kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); "
1321 "M[I].y = get_sub_group_id();\n";
1322 kernel_sstr << TypeManager<Ty>::add_typedef();
1323 kernel_sstr << src;
1324 const std::string &kernel_str = kernel_sstr.str();
1325 const char *kernel_src = kernel_str.c_str();
1326
1327 error = create_single_kernel_helper(context, &program, &kernel, 1,
1328 &kernel_src, kname);
1329 if (error != 0) return error;
1330
1331 // Determine some local dimensions to use for the test.
1332 error = get_max_common_work_group_size(
1333 context, kernel, test_params.global_workgroup_size, &local);
1334 test_error(error, "get_max_common_work_group_size failed");
1335
1336 // Limit it a bit so we have muliple work groups
1337 // Ideally this will still be large enough to give us multiple
1338 if (local > test_params.local_workgroup_size)
1339 local = test_params.local_workgroup_size;
1340
1341
1342 // Get the sub group info
1343 subgroupsAPI subgroupsApiSet(platform, test_params.use_core_subgroups);
1344 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
1345 subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
1346 if (clGetKernelSubGroupInfo_ptr == NULL)
1347 {
1348 log_error("ERROR: %s function not available",
1349 subgroupsApiSet.clGetKernelSubGroupInfo_name);
1350 return TEST_FAIL;
1351 }
1352 error = clGetKernelSubGroupInfo_ptr(
1353 kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
1354 sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
1355 if (error != CL_SUCCESS)
1356 {
1357 log_error("ERROR: %s function error for "
1358 "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE",
1359 subgroupsApiSet.clGetKernelSubGroupInfo_name);
1360 return TEST_FAIL;
1361 }
1362
1363 subgroup_size = (int)tmp;
1364
1365 error = clGetKernelSubGroupInfo_ptr(
1366 kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
1367 sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL);
1368 if (error != CL_SUCCESS)
1369 {
1370 log_error("ERROR: %s function error for "
1371 "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE",
1372 subgroupsApiSet.clGetKernelSubGroupInfo_name);
1373 return TEST_FAIL;
1374 }
1375
1376 num_subgroups = (int)tmp;
1377 // Make sure the number of sub groups is what we expect
1378 if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
1379 {
1380 log_error("ERROR: unexpected number of subgroups (%d) returned\n",
1381 num_subgroups);
1382 return TEST_FAIL;
1383 }
1384
1385 std::vector<Ty> idata;
1386 std::vector<Ty> odata;
1387 size_t input_array_size = global;
1388 size_t output_array_size = global;
1389 int dynscl = test_params.dynsc;
1390
1391 if (dynscl != 0)
1392 {
1393 input_array_size =
1394 (int)global / (int)local * num_subgroups * dynscl;
1395 output_array_size = (int)global / (int)local * dynscl;
1396 }
1397
1398 idata.resize(input_array_size);
1399 odata.resize(output_array_size);
1400
1401 // Run the kernel once on zeroes to get the map
1402 memset(idata.data(), 0, input_array_size * sizeof(Ty));
1403 error = run_kernel(context, queue, kernel, global, local, idata.data(),
1404 input_array_size * sizeof(Ty), sgmap.data(),
1405 global * sizeof(cl_int4), odata.data(),
1406 output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
1407 test_error(error, "Running kernel first time failed");
1408
1409 // Generate the desired input for the kernel
1410
1411 test_params.subgroup_size = subgroup_size;
1412 Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params);
1413 error = run_kernel(context, queue, kernel, global, local, idata.data(),
1414 input_array_size * sizeof(Ty), sgmap.data(),
1415 global * sizeof(cl_int4), odata.data(),
1416 output_array_size * sizeof(Ty), TSIZE * sizeof(Ty));
1417 test_error(error, "Running kernel second time failed");
1418
1419 // Check the result
1420 error = Fns::chk(idata.data(), odata.data(), mapin.data(),
1421 mapout.data(), sgmap.data(), test_params);
1422 test_error(error, "Data verification failed");
1423 return TEST_PASS;
1424 }
1425 };
1426
1427 static void set_last_workgroup_params(int non_uniform_size,
1428 int &number_of_subgroups,
1429 int subgroup_size, int &workgroup_size,
1430 int &last_subgroup_size)
1431 {
1432 number_of_subgroups = 1 + non_uniform_size / subgroup_size;
1433 last_subgroup_size = non_uniform_size % subgroup_size;
1434 workgroup_size = non_uniform_size;
1435 }
1436
1437 template <typename Ty>
1438 static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
1439 int current_sbs)
1440 {
1441 int randomize_data = (int)(genrand_int32(gMTdata) % 3);
1442 // Initialize data matrix indexed by local id and sub group id
1443 switch (randomize_data)
1444 {
1445 case 0:
1446 memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
1447 break;
1448 case 1: {
1449 memset(&workgroup[wg_offset], 0, current_sbs * sizeof(Ty));
1450 int wi_id = (int)(genrand_int32(gMTdata) % (cl_uint)current_sbs);
1451 set_value(workgroup[wg_offset + wi_id], 41);
1452 }
1453 break;
1454 case 2:
1455 memset(&workgroup[wg_offset], 0xff, current_sbs * sizeof(Ty));
1456 break;
1457 }
1458 }
1459
1460 struct RunTestForType
1461 {
1462 RunTestForType(cl_device_id device, cl_context context,
1463 cl_command_queue queue, int num_elements,
1464 WorkGroupParams test_params)
1465 : device_(device), context_(context), queue_(queue),
1466 num_elements_(num_elements), test_params_(test_params)
1467 {}
1468 template <typename T, typename U>
1469 int run_impl(const char *kernel_name, const char *source)
1470 {
1471 int error = TEST_PASS;
1472 if (test_params_.all_work_item_masks.size() > 0)
1473 {
1474 error = test<T, U>::mrun(device_, context_, queue_, num_elements_,
1475 kernel_name, source, test_params_);
1476 }
1477 else
1478 {
1479 error = test<T, U>::run(device_, context_, queue_, num_elements_,
1480 kernel_name, source, test_params_);
1481 }
1482
1483 return error;
1484 }
1485
1486 private:
1487 cl_device_id device_;
1488 cl_context context_;
1489 cl_command_queue queue_;
1490 int num_elements_;
1491 WorkGroupParams test_params_;
1492 };
1493
1494 #endif
1495