1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/typeWrappers.h"
19 #include "subgroup_common_templates.h"
20
21 namespace {
22
23 static const char *scinadd_non_uniform_source = R"(
24 __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
25 int gid = get_global_id(0);
26 XY(xy,gid);
27 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
28 if (elect_work_item & WORK_ITEMS_MASK){
29 out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]);
30 }
31 }
32 )";
33
34 static const char *scinmax_non_uniform_source = R"(
35 __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
36 int gid = get_global_id(0);
37 XY(xy,gid);
38 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
39 if (elect_work_item & WORK_ITEMS_MASK){
40 out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]);
41 }
42 }
43 )";
44
45 static const char *scinmin_non_uniform_source = R"(
46 __kernel void test_scinmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
47 int gid = get_global_id(0);
48 XY(xy,gid);
49 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
50 if (elect_work_item & WORK_ITEMS_MASK){
51 out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]);
52 }
53 }
54 )";
55
56 static const char *scinmul_non_uniform_source = R"(
57 __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
58 int gid = get_global_id(0);
59 XY(xy,gid);
60 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
61 if (elect_work_item & WORK_ITEMS_MASK){
62 out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]);
63 }
64 }
65 )";
66
67 static const char *scinand_non_uniform_source = R"(
68 __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
69 int gid = get_global_id(0);
70 XY(xy,gid);
71 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
72 if (elect_work_item & WORK_ITEMS_MASK){
73 out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]);
74 }
75 }
76 )";
77
78 static const char *scinor_non_uniform_source = R"(
79 __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
80 int gid = get_global_id(0);
81 XY(xy,gid);
82 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
83 if (elect_work_item & WORK_ITEMS_MASK){
84 out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]);
85 }
86 }
87 )";
88
89 static const char *scinxor_non_uniform_source = R"(
90 __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
91 int gid = get_global_id(0);
92 XY(xy,gid);
93 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
94 if (elect_work_item & WORK_ITEMS_MASK){
95 out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]);
96 }
97 }
98 )";
99
100 static const char *scinand_non_uniform_logical_source = R"(
101 __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
102 int gid = get_global_id(0);
103 XY(xy,gid);
104 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
105 if (elect_work_item & WORK_ITEMS_MASK){
106 out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]);
107 }
108 }
109 )";
110
111 static const char *scinor_non_uniform_logical_source = R"(
112 __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
113 int gid = get_global_id(0);
114 XY(xy,gid);
115 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
116 if (elect_work_item & WORK_ITEMS_MASK){
117 out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]);
118 }
119 }
120 )";
121
122 static const char *scinxor_non_uniform_logical_source = R"(
123 __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
124 int gid = get_global_id(0);
125 XY(xy,gid);
126 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
127 if (elect_work_item & WORK_ITEMS_MASK){
128 out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]);
129 }
130 }
131 )";
132
133 static const char *scexadd_non_uniform_source = R"(
134 __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
135 int gid = get_global_id(0);
136 XY(xy,gid);
137 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
138 if (elect_work_item & WORK_ITEMS_MASK){
139 out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]);
140 }
141 }
142 )";
143
144 static const char *scexmax_non_uniform_source = R"(
145 __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
146 int gid = get_global_id(0);
147 XY(xy,gid);
148 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
149 if (elect_work_item & WORK_ITEMS_MASK){
150 out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]);
151 }
152 }
153 )";
154
155 static const char *scexmin_non_uniform_source = R"(
156 __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
157 int gid = get_global_id(0);
158 XY(xy,gid);
159 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
160 if (elect_work_item & WORK_ITEMS_MASK){
161 out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]);
162 }
163 }
164 )";
165
166 static const char *scexmul_non_uniform_source = R"(
167 __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
168 int gid = get_global_id(0);
169 XY(xy,gid);
170 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
171 if (elect_work_item & WORK_ITEMS_MASK){
172 out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]);
173 }
174 }
175 )";
176
177 static const char *scexand_non_uniform_source = R"(
178 __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
179 int gid = get_global_id(0);
180 XY(xy,gid);
181 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
182 if (elect_work_item & WORK_ITEMS_MASK){
183 out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]);
184 }
185 }
186 )";
187
188 static const char *scexor_non_uniform_source = R"(
189 __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
190 int gid = get_global_id(0);
191 XY(xy,gid);
192 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
193 if (elect_work_item & WORK_ITEMS_MASK){
194 out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]);
195 }
196 }
197 )";
198
199 static const char *scexxor_non_uniform_source = R"(
200 __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
201 int gid = get_global_id(0);
202 XY(xy,gid);
203 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
204 if (elect_work_item & WORK_ITEMS_MASK){
205 out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]);
206 }
207 }
208 )";
209
210 static const char *scexand_non_uniform_logical_source = R"(
211 __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
212 int gid = get_global_id(0);
213 XY(xy,gid);
214 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
215 if (elect_work_item & WORK_ITEMS_MASK){
216 out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]);
217 }
218 }
219 )";
220
221 static const char *scexor_non_uniform_logical_source = R"(
222 __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
223 int gid = get_global_id(0);
224 XY(xy,gid);
225 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
226 if (elect_work_item & WORK_ITEMS_MASK){
227 out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]);
228 }
229 }
230 )";
231
232 static const char *scexxor_non_uniform_logical_source = R"(
233 __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
234 int gid = get_global_id(0);
235 XY(xy,gid);
236 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
237 if (elect_work_item & WORK_ITEMS_MASK){
238 out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]);
239 }
240 }
241 )";
242
243 static const char *redadd_non_uniform_source = R"(
244 __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
245 int gid = get_global_id(0);
246 XY(xy,gid);
247 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
248 if (elect_work_item & WORK_ITEMS_MASK){
249 out[gid] = sub_group_non_uniform_reduce_add(in[gid]);
250 }
251 }
252 )";
253
254 static const char *redmax_non_uniform_source = R"(
255 __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
256 int gid = get_global_id(0);
257 XY(xy,gid);
258 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
259 if (elect_work_item & WORK_ITEMS_MASK){
260 out[gid] = sub_group_non_uniform_reduce_max(in[gid]);
261 }
262 }
263 )";
264
265 static const char *redmin_non_uniform_source = R"(
266 __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
267 int gid = get_global_id(0);
268 XY(xy,gid);
269 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
270 if (elect_work_item & WORK_ITEMS_MASK){
271 out[gid] = sub_group_non_uniform_reduce_min(in[gid]);
272 }
273 }
274 )";
275
276 static const char *redmul_non_uniform_source = R"(
277 __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
278 int gid = get_global_id(0);
279 XY(xy,gid);
280 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
281 if (elect_work_item & WORK_ITEMS_MASK){
282 out[gid] = sub_group_non_uniform_reduce_mul(in[gid]);
283 }
284 }
285 )";
286
287 static const char *redand_non_uniform_source = R"(
288 __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
289 int gid = get_global_id(0);
290 XY(xy,gid);
291 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
292 if (elect_work_item & WORK_ITEMS_MASK){
293 out[gid] = sub_group_non_uniform_reduce_and(in[gid]);
294 }
295 }
296 )";
297
298 static const char *redor_non_uniform_source = R"(
299 __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
300 int gid = get_global_id(0);
301 XY(xy,gid);
302 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
303 if (elect_work_item & WORK_ITEMS_MASK){
304 out[gid] = sub_group_non_uniform_reduce_or(in[gid]);
305 }
306 }
307 )";
308
309 static const char *redxor_non_uniform_source = R"(
310 __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
311 int gid = get_global_id(0);
312 XY(xy,gid);
313 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
314 if (elect_work_item & WORK_ITEMS_MASK){
315 out[gid] = sub_group_non_uniform_reduce_xor(in[gid]);
316 }
317 }
318 )";
319
320 static const char *redand_non_uniform_logical_source = R"(
321 __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
322 int gid = get_global_id(0);
323 XY(xy,gid);
324 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
325 if (elect_work_item & WORK_ITEMS_MASK){
326 out[gid] = sub_group_non_uniform_reduce_logical_and(in[gid]);
327 }
328 }
329 )";
330
331 static const char *redor_non_uniform_logical_source = R"(
332 __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
333 int gid = get_global_id(0);
334 XY(xy,gid);
335 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
336 if (elect_work_item & WORK_ITEMS_MASK){
337 out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]);
338 }
339 }
340 )";
341
342 static const char *redxor_non_uniform_logical_source = R"(
343 __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
344 int gid = get_global_id(0);
345 XY(xy,gid);
346 int elect_work_item = 1 << (get_sub_group_local_id() % 32);
347 if (elect_work_item & WORK_ITEMS_MASK){
348 out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]);
349 }
350 }
351 )";
352
353 template <typename T>
run_functions_add_mul_max_min_for_type(RunTestForType rft)354 int run_functions_add_mul_max_min_for_type(RunTestForType rft)
355 {
356 int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
357 "test_scinadd_non_uniform", scinadd_non_uniform_source);
358 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::mul_>>(
359 "test_scinmul_non_uniform", scinmul_non_uniform_source);
360 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
361 "test_scinmax_non_uniform", scinmax_non_uniform_source);
362 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
363 "test_scinmin_non_uniform", scinmin_non_uniform_source);
364 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
365 "test_scexadd_non_uniform", scexadd_non_uniform_source);
366 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::mul_>>(
367 "test_scexmul_non_uniform", scexmul_non_uniform_source);
368 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
369 "test_scexmax_non_uniform", scexmax_non_uniform_source);
370 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
371 "test_scexmin_non_uniform", scexmin_non_uniform_source);
372 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>(
373 "test_redadd_non_uniform", redadd_non_uniform_source);
374 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::mul_>>(
375 "test_redmul_non_uniform", redmul_non_uniform_source);
376 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>(
377 "test_redmax_non_uniform", redmax_non_uniform_source);
378 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>(
379 "test_redmin_non_uniform", redmin_non_uniform_source);
380 return error;
381 }
382
run_functions_and_or_xor_for_type(RunTestForType rft)383 template <typename T> int run_functions_and_or_xor_for_type(RunTestForType rft)
384 {
385 int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::and_>>(
386 "test_scinand_non_uniform", scinand_non_uniform_source);
387 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::or_>>(
388 "test_scinor_non_uniform", scinor_non_uniform_source);
389 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::xor_>>(
390 "test_scinxor_non_uniform", scinxor_non_uniform_source);
391 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::and_>>(
392 "test_scexand_non_uniform", scexand_non_uniform_source);
393 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::or_>>(
394 "test_scexor_non_uniform", scexor_non_uniform_source);
395 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::xor_>>(
396 "test_scexxor_non_uniform", scexxor_non_uniform_source);
397 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::and_>>(
398 "test_redand_non_uniform", redand_non_uniform_source);
399 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::or_>>(
400 "test_redor_non_uniform", redor_non_uniform_source);
401 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::xor_>>(
402 "test_redxor_non_uniform", redxor_non_uniform_source);
403 return error;
404 }
405
406 template <typename T>
run_functions_logical_and_or_xor_for_type(RunTestForType rft)407 int run_functions_logical_and_or_xor_for_type(RunTestForType rft)
408 {
409 int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_and>>(
410 "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source);
411 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_or>>(
412 "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source);
413 error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_xor>>(
414 "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source);
415 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_and>>(
416 "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source);
417 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_or>>(
418 "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source);
419 error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_xor>>(
420 "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source);
421 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_and>>(
422 "test_redand_non_uniform_logical", redand_non_uniform_logical_source);
423 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_or>>(
424 "test_redor_non_uniform_logical", redor_non_uniform_logical_source);
425 error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_xor>>(
426 "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source);
427 return error;
428 }
429
430 }
431
test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)432 int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
433 cl_context context,
434 cl_command_queue queue,
435 int num_elements)
436 {
437 std::vector<std::string> required_extensions = {
438 "cl_khr_subgroup_non_uniform_arithmetic"
439 };
440 std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
441 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
442 0x00ffff00, 0x80000000, 0xaaaaaaaa };
443
444 constexpr size_t global_work_size = 2000;
445 constexpr size_t local_work_size = 200;
446 WorkGroupParams test_params(global_work_size, local_work_size,
447 required_extensions, masks);
448 RunTestForType rft(device, context, queue, num_elements, test_params);
449
450 int error = run_functions_add_mul_max_min_for_type<cl_int>(rft);
451 error |= run_functions_add_mul_max_min_for_type<cl_uint>(rft);
452 error |= run_functions_add_mul_max_min_for_type<cl_long>(rft);
453 error |= run_functions_add_mul_max_min_for_type<cl_ulong>(rft);
454 error |= run_functions_add_mul_max_min_for_type<cl_short>(rft);
455 error |= run_functions_add_mul_max_min_for_type<cl_ushort>(rft);
456 error |= run_functions_add_mul_max_min_for_type<cl_char>(rft);
457 error |= run_functions_add_mul_max_min_for_type<cl_uchar>(rft);
458 error |= run_functions_add_mul_max_min_for_type<cl_float>(rft);
459 error |= run_functions_add_mul_max_min_for_type<cl_double>(rft);
460 error |= run_functions_add_mul_max_min_for_type<subgroups::cl_half>(rft);
461
462 error |= run_functions_and_or_xor_for_type<cl_int>(rft);
463 error |= run_functions_and_or_xor_for_type<cl_uint>(rft);
464 error |= run_functions_and_or_xor_for_type<cl_long>(rft);
465 error |= run_functions_and_or_xor_for_type<cl_ulong>(rft);
466 error |= run_functions_and_or_xor_for_type<cl_short>(rft);
467 error |= run_functions_and_or_xor_for_type<cl_ushort>(rft);
468 error |= run_functions_and_or_xor_for_type<cl_char>(rft);
469 error |= run_functions_and_or_xor_for_type<cl_uchar>(rft);
470
471 error |= run_functions_logical_and_or_xor_for_type<cl_int>(rft);
472 return error;
473 }