• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/typeWrappers.h"
19 #include "subgroup_common_templates.h"
20 
21 namespace {
22 
23 static const char *scinadd_non_uniform_source = R"(
24     __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
25         int gid = get_global_id(0);
26         XY(xy,gid);
27         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
28             if (elect_work_item & WORK_ITEMS_MASK){
29                 out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]);
30             }
31     }
32 )";
33 
34 static const char *scinmax_non_uniform_source = R"(
35     __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
36         int gid = get_global_id(0);
37         XY(xy,gid);
38         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
39             if (elect_work_item & WORK_ITEMS_MASK){
40                 out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]);
41             }
42     }
43 )";
44 
45 static const char *scinmin_non_uniform_source = R"(
46     __kernel void test_scinmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
47         int gid = get_global_id(0);
48         XY(xy,gid);
49         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
50             if (elect_work_item & WORK_ITEMS_MASK){
51                 out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]);
52             }
53     }
54 )";
55 
56 static const char *scinmul_non_uniform_source = R"(
57     __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
58         int gid = get_global_id(0);
59         XY(xy,gid);
60         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
61             if (elect_work_item & WORK_ITEMS_MASK){
62                 out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]);
63             }
64     }
65 )";
66 
67 static const char *scinand_non_uniform_source = R"(
68     __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
69         int gid = get_global_id(0);
70         XY(xy,gid);
71         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
72             if (elect_work_item & WORK_ITEMS_MASK){
73                 out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]);
74             }
75     }
76 )";
77 
78 static const char *scinor_non_uniform_source = R"(
79     __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
80         int gid = get_global_id(0);
81         XY(xy,gid);
82         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
83             if (elect_work_item & WORK_ITEMS_MASK){
84                 out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]);
85             }
86     }
87 )";
88 
89 static const char *scinxor_non_uniform_source = R"(
90     __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
91         int gid = get_global_id(0);
92         XY(xy,gid);
93         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
94             if (elect_work_item & WORK_ITEMS_MASK){
95                 out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]);
96             }
97     }
98 )";
99 
100 static const char *scinand_non_uniform_logical_source = R"(
101     __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
102         int gid = get_global_id(0);
103         XY(xy,gid);
104         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
105             if (elect_work_item & WORK_ITEMS_MASK){
106                 out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]);
107             }
108     }
109 )";
110 
111 static const char *scinor_non_uniform_logical_source = R"(
112     __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
113         int gid = get_global_id(0);
114         XY(xy,gid);
115         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
116             if (elect_work_item & WORK_ITEMS_MASK){
117                 out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]);
118             }
119     }
120 )";
121 
122 static const char *scinxor_non_uniform_logical_source = R"(
123     __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
124         int gid = get_global_id(0);
125         XY(xy,gid);
126         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
127             if (elect_work_item & WORK_ITEMS_MASK){
128                 out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]);
129             }
130     }
131 )";
132 
133 static const char *scexadd_non_uniform_source = R"(
134     __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
135         int gid = get_global_id(0);
136         XY(xy,gid);
137         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
138             if (elect_work_item & WORK_ITEMS_MASK){
139                 out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]);
140             }
141     }
142 )";
143 
144 static const char *scexmax_non_uniform_source = R"(
145     __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
146         int gid = get_global_id(0);
147         XY(xy,gid);
148         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
149             if (elect_work_item & WORK_ITEMS_MASK){
150                 out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]);
151             }
152     }
153 )";
154 
155 static const char *scexmin_non_uniform_source = R"(
156     __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
157         int gid = get_global_id(0);
158         XY(xy,gid);
159         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
160             if (elect_work_item & WORK_ITEMS_MASK){
161                 out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]);
162             }
163     }
164 )";
165 
166 static const char *scexmul_non_uniform_source = R"(
167     __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
168         int gid = get_global_id(0);
169         XY(xy,gid);
170         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
171             if (elect_work_item & WORK_ITEMS_MASK){
172                 out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]);
173             }
174     }
175 )";
176 
177 static const char *scexand_non_uniform_source = R"(
178     __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
179         int gid = get_global_id(0);
180         XY(xy,gid);
181         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
182             if (elect_work_item & WORK_ITEMS_MASK){
183                 out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]);
184             }
185     }
186 )";
187 
188 static const char *scexor_non_uniform_source = R"(
189     __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
190         int gid = get_global_id(0);
191         XY(xy,gid);
192         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
193             if (elect_work_item & WORK_ITEMS_MASK){
194                 out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]);
195             }
196     }
197 )";
198 
199 static const char *scexxor_non_uniform_source = R"(
200     __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
201         int gid = get_global_id(0);
202         XY(xy,gid);
203         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
204             if (elect_work_item & WORK_ITEMS_MASK){
205                 out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]);
206             }
207     }
208 )";
209 
210 static const char *scexand_non_uniform_logical_source = R"(
211     __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
212         int gid = get_global_id(0);
213         XY(xy,gid);
214         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
215             if (elect_work_item & WORK_ITEMS_MASK){
216                 out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]);
217             }
218     }
219 )";
220 
221 static const char *scexor_non_uniform_logical_source = R"(
222     __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
223         int gid = get_global_id(0);
224         XY(xy,gid);
225         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
226             if (elect_work_item & WORK_ITEMS_MASK){
227                 out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]);
228             }
229     }
230 )";
231 
232 static const char *scexxor_non_uniform_logical_source = R"(
233     __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
234         int gid = get_global_id(0);
235         XY(xy,gid);
236         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
237             if (elect_work_item & WORK_ITEMS_MASK){
238                 out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]);
239             }
240     }
241 )";
242 
243 static const char *redadd_non_uniform_source = R"(
244     __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
245         int gid = get_global_id(0);
246         XY(xy,gid);
247         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
248             if (elect_work_item & WORK_ITEMS_MASK){
249                 out[gid] = sub_group_non_uniform_reduce_add(in[gid]);
250             }
251     }
252 )";
253 
254 static const char *redmax_non_uniform_source = R"(
255     __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
256         int gid = get_global_id(0);
257         XY(xy,gid);
258         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
259             if (elect_work_item & WORK_ITEMS_MASK){
260                 out[gid] = sub_group_non_uniform_reduce_max(in[gid]);
261             }
262     }
263 )";
264 
265 static const char *redmin_non_uniform_source = R"(
266     __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
267         int gid = get_global_id(0);
268         XY(xy,gid);
269         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
270             if (elect_work_item & WORK_ITEMS_MASK){
271                 out[gid] = sub_group_non_uniform_reduce_min(in[gid]);
272             }
273     }
274 )";
275 
276 static const char *redmul_non_uniform_source = R"(
277     __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
278         int gid = get_global_id(0);
279         XY(xy,gid);
280         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
281             if (elect_work_item & WORK_ITEMS_MASK){
282                 out[gid] = sub_group_non_uniform_reduce_mul(in[gid]);
283             }
284     }
285 )";
286 
287 static const char *redand_non_uniform_source = R"(
288     __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
289         int gid = get_global_id(0);
290         XY(xy,gid);
291         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
292             if (elect_work_item & WORK_ITEMS_MASK){
293                 out[gid] = sub_group_non_uniform_reduce_and(in[gid]);
294             }
295     }
296 )";
297 
298 static const char *redor_non_uniform_source = R"(
299     __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
300         int gid = get_global_id(0);
301         XY(xy,gid);
302         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
303             if (elect_work_item & WORK_ITEMS_MASK){
304                 out[gid] = sub_group_non_uniform_reduce_or(in[gid]);
305             }
306     }
307 )";
308 
309 static const char *redxor_non_uniform_source = R"(
310     __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) {
311         int gid = get_global_id(0);
312         XY(xy,gid);
313         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
314             if (elect_work_item & WORK_ITEMS_MASK){
315                 out[gid] = sub_group_non_uniform_reduce_xor(in[gid]);
316             }
317     }
318 )";
319 
320 static const char *redand_non_uniform_logical_source = R"(
321     __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
322         int gid = get_global_id(0);
323         XY(xy,gid);
324         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
325             if (elect_work_item & WORK_ITEMS_MASK){
326                 out[gid] = sub_group_non_uniform_reduce_logical_and(in[gid]);
327             }
328     }
329 )";
330 
331 static const char *redor_non_uniform_logical_source = R"(
332     __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
333         int gid = get_global_id(0);
334         XY(xy,gid);
335         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
336             if (elect_work_item & WORK_ITEMS_MASK){
337                 out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]);
338             }
339     }
340 )";
341 
342 static const char *redxor_non_uniform_logical_source = R"(
343     __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) {
344         int gid = get_global_id(0);
345         XY(xy,gid);
346         int elect_work_item = 1 << (get_sub_group_local_id() % 32);
347             if (elect_work_item & WORK_ITEMS_MASK){
348                 out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]);
349             }
350     }
351 )";
352 
353 template <typename T>
run_functions_add_mul_max_min_for_type(RunTestForType rft)354 int run_functions_add_mul_max_min_for_type(RunTestForType rft)
355 {
356     int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::add_>>(
357         "test_scinadd_non_uniform", scinadd_non_uniform_source);
358     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::mul_>>(
359         "test_scinmul_non_uniform", scinmul_non_uniform_source);
360     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::max_>>(
361         "test_scinmax_non_uniform", scinmax_non_uniform_source);
362     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::min_>>(
363         "test_scinmin_non_uniform", scinmin_non_uniform_source);
364     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::add_>>(
365         "test_scexadd_non_uniform", scexadd_non_uniform_source);
366     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::mul_>>(
367         "test_scexmul_non_uniform", scexmul_non_uniform_source);
368     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::max_>>(
369         "test_scexmax_non_uniform", scexmax_non_uniform_source);
370     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::min_>>(
371         "test_scexmin_non_uniform", scexmin_non_uniform_source);
372     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::add_>>(
373         "test_redadd_non_uniform", redadd_non_uniform_source);
374     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::mul_>>(
375         "test_redmul_non_uniform", redmul_non_uniform_source);
376     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::max_>>(
377         "test_redmax_non_uniform", redmax_non_uniform_source);
378     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::min_>>(
379         "test_redmin_non_uniform", redmin_non_uniform_source);
380     return error;
381 }
382 
run_functions_and_or_xor_for_type(RunTestForType rft)383 template <typename T> int run_functions_and_or_xor_for_type(RunTestForType rft)
384 {
385     int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::and_>>(
386         "test_scinand_non_uniform", scinand_non_uniform_source);
387     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::or_>>(
388         "test_scinor_non_uniform", scinor_non_uniform_source);
389     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::xor_>>(
390         "test_scinxor_non_uniform", scinxor_non_uniform_source);
391     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::and_>>(
392         "test_scexand_non_uniform", scexand_non_uniform_source);
393     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::or_>>(
394         "test_scexor_non_uniform", scexor_non_uniform_source);
395     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::xor_>>(
396         "test_scexxor_non_uniform", scexxor_non_uniform_source);
397     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::and_>>(
398         "test_redand_non_uniform", redand_non_uniform_source);
399     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::or_>>(
400         "test_redor_non_uniform", redor_non_uniform_source);
401     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::xor_>>(
402         "test_redxor_non_uniform", redxor_non_uniform_source);
403     return error;
404 }
405 
406 template <typename T>
run_functions_logical_and_or_xor_for_type(RunTestForType rft)407 int run_functions_logical_and_or_xor_for_type(RunTestForType rft)
408 {
409     int error = rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_and>>(
410         "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source);
411     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_or>>(
412         "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source);
413     error |= rft.run_impl<T, SCIN_NU<T, ArithmeticOp::logical_xor>>(
414         "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source);
415     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_and>>(
416         "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source);
417     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_or>>(
418         "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source);
419     error |= rft.run_impl<T, SCEX_NU<T, ArithmeticOp::logical_xor>>(
420         "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source);
421     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_and>>(
422         "test_redand_non_uniform_logical", redand_non_uniform_logical_source);
423     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_or>>(
424         "test_redor_non_uniform_logical", redor_non_uniform_logical_source);
425     error |= rft.run_impl<T, RED_NU<T, ArithmeticOp::logical_xor>>(
426         "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source);
427     return error;
428 }
429 
430 }
431 
test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)432 int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device,
433                                                    cl_context context,
434                                                    cl_command_queue queue,
435                                                    int num_elements)
436 {
437     std::vector<std::string> required_extensions = {
438         "cl_khr_subgroup_non_uniform_arithmetic"
439     };
440     std::vector<uint32_t> masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555,
441                                  0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00,
442                                  0x00ffff00, 0x80000000, 0xaaaaaaaa };
443 
444     constexpr size_t global_work_size = 2000;
445     constexpr size_t local_work_size = 200;
446     WorkGroupParams test_params(global_work_size, local_work_size,
447                                 required_extensions, masks);
448     RunTestForType rft(device, context, queue, num_elements, test_params);
449 
450     int error = run_functions_add_mul_max_min_for_type<cl_int>(rft);
451     error |= run_functions_add_mul_max_min_for_type<cl_uint>(rft);
452     error |= run_functions_add_mul_max_min_for_type<cl_long>(rft);
453     error |= run_functions_add_mul_max_min_for_type<cl_ulong>(rft);
454     error |= run_functions_add_mul_max_min_for_type<cl_short>(rft);
455     error |= run_functions_add_mul_max_min_for_type<cl_ushort>(rft);
456     error |= run_functions_add_mul_max_min_for_type<cl_char>(rft);
457     error |= run_functions_add_mul_max_min_for_type<cl_uchar>(rft);
458     error |= run_functions_add_mul_max_min_for_type<cl_float>(rft);
459     error |= run_functions_add_mul_max_min_for_type<cl_double>(rft);
460     error |= run_functions_add_mul_max_min_for_type<subgroups::cl_half>(rft);
461 
462     error |= run_functions_and_or_xor_for_type<cl_int>(rft);
463     error |= run_functions_and_or_xor_for_type<cl_uint>(rft);
464     error |= run_functions_and_or_xor_for_type<cl_long>(rft);
465     error |= run_functions_and_or_xor_for_type<cl_ulong>(rft);
466     error |= run_functions_and_or_xor_for_type<cl_short>(rft);
467     error |= run_functions_and_or_xor_for_type<cl_ushort>(rft);
468     error |= run_functions_and_or_xor_for_type<cl_char>(rft);
469     error |= run_functions_and_or_xor_for_type<cl_uchar>(rft);
470 
471     error |= run_functions_logical_and_or_xor_for_type<cl_int>(rft);
472     return error;
473 }