1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "subgroup_common_templates.h"
19 #include "harness/typeWrappers.h"
20
21 #define CLUSTER_SIZE 4
22 #define CLUSTER_SIZE_STR "4"
23
24 namespace {
25 static const char *redadd_clustered_source =
26 "__kernel void test_redadd_clustered(const __global Type *in, __global "
27 "int4 *xy, __global Type *out)\n"
28 "{\n"
29 " int gid = get_global_id(0);\n"
30 " XY(xy,gid);\n"
31 " xy[gid].w = 0;\n"
32 " if (sizeof(in[gid]) == "
33 "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n"
34 " {xy[gid].w = sizeof(in[gid]);}\n"
35 " out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR
36 ");\n"
37 "}\n";
38
39 static const char *redmax_clustered_source =
40 "__kernel void test_redmax_clustered(const __global Type *in, __global "
41 "int4 *xy, __global Type *out)\n"
42 "{\n"
43 " int gid = get_global_id(0);\n"
44 " XY(xy,gid);\n"
45 " xy[gid].w = 0;\n"
46 " if (sizeof(in[gid]) == "
47 "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n"
48 " {xy[gid].w = sizeof(in[gid]);}\n"
49 " out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR
50 ");\n"
51 "}\n";
52
53 static const char *redmin_clustered_source =
54 "__kernel void test_redmin_clustered(const __global Type *in, __global "
55 "int4 *xy, __global Type *out)\n"
56 "{\n"
57 " int gid = get_global_id(0);\n"
58 " XY(xy,gid);\n"
59 " xy[gid].w = 0;\n"
60 " if (sizeof(in[gid]) == "
61 "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n"
62 " {xy[gid].w = sizeof(in[gid]);}\n"
63 " out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR
64 ");\n"
65 "}\n";
66
67 static const char *redmul_clustered_source =
68 "__kernel void test_redmul_clustered(const __global Type *in, __global "
69 "int4 *xy, __global Type *out)\n"
70 "{\n"
71 " int gid = get_global_id(0);\n"
72 " XY(xy,gid);\n"
73 " xy[gid].w = 0;\n"
74 " if (sizeof(in[gid]) == "
75 "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n"
76 " {xy[gid].w = sizeof(in[gid]);}\n"
77 " out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR
78 ");\n"
79 "}\n";
80
81 static const char *redand_clustered_source =
82 "__kernel void test_redand_clustered(const __global Type *in, __global "
83 "int4 *xy, __global Type *out)\n"
84 "{\n"
85 " int gid = get_global_id(0);\n"
86 " XY(xy,gid);\n"
87 " xy[gid].w = 0;\n"
88 " if (sizeof(in[gid]) == "
89 "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n"
90 " {xy[gid].w = sizeof(in[gid]);}\n"
91 " out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR
92 ");\n"
93 "}\n";
94
95 static const char *redor_clustered_source =
96 "__kernel void test_redor_clustered(const __global Type *in, __global int4 "
97 "*xy, __global Type *out)\n"
98 "{\n"
99 " int gid = get_global_id(0);\n"
100 " XY(xy,gid);\n"
101 " xy[gid].w = 0;\n"
102 " if (sizeof(in[gid]) == "
103 "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n"
104 " {xy[gid].w = sizeof(in[gid]);}\n"
105 " out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR
106 ");\n"
107 "}\n";
108
109 static const char *redxor_clustered_source =
110 "__kernel void test_redxor_clustered(const __global Type *in, __global "
111 "int4 *xy, __global Type *out)\n"
112 "{\n"
113 " int gid = get_global_id(0);\n"
114 " XY(xy,gid);\n"
115 " xy[gid].w = 0;\n"
116 " if (sizeof(in[gid]) == "
117 "sizeof(sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR ")))\n"
118 " {xy[gid].w = sizeof(in[gid]);}\n"
119 " out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR
120 ");\n"
121 "}\n";
122
123 static const char *redand_clustered_logical_source =
124 "__kernel void test_redand_clustered_logical(const __global Type *in, "
125 "__global int4 *xy, __global Type *out)\n"
126 "{\n"
127 " int gid = get_global_id(0);\n"
128 " XY(xy,gid);\n"
129 " xy[gid].w = 0;\n"
130 " if (sizeof(in[gid]) == "
131 "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR
132 ")))\n"
133 " {xy[gid].w = sizeof(in[gid]);}\n"
134 " out[gid] = "
135 "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n"
136 "}\n";
137
138 static const char *redor_clustered_logical_source =
139 "__kernel void test_redor_clustered_logical(const __global Type *in, "
140 "__global int4 *xy, __global Type *out)\n"
141 "{\n"
142 " int gid = get_global_id(0);\n"
143 " XY(xy,gid);\n"
144 " xy[gid].w = 0;\n"
145 " if (sizeof(in[gid]) == "
146 "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR
147 ")))\n"
148 " {xy[gid].w = sizeof(in[gid]);}\n"
149 " out[gid] = "
150 "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n"
151 "}\n";
152
153 static const char *redxor_clustered_logical_source =
154 "__kernel void test_redxor_clustered_logical(const __global Type *in, "
155 "__global int4 *xy, __global Type *out)\n"
156 "{\n"
157 " int gid = get_global_id(0);\n"
158 " XY(xy,gid);\n"
159 " xy[gid].w = 0;\n"
160 " if ( sizeof(in[gid]) == "
161 "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR
162 ")))\n"
163 " {xy[gid].w = sizeof(in[gid]);}\n"
164 " out[gid] = "
165 "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n"
166 "}\n";
167
168
169 // DESCRIPTION:
170 // Test for reduce cluster functions
171 template <typename Ty, ArithmeticOp operation> struct RED_CLU
172 {
gen__anon181fc4d90111::RED_CLU173 static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
174 {
175 int nw = test_params.local_workgroup_size;
176 int ns = test_params.subgroup_size;
177 int ng = test_params.global_workgroup_size;
178 ng = ng / nw;
179 log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...\n",
180 operation_names(operation), TypeManager<Ty>::name(),
181 sizeof(Ty));
182 genrand<Ty, operation>(x, t, m, ns, nw, ng);
183 }
184
chk__anon181fc4d90111::RED_CLU185 static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
186 const WorkGroupParams &test_params)
187 {
188 int nw = test_params.local_workgroup_size;
189 int ns = test_params.subgroup_size;
190 int ng = test_params.global_workgroup_size;
191 int nj = (nw + ns - 1) / ns;
192 ng = ng / nw;
193
194 for (int k = 0; k < ng; ++k)
195 {
196 std::vector<cl_int> data_type_sizes;
197 // Map to array indexed to array indexed by local ID and sub group
198 for (int j = 0; j < nw; ++j)
199 {
200 mx[j] = x[j];
201 my[j] = y[j];
202 data_type_sizes.push_back(m[4 * j + 3]);
203 }
204
205 for (cl_int dts : data_type_sizes)
206 {
207 if (dts != sizeof(Ty))
208 {
209 log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
210 "wrong data type size detected, expected: %d, "
211 "used by device %d, in group %d\n",
212 operation_names(operation),
213 TypeManager<Ty>::name(), sizeof(Ty), dts, k);
214 return TEST_FAIL;
215 }
216 }
217
218 for (int j = 0; j < nj; ++j)
219 {
220 int ii = j * ns;
221 int n = ii + ns > nw ? nw - ii : ns;
222 int midx = 4 * ii + 2;
223 std::vector<Ty> clusters_results;
224 int clusters_counter = ns / CLUSTER_SIZE;
225 clusters_results.resize(clusters_counter);
226
227 // Compute target
228 Ty tr = mx[ii];
229 for (int i = 0; i < n; ++i)
230 {
231 if (i % CLUSTER_SIZE == 0)
232 tr = mx[ii + i];
233 else
234 tr = calculate<Ty>(tr, mx[ii + i], operation);
235 clusters_results[i / CLUSTER_SIZE] = tr;
236 }
237
238 // Check result
239 for (int i = 0; i < n; ++i)
240 {
241 Ty rr = my[ii + i];
242 tr = clusters_results[i / CLUSTER_SIZE];
243 if (!compare(rr, tr))
244 {
245 log_error(
246 "ERROR: sub_group_clustered_reduce_%s(%s) mismatch "
247 "for local id %d in sub group %d in group %d\n",
248 operation_names(operation), TypeManager<Ty>::name(),
249 i, j, k);
250 return TEST_FAIL;
251 }
252 }
253 }
254
255 x += nw;
256 y += nw;
257 m += 4 * nw;
258 }
259 log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n",
260 operation_names(operation), TypeManager<Ty>::name(),
261 sizeof(Ty));
262 return TEST_PASS;
263 }
264 };
265
266 template <typename T>
run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)267 int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
268 {
269 int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
270 "test_redadd_clustered", redadd_clustered_source);
271 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
272 "test_redmax_clustered", redmax_clustered_source);
273 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
274 "test_redmin_clustered", redmin_clustered_source);
275 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
276 "test_redmul_clustered", redmul_clustered_source);
277 return error;
278 }
run_cluster_and_or_xor_for_type(RunTestForType rft)279 template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
280 {
281 int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
282 "test_redand_clustered", redand_clustered_source);
283 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
284 "test_redor_clustered", redor_clustered_source);
285 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
286 "test_redxor_clustered", redxor_clustered_source);
287 return error;
288 }
289 template <typename T>
run_cluster_logical_and_or_xor_for_type(RunTestForType rft)290 int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
291 {
292 int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
293 "test_redand_clustered_logical", redand_clustered_logical_source);
294 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
295 "test_redor_clustered_logical", redor_clustered_logical_source);
296 error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
297 "test_redxor_clustered_logical", redxor_clustered_logical_source);
298
299 return error;
300 }
301 }
302
test_subgroup_functions_clustered_reduce(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)303 int test_subgroup_functions_clustered_reduce(cl_device_id device,
304 cl_context context,
305 cl_command_queue queue,
306 int num_elements)
307 {
308 std::vector<std::string> required_extensions = {
309 "cl_khr_subgroup_clustered_reduce"
310 };
311 constexpr size_t global_work_size = 2000;
312 constexpr size_t local_work_size = 200;
313 WorkGroupParams test_params(global_work_size, local_work_size,
314 required_extensions);
315 RunTestForType rft(device, context, queue, num_elements, test_params);
316
317 int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
318 error |= run_cluster_red_add_max_min_mul_for_type<cl_uint>(rft);
319 error |= run_cluster_red_add_max_min_mul_for_type<cl_long>(rft);
320 error |= run_cluster_red_add_max_min_mul_for_type<cl_ulong>(rft);
321 error |= run_cluster_red_add_max_min_mul_for_type<cl_short>(rft);
322 error |= run_cluster_red_add_max_min_mul_for_type<cl_ushort>(rft);
323 error |= run_cluster_red_add_max_min_mul_for_type<cl_char>(rft);
324 error |= run_cluster_red_add_max_min_mul_for_type<cl_uchar>(rft);
325 error |= run_cluster_red_add_max_min_mul_for_type<cl_float>(rft);
326 error |= run_cluster_red_add_max_min_mul_for_type<cl_double>(rft);
327 error |= run_cluster_red_add_max_min_mul_for_type<subgroups::cl_half>(rft);
328
329 error |= run_cluster_and_or_xor_for_type<cl_int>(rft);
330 error |= run_cluster_and_or_xor_for_type<cl_uint>(rft);
331 error |= run_cluster_and_or_xor_for_type<cl_long>(rft);
332 error |= run_cluster_and_or_xor_for_type<cl_ulong>(rft);
333 error |= run_cluster_and_or_xor_for_type<cl_short>(rft);
334 error |= run_cluster_and_or_xor_for_type<cl_ushort>(rft);
335 error |= run_cluster_and_or_xor_for_type<cl_char>(rft);
336 error |= run_cluster_and_or_xor_for_type<cl_uchar>(rft);
337
338 error |= run_cluster_logical_and_or_xor_for_type<cl_int>(rft);
339 return error;
340 }
341