• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "subgroup_common_templates.h"
19 #include "harness/typeWrappers.h"
20 
21 #define CLUSTER_SIZE 4
22 #define CLUSTER_SIZE_STR "4"
23 
24 namespace {
25 static const char *redadd_clustered_source =
26     "__kernel void test_redadd_clustered(const __global Type *in, __global "
27     "int4 *xy, __global Type *out)\n"
28     "{\n"
29     "    int gid = get_global_id(0);\n"
30     "    XY(xy,gid);\n"
31     "    xy[gid].w = 0;\n"
32     "    if (sizeof(in[gid]) == "
33     "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n"
34     "    {xy[gid].w = sizeof(in[gid]);}\n"
35     "    out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR
36     ");\n"
37     "}\n";
38 
39 static const char *redmax_clustered_source =
40     "__kernel void test_redmax_clustered(const __global Type *in, __global "
41     "int4 *xy, __global Type *out)\n"
42     "{\n"
43     "    int gid = get_global_id(0);\n"
44     "    XY(xy,gid);\n"
45     "    xy[gid].w = 0;\n"
46     "    if (sizeof(in[gid]) == "
47     "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n"
48     "    {xy[gid].w = sizeof(in[gid]);}\n"
49     "    out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR
50     ");\n"
51     "}\n";
52 
53 static const char *redmin_clustered_source =
54     "__kernel void test_redmin_clustered(const __global Type *in, __global "
55     "int4 *xy, __global Type *out)\n"
56     "{\n"
57     "    int gid = get_global_id(0);\n"
58     "    XY(xy,gid);\n"
59     "    xy[gid].w = 0;\n"
60     "    if (sizeof(in[gid]) == "
61     "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n"
62     "    {xy[gid].w = sizeof(in[gid]);}\n"
63     "    out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR
64     ");\n"
65     "}\n";
66 
67 static const char *redmul_clustered_source =
68     "__kernel void test_redmul_clustered(const __global Type *in, __global "
69     "int4 *xy, __global Type *out)\n"
70     "{\n"
71     "    int gid = get_global_id(0);\n"
72     "    XY(xy,gid);\n"
73     "    xy[gid].w = 0;\n"
74     "    if (sizeof(in[gid]) == "
75     "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n"
76     "    {xy[gid].w = sizeof(in[gid]);}\n"
77     "    out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR
78     ");\n"
79     "}\n";
80 
81 static const char *redand_clustered_source =
82     "__kernel void test_redand_clustered(const __global Type *in, __global "
83     "int4 *xy, __global Type *out)\n"
84     "{\n"
85     "    int gid = get_global_id(0);\n"
86     "    XY(xy,gid);\n"
87     "    xy[gid].w = 0;\n"
88     "    if (sizeof(in[gid]) == "
89     "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n"
90     "    {xy[gid].w = sizeof(in[gid]);}\n"
91     "    out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR
92     ");\n"
93     "}\n";
94 
95 static const char *redor_clustered_source =
96     "__kernel void test_redor_clustered(const __global Type *in, __global int4 "
97     "*xy, __global Type *out)\n"
98     "{\n"
99     "    int gid = get_global_id(0);\n"
100     "    XY(xy,gid);\n"
101     "    xy[gid].w = 0;\n"
102     "    if (sizeof(in[gid]) == "
103     "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n"
104     "    {xy[gid].w = sizeof(in[gid]);}\n"
105     "    out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR
106     ");\n"
107     "}\n";
108 
109 static const char *redxor_clustered_source =
110     "__kernel void test_redxor_clustered(const __global Type *in, __global "
111     "int4 *xy, __global Type *out)\n"
112     "{\n"
113     "    int gid = get_global_id(0);\n"
114     "    XY(xy,gid);\n"
115     "    xy[gid].w = 0;\n"
116     "    if (sizeof(in[gid]) == "
117     "sizeof(sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR ")))\n"
118     "    {xy[gid].w = sizeof(in[gid]);}\n"
119     "    out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR
120     ");\n"
121     "}\n";
122 
123 static const char *redand_clustered_logical_source =
124     "__kernel void test_redand_clustered_logical(const __global Type *in, "
125     "__global int4 *xy, __global Type *out)\n"
126     "{\n"
127     "    int gid = get_global_id(0);\n"
128     "    XY(xy,gid);\n"
129     "    xy[gid].w = 0;\n"
130     "    if (sizeof(in[gid]) == "
131     "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR
132     ")))\n"
133     "    {xy[gid].w = sizeof(in[gid]);}\n"
134     "    out[gid] = "
135     "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n"
136     "}\n";
137 
138 static const char *redor_clustered_logical_source =
139     "__kernel void test_redor_clustered_logical(const __global Type *in, "
140     "__global int4 *xy, __global Type *out)\n"
141     "{\n"
142     "    int gid = get_global_id(0);\n"
143     "    XY(xy,gid);\n"
144     "    xy[gid].w = 0;\n"
145     "    if (sizeof(in[gid]) == "
146     "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR
147     ")))\n"
148     "    {xy[gid].w = sizeof(in[gid]);}\n"
149     "    out[gid] = "
150     "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n"
151     "}\n";
152 
153 static const char *redxor_clustered_logical_source =
154     "__kernel void test_redxor_clustered_logical(const __global Type *in, "
155     "__global int4 *xy, __global Type *out)\n"
156     "{\n"
157     "    int gid = get_global_id(0);\n"
158     "    XY(xy,gid);\n"
159     "    xy[gid].w = 0;\n"
160     "    if ( sizeof(in[gid]) == "
161     "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR
162     ")))\n"
163     "    {xy[gid].w = sizeof(in[gid]);}\n"
164     "    out[gid] = "
165     "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n"
166     "}\n";
167 
168 
169 // DESCRIPTION:
170 // Test for reduce cluster functions
171 template <typename Ty, ArithmeticOp operation> struct RED_CLU
172 {
gen__anon181fc4d90111::RED_CLU173     static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
174     {
175         int nw = test_params.local_workgroup_size;
176         int ns = test_params.subgroup_size;
177         int ng = test_params.global_workgroup_size;
178         ng = ng / nw;
179         log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ...\n",
180                  operation_names(operation), TypeManager<Ty>::name(),
181                  sizeof(Ty));
182         genrand<Ty, operation>(x, t, m, ns, nw, ng);
183     }
184 
chk__anon181fc4d90111::RED_CLU185     static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
186                    const WorkGroupParams &test_params)
187     {
188         int nw = test_params.local_workgroup_size;
189         int ns = test_params.subgroup_size;
190         int ng = test_params.global_workgroup_size;
191         int nj = (nw + ns - 1) / ns;
192         ng = ng / nw;
193 
194         for (int k = 0; k < ng; ++k)
195         {
196             std::vector<cl_int> data_type_sizes;
197             // Map to array indexed to array indexed by local ID and sub group
198             for (int j = 0; j < nw; ++j)
199             {
200                 mx[j] = x[j];
201                 my[j] = y[j];
202                 data_type_sizes.push_back(m[4 * j + 3]);
203             }
204 
205             for (cl_int dts : data_type_sizes)
206             {
207                 if (dts != sizeof(Ty))
208                 {
209                     log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
210                               "wrong data type size detected, expected: %d, "
211                               "used by device %d, in group %d\n",
212                               operation_names(operation),
213                               TypeManager<Ty>::name(), sizeof(Ty), dts, k);
214                     return TEST_FAIL;
215                 }
216             }
217 
218             for (int j = 0; j < nj; ++j)
219             {
220                 int ii = j * ns;
221                 int n = ii + ns > nw ? nw - ii : ns;
222                 int midx = 4 * ii + 2;
223                 std::vector<Ty> clusters_results;
224                 int clusters_counter = ns / CLUSTER_SIZE;
225                 clusters_results.resize(clusters_counter);
226 
227                 // Compute target
228                 Ty tr = mx[ii];
229                 for (int i = 0; i < n; ++i)
230                 {
231                     if (i % CLUSTER_SIZE == 0)
232                         tr = mx[ii + i];
233                     else
234                         tr = calculate<Ty>(tr, mx[ii + i], operation);
235                     clusters_results[i / CLUSTER_SIZE] = tr;
236                 }
237 
238                 // Check result
239                 for (int i = 0; i < n; ++i)
240                 {
241                     Ty rr = my[ii + i];
242                     tr = clusters_results[i / CLUSTER_SIZE];
243                     if (!compare(rr, tr))
244                     {
245                         log_error(
246                             "ERROR: sub_group_clustered_reduce_%s(%s) mismatch "
247                             "for local id %d in sub group %d in group %d\n",
248                             operation_names(operation), TypeManager<Ty>::name(),
249                             i, j, k);
250                         return TEST_FAIL;
251                     }
252                 }
253             }
254 
255             x += nw;
256             y += nw;
257             m += 4 * nw;
258         }
259         log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n",
260                  operation_names(operation), TypeManager<Ty>::name(),
261                  sizeof(Ty));
262         return TEST_PASS;
263     }
264 };
265 
266 template <typename T>
run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)267 int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
268 {
269     int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
270         "test_redadd_clustered", redadd_clustered_source);
271     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
272         "test_redmax_clustered", redmax_clustered_source);
273     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
274         "test_redmin_clustered", redmin_clustered_source);
275     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
276         "test_redmul_clustered", redmul_clustered_source);
277     return error;
278 }
run_cluster_and_or_xor_for_type(RunTestForType rft)279 template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
280 {
281     int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
282         "test_redand_clustered", redand_clustered_source);
283     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
284         "test_redor_clustered", redor_clustered_source);
285     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
286         "test_redxor_clustered", redxor_clustered_source);
287     return error;
288 }
289 template <typename T>
run_cluster_logical_and_or_xor_for_type(RunTestForType rft)290 int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
291 {
292     int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
293         "test_redand_clustered_logical", redand_clustered_logical_source);
294     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
295         "test_redor_clustered_logical", redor_clustered_logical_source);
296     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
297         "test_redxor_clustered_logical", redxor_clustered_logical_source);
298 
299     return error;
300 }
301 }
302 
test_subgroup_functions_clustered_reduce(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)303 int test_subgroup_functions_clustered_reduce(cl_device_id device,
304                                              cl_context context,
305                                              cl_command_queue queue,
306                                              int num_elements)
307 {
308     std::vector<std::string> required_extensions = {
309         "cl_khr_subgroup_clustered_reduce"
310     };
311     constexpr size_t global_work_size = 2000;
312     constexpr size_t local_work_size = 200;
313     WorkGroupParams test_params(global_work_size, local_work_size,
314                                 required_extensions);
315     RunTestForType rft(device, context, queue, num_elements, test_params);
316 
317     int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
318     error |= run_cluster_red_add_max_min_mul_for_type<cl_uint>(rft);
319     error |= run_cluster_red_add_max_min_mul_for_type<cl_long>(rft);
320     error |= run_cluster_red_add_max_min_mul_for_type<cl_ulong>(rft);
321     error |= run_cluster_red_add_max_min_mul_for_type<cl_short>(rft);
322     error |= run_cluster_red_add_max_min_mul_for_type<cl_ushort>(rft);
323     error |= run_cluster_red_add_max_min_mul_for_type<cl_char>(rft);
324     error |= run_cluster_red_add_max_min_mul_for_type<cl_uchar>(rft);
325     error |= run_cluster_red_add_max_min_mul_for_type<cl_float>(rft);
326     error |= run_cluster_red_add_max_min_mul_for_type<cl_double>(rft);
327     error |= run_cluster_red_add_max_min_mul_for_type<subgroups::cl_half>(rft);
328 
329     error |= run_cluster_and_or_xor_for_type<cl_int>(rft);
330     error |= run_cluster_and_or_xor_for_type<cl_uint>(rft);
331     error |= run_cluster_and_or_xor_for_type<cl_long>(rft);
332     error |= run_cluster_and_or_xor_for_type<cl_ulong>(rft);
333     error |= run_cluster_and_or_xor_for_type<cl_short>(rft);
334     error |= run_cluster_and_or_xor_for_type<cl_ushort>(rft);
335     error |= run_cluster_and_or_xor_for_type<cl_char>(rft);
336     error |= run_cluster_and_or_xor_for_type<cl_uchar>(rft);
337 
338     error |= run_cluster_logical_and_or_xor_for_type<cl_int>(rft);
339     return error;
340 }
341