• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "subgroup_common_templates.h"
19 #include "harness/typeWrappers.h"
20 
21 namespace {
22 std::string sub_group_clustered_reduce_source = R"(
23 __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out,
24                       uint cluster_size) {
25         Type r;
26         int gid = get_global_id(0);
27         XY(xy,gid);
28         xy[gid].w = 0;
29         Type v = in[gid];
30         if (sizeof(in[gid]) == sizeof(%s(v, 1))) {
31             xy[gid].w = sizeof(in[gid]);
32         }
33         switch (cluster_size) {
34             case 1: r = %s(v, 1); break;
35             case 2: r = %s(v, 2); break;
36             case 4: r = %s(v, 4); break;
37             case 8: r = %s(v, 8); break;
38             case 16: r = %s(v, 16); break;
39             case 32: r = %s(v, 32); break;
40             case 64: r = %s(v, 64); break;
41             case 128: r = %s(v, 128); break;
42         }
43         out[gid] = r;
44 }
45 )";
46 
47 // DESCRIPTION:
48 // Test for reduce cluster functions
49 template <typename Ty, ArithmeticOp operation> struct RED_CLU
50 {
log_test__anon27c715880111::RED_CLU51     static void log_test(const WorkGroupParams &test_params,
52                          const char *extra_text)
53     {
54         log_info("  sub_group_clustered_reduce_%s(%s, %zu bytes) ...%s\n",
55                  operation_names(operation), TypeManager<Ty>::name(),
56                  sizeof(Ty), extra_text);
57     }
58 
gen__anon27c715880111::RED_CLU59     static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params)
60     {
61         int nw = test_params.local_workgroup_size;
62         int ns = test_params.subgroup_size;
63         int ng = test_params.global_workgroup_size;
64         ng = ng / nw;
65         generate_inputs<Ty, operation>(x, t, m, ns, nw, ng);
66     }
67 
chk__anon27c715880111::RED_CLU68     static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
69                            const WorkGroupParams &test_params)
70     {
71         int nw = test_params.local_workgroup_size;
72         int ns = test_params.subgroup_size;
73         int ng = test_params.global_workgroup_size;
74         int nj = (nw + ns - 1) / ns;
75         ng = ng / nw;
76 
77         for (int k = 0; k < ng; ++k)
78         {
79             std::vector<cl_int> data_type_sizes;
80             // Map to array indexed to array indexed by local ID and sub group
81             for (int j = 0; j < nw; ++j)
82             {
83                 mx[j] = x[j];
84                 my[j] = y[j];
85                 data_type_sizes.push_back(m[4 * j + 3]);
86             }
87 
88             for (cl_int dts : data_type_sizes)
89             {
90                 if (dts != sizeof(Ty))
91                 {
92                     log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
93                               "wrong data type size detected, expected: %zu, "
94                               "used by device %d, in group %d\n",
95                               operation_names(operation),
96                               TypeManager<Ty>::name(), sizeof(Ty), dts, k);
97                     return TEST_FAIL;
98                 }
99             }
100 
101             for (int j = 0; j < nj; ++j)
102             {
103                 int ii = j * ns;
104                 int n = ii + ns > nw ? nw - ii : ns;
105                 std::vector<Ty> clusters_results;
106                 int clusters_counter = ns / test_params.cluster_size;
107                 clusters_results.resize(clusters_counter);
108 
109                 // Compute target
110                 Ty tr = mx[ii];
111                 for (int i = 0; i < n; ++i)
112                 {
113                     if (i % test_params.cluster_size == 0)
114                         tr = mx[ii + i];
115                     else
116                         tr = calculate<Ty>(tr, mx[ii + i], operation);
117                     clusters_results[i / test_params.cluster_size] = tr;
118                 }
119 
120                 // Check result
121                 for (int i = 0; i < n; ++i)
122                 {
123                     Ty rr = my[ii + i];
124                     tr = clusters_results[i / test_params.cluster_size];
125                     if (!compare(rr, tr))
126                     {
127                         log_error(
128                             "ERROR: sub_group_clustered_reduce_%s(%s, %u) "
129                             "mismatch for local id %d in sub group %d in group "
130                             "%d\n",
131                             operation_names(operation), TypeManager<Ty>::name(),
132                             test_params.cluster_size, i, j, k);
133                         return TEST_FAIL;
134                     }
135                 }
136             }
137 
138             x += nw;
139             y += nw;
140             m += 4 * nw;
141         }
142         return TEST_PASS;
143     }
144 };
145 
146 template <typename T>
run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)147 int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft)
148 {
149     int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::add_>>(
150         "sub_group_clustered_reduce_add");
151     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::max_>>(
152         "sub_group_clustered_reduce_max");
153     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::min_>>(
154         "sub_group_clustered_reduce_min");
155     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::mul_>>(
156         "sub_group_clustered_reduce_mul");
157     return error;
158 }
run_cluster_and_or_xor_for_type(RunTestForType rft)159 template <typename T> int run_cluster_and_or_xor_for_type(RunTestForType rft)
160 {
161     int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::and_>>(
162         "sub_group_clustered_reduce_and");
163     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::or_>>(
164         "sub_group_clustered_reduce_or");
165     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::xor_>>(
166         "sub_group_clustered_reduce_xor");
167     return error;
168 }
169 template <typename T>
run_cluster_logical_and_or_xor_for_type(RunTestForType rft)170 int run_cluster_logical_and_or_xor_for_type(RunTestForType rft)
171 {
172     int error = rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_and>>(
173         "sub_group_clustered_reduce_logical_and");
174     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_or>>(
175         "sub_group_clustered_reduce_logical_or");
176     error |= rft.run_impl<T, RED_CLU<T, ArithmeticOp::logical_xor>>(
177         "sub_group_clustered_reduce_logical_xor");
178 
179     return error;
180 }
181 }
182 
test_subgroup_functions_clustered_reduce(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)183 int test_subgroup_functions_clustered_reduce(cl_device_id device,
184                                              cl_context context,
185                                              cl_command_queue queue,
186                                              int num_elements)
187 {
188     if (!is_extension_available(device, "cl_khr_subgroup_clustered_reduce"))
189     {
190         log_info("cl_khr_subgroup_clustered_reduce is not supported on this "
191                  "device, skipping test.\n");
192         return TEST_SKIPPED_ITSELF;
193     }
194 
195     constexpr size_t global_work_size = 2000;
196     constexpr size_t local_work_size = 200;
197     WorkGroupParams test_params(global_work_size, local_work_size, -1, 3);
198     test_params.save_kernel_source(sub_group_clustered_reduce_source);
199     RunTestForType rft(device, context, queue, num_elements, test_params);
200 
201     int error = run_cluster_red_add_max_min_mul_for_type<cl_int>(rft);
202     error |= run_cluster_red_add_max_min_mul_for_type<cl_uint>(rft);
203     error |= run_cluster_red_add_max_min_mul_for_type<cl_long>(rft);
204     error |= run_cluster_red_add_max_min_mul_for_type<cl_ulong>(rft);
205     error |= run_cluster_red_add_max_min_mul_for_type<cl_short>(rft);
206     error |= run_cluster_red_add_max_min_mul_for_type<cl_ushort>(rft);
207     error |= run_cluster_red_add_max_min_mul_for_type<cl_char>(rft);
208     error |= run_cluster_red_add_max_min_mul_for_type<cl_uchar>(rft);
209     error |= run_cluster_red_add_max_min_mul_for_type<cl_float>(rft);
210     error |= run_cluster_red_add_max_min_mul_for_type<cl_double>(rft);
211     error |= run_cluster_red_add_max_min_mul_for_type<subgroups::cl_half>(rft);
212 
213     error |= run_cluster_and_or_xor_for_type<cl_int>(rft);
214     error |= run_cluster_and_or_xor_for_type<cl_uint>(rft);
215     error |= run_cluster_and_or_xor_for_type<cl_long>(rft);
216     error |= run_cluster_and_or_xor_for_type<cl_ulong>(rft);
217     error |= run_cluster_and_or_xor_for_type<cl_short>(rft);
218     error |= run_cluster_and_or_xor_for_type<cl_ushort>(rft);
219     error |= run_cluster_and_or_xor_for_type<cl_char>(rft);
220     error |= run_cluster_and_or_xor_for_type<cl_uchar>(rft);
221 
222     error |= run_cluster_logical_and_or_xor_for_type<cl_int>(rft);
223     return error;
224 }
225