1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19
20 struct get_test_data
21 {
22 cl_uint subGroupSize;
23 cl_uint maxSubGroupSize;
24 cl_uint numSubGroups;
25 cl_uint enqNumSubGroups;
26 cl_uint subGroupId;
27 cl_uint subGroupLocalId;
operator ==get_test_data28 bool operator==(get_test_data x)
29 {
30 return subGroupSize == x.subGroupSize
31 && maxSubGroupSize == x.maxSubGroupSize
32 && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId
33 && subGroupLocalId == x.subGroupLocalId;
34 }
35 };
36
check_group(const get_test_data * result,int nw,cl_uint ensg,int maxwgs)37 static int check_group(const get_test_data *result, int nw, cl_uint ensg,
38 int maxwgs)
39 {
40 int first = -1;
41 int last = -1;
42 int i, j;
43 cl_uint hit[32];
44
45 for (i = 0; i < nw; ++i)
46 {
47 if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
48 first = i;
49 if (result[i].subGroupId == result[0].numSubGroups - 1
50 && result[i].subGroupLocalId == 0)
51 last = i;
52 if (first != -1 && last != -1) break;
53 }
54
55 if (first == -1 || last == -1)
56 {
57 log_error("ERROR: expected sub group id's are missing\n");
58 return -1;
59 }
60
61 // Check them
62 if (result[first].subGroupSize == 0)
63 {
64 log_error("ERROR: get_sub_group_size() returned 0\n");
65 return -1;
66 }
67 if (result[first].maxSubGroupSize == 0
68 || result[first].maxSubGroupSize > maxwgs)
69 {
70 log_error(
71 "ERROR: get_max_subgroup_size() returned incorrect result: %u\n",
72 result[first].maxSubGroupSize);
73 return -1;
74 }
75 if (result[first].subGroupSize > result[first].maxSubGroupSize)
76 {
77 log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
78 return -1;
79 }
80 if (result[last].subGroupSize > result[first].subGroupSize)
81 {
82 log_error("ERROR: last sub group larger than first sub group\n");
83 return -1;
84 }
85 if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg)
86 {
87 log_error(
88 "ERROR: get_num_sub_groups() returned incorrect result: %u \n",
89 result[first].numSubGroups);
90 return -1;
91 }
92
93 memset(hit, 0, sizeof(hit));
94 for (i = 0; i < nw; ++i)
95 {
96 if (result[i].maxSubGroupSize != result[first].maxSubGroupSize
97 || result[i].numSubGroups != result[first].numSubGroups)
98 {
99 log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
100 return -1;
101 }
102 if (result[i].subGroupId >= result[first].numSubGroups)
103 {
104 log_error(
105 "ERROR: get_sub_group_id() returned out of range value: %u\n",
106 result[i].subGroupId);
107 return -1;
108 }
109 if (result[i].enqNumSubGroups != ensg)
110 {
111 log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect "
112 "value: %u\n",
113 result[i].enqNumSubGroups);
114 return -1;
115 }
116 if (result[first].numSubGroups > 1)
117 {
118 if (result[i].subGroupId < result[first].numSubGroups - 1)
119 {
120 if (result[i].subGroupSize != result[first].subGroupSize)
121 {
122 log_error(
123 "ERROR: unexpected variation in get_*_sub_group_*()\n");
124 return -1;
125 }
126 if (result[i].subGroupLocalId >= result[first].subGroupSize)
127 {
128 log_error("ERROR: get_sub_group_local_id() returned out of "
129 "bounds value: %u \n",
130 result[i].subGroupLocalId);
131 return -1;
132 }
133 }
134 else
135 {
136 if (result[i].subGroupSize != result[last].subGroupSize)
137 {
138 log_error(
139 "ERROR: unexpected variation in get_*_sub_group_*()\n");
140 return -1;
141 }
142 if (result[i].subGroupLocalId >= result[last].subGroupSize)
143 {
144 log_error("ERROR: get_sub_group_local_id() returned out of "
145 "bounds value: %u \n",
146 result[i].subGroupLocalId);
147 return -1;
148 }
149 }
150 }
151 else
152 {
153 if (result[i].subGroupSize != result[first].subGroupSize)
154 {
155 log_error(
156 "ERROR: unexpected variation in get_*_sub_group_*()\n");
157 return -1;
158 }
159 if (result[i].subGroupLocalId >= result[first].subGroupSize)
160 {
161 log_error("ERROR: get_sub_group_local_id() returned out of "
162 "bounds value: %u \n",
163 result[i].subGroupLocalId);
164 return -1;
165 }
166 }
167
168 j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
169 + (result[i].subGroupLocalId >> 5);
170 if (j < sizeof(hit) / 4)
171 {
172 cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
173 if ((hit[j] & b) != 0)
174 {
175 log_error("ERROR: get_sub_group_local_id() repeated a result "
176 "in the same sub group\n");
177 return -1;
178 }
179 hit[j] |= b;
180 }
181 }
182
183 return 0;
184 }
185
test_work_item_functions(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)186 int test_work_item_functions(cl_device_id device, cl_context context,
187 cl_command_queue queue, int num_elements,
188 bool useCoreSubgroups)
189 {
190 static const size_t lsize = 200;
191 int error;
192 int i, j, k, q, r, nw;
193 int maxwgs;
194 cl_uint ensg;
195 size_t global;
196 size_t local;
197 get_test_data result[lsize * 6];
198 clProgramWrapper program;
199 clKernelWrapper kernel;
200 clMemWrapper out;
201 std::stringstream kernel_sstr;
202 if (useCoreSubgroups)
203 {
204 kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
205 }
206 kernel_sstr
207 << "\n"
208 "\n"
209 "typedef struct {\n"
210 " uint subGroupSize;\n"
211 " uint maxSubGroupSize;\n"
212 " uint numSubGroups;\n"
213 " uint enqNumSubGroups;\n"
214 " uint subGroupId;\n"
215 " uint subGroupLocalId;\n"
216 "} get_test_data;\n"
217 "\n"
218 "__kernel void get_test( __global get_test_data *outData )\n"
219 "{\n"
220 " int gid = get_global_id( 0 );\n"
221 " outData[gid].subGroupSize = get_sub_group_size();\n"
222 " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
223 " outData[gid].numSubGroups = get_num_sub_groups();\n"
224 " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
225 " outData[gid].subGroupId = get_sub_group_id();\n"
226 " outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
227 "}";
228 const std::string &kernel_str = kernel_sstr.str();
229 const char *kernel_src = kernel_str.c_str();
230 error = create_single_kernel_helper(context, &program, &kernel, 1,
231 &kernel_src, "get_test");
232 if (error != 0) return error;
233
234 error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
235 if (error != 0) return error;
236
237 maxwgs = (int)local;
238
239 // Limit it a bit so we have muliple work groups
240 // Ideally this will still be large enough to give us multiple subgroups
241 if (local > lsize) local = lsize;
242
243 // Create our buffer
244 out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL,
245 &error);
246 test_error(error, "clCreateBuffer failed");
247
248 // Set argument
249 error = clSetKernelArg(kernel, 0, sizeof(out), &out);
250 test_error(error, "clSetKernelArg failed");
251
252 global = local * 5;
253
254 // Make sure we have a flexible range
255 global += 3 * local / 4;
256
257 // Collect the data
258 memset((void *)&result, 0xf0, sizeof(result));
259
260 error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result),
261 (void *)&result, 0, NULL, NULL);
262 test_error(error, "clEnqueueWriteBuffer failed");
263
264 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
265 NULL, NULL);
266 test_error(error, "clEnqueueNDRangeKernel failed");
267
268 error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
269 (void *)&result, 0, NULL, NULL);
270 test_error(error, "clEnqueueReadBuffer failed");
271
272 error = clFinish(queue);
273 test_error(error, "clFinish failed");
274
275 nw = (int)local;
276 ensg = result[0].enqNumSubGroups;
277
278 // Check the first group
279 error = check_group(result, nw, ensg, maxwgs);
280 if (error) return error;
281
282 q = (int)global / nw;
283 r = (int)global % nw;
284
285 // Check the remaining work groups including the last if it is the same size
286 for (k = 1; k < q; ++k)
287 {
288 for (j = 0; j < nw; ++j)
289 {
290 i = k * nw + j;
291 if (!(result[i] == result[i - nw]))
292 {
293 log_error("ERROR: sub group mapping is not identical for all "
294 "work groups\n");
295 return -1;
296 }
297 }
298 }
299
300 // Check the last group if it wasn't the same size
301 if (r != 0)
302 {
303 error = check_group(result + q * nw, r, ensg, maxwgs);
304 if (error) return error;
305 }
306
307 return 0;
308 }
309
test_work_item_functions_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)310 int test_work_item_functions_core(cl_device_id device, cl_context context,
311 cl_command_queue queue, int num_elements)
312 {
313 return test_work_item_functions(device, context, queue, num_elements, true);
314 }
315
test_work_item_functions_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)316 int test_work_item_functions_ext(cl_device_id device, cl_context context,
317 cl_command_queue queue, int num_elements)
318 {
319 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
320
321 if (!hasExtension)
322 {
323 log_info(
324 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
325 return TEST_SKIPPED_ITSELF;
326 }
327
328 return test_work_item_functions(device, context, queue, num_elements,
329 false);
330 }