1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19
20 struct get_test_data
21 {
22 cl_uint subGroupSize;
23 cl_uint maxSubGroupSize;
24 cl_uint numSubGroups;
25 cl_uint enqNumSubGroups;
26 cl_uint subGroupId;
27 cl_uint subGroupLocalId;
operator ==get_test_data28 bool operator==(get_test_data x)
29 {
30 return subGroupSize == x.subGroupSize
31 && maxSubGroupSize == x.maxSubGroupSize
32 && numSubGroups == x.numSubGroups && subGroupId == x.subGroupId
33 && subGroupLocalId == x.subGroupLocalId;
34 }
35 };
36
check_group(const get_test_data * result,int nw,cl_uint ensg,int maxwgs)37 static int check_group(const get_test_data *result, int nw, cl_uint ensg,
38 int maxwgs)
39 {
40 int first = -1;
41 int last = -1;
42 int i, j;
43 cl_uint hit[32];
44
45 for (i = 0; i < nw; ++i)
46 {
47 if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
48 first = i;
49 if (result[i].subGroupId == result[0].numSubGroups - 1
50 && result[i].subGroupLocalId == 0)
51 last = i;
52 if (first != -1 && last != -1) break;
53 }
54
55 if (first == -1 || last == -1)
56 {
57 log_error("ERROR: expected sub group id's are missing\n");
58 return -1;
59 }
60
61 // Check them
62 if (result[first].subGroupSize == 0)
63 {
64 log_error("ERROR: get_sub_group_size() returned 0\n");
65 return -1;
66 }
67 if (result[first].maxSubGroupSize == 0
68 || result[first].maxSubGroupSize > maxwgs)
69 {
70 log_error(
71 "ERROR: get_max_subgroup_size() returned incorrect result: %u\n",
72 result[first].maxSubGroupSize);
73 return -1;
74 }
75 if (result[first].subGroupSize > result[first].maxSubGroupSize)
76 {
77 log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
78 return -1;
79 }
80 if (result[last].subGroupSize > result[first].subGroupSize)
81 {
82 log_error("ERROR: last sub group larger than first sub group\n");
83 return -1;
84 }
85 if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg)
86 {
87 log_error(
88 "ERROR: get_num_sub_groups() returned incorrect result: %u \n",
89 result[first].numSubGroups);
90 return -1;
91 }
92
93 memset(hit, 0, sizeof(hit));
94 for (i = 0; i < nw; ++i)
95 {
96 if (result[i].maxSubGroupSize != result[first].maxSubGroupSize
97 || result[i].numSubGroups != result[first].numSubGroups)
98 {
99 log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
100 return -1;
101 }
102 if (result[i].subGroupId >= result[first].numSubGroups)
103 {
104 log_error(
105 "ERROR: get_sub_group_id() returned out of range value: %u\n",
106 result[i].subGroupId);
107 return -1;
108 }
109 if (result[i].enqNumSubGroups != ensg)
110 {
111 log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect "
112 "value: %u\n",
113 result[i].enqNumSubGroups);
114 return -1;
115 }
116 if (result[first].numSubGroups > 1)
117 {
118 if (result[i].subGroupId < result[first].numSubGroups - 1)
119 {
120 if (result[i].subGroupSize != result[first].subGroupSize)
121 {
122 log_error(
123 "ERROR: unexpected variation in get_*_sub_group_*()\n");
124 return -1;
125 }
126 if (result[i].subGroupLocalId >= result[first].subGroupSize)
127 {
128 log_error("ERROR: get_sub_group_local_id() returned out of "
129 "bounds value: %u \n",
130 result[i].subGroupLocalId);
131 return -1;
132 }
133 }
134 else
135 {
136 if (result[i].subGroupSize != result[last].subGroupSize)
137 {
138 log_error(
139 "ERROR: unexpected variation in get_*_sub_group_*()\n");
140 return -1;
141 }
142 if (result[i].subGroupLocalId >= result[last].subGroupSize)
143 {
144 log_error("ERROR: get_sub_group_local_id() returned out of "
145 "bounds value: %u \n",
146 result[i].subGroupLocalId);
147 return -1;
148 }
149 }
150 }
151 else
152 {
153 if (result[i].subGroupSize != result[first].subGroupSize)
154 {
155 log_error(
156 "ERROR: unexpected variation in get_*_sub_group_*()\n");
157 return -1;
158 }
159 if (result[i].subGroupLocalId >= result[first].subGroupSize)
160 {
161 log_error("ERROR: get_sub_group_local_id() returned out of "
162 "bounds value: %u \n",
163 result[i].subGroupLocalId);
164 return -1;
165 }
166 }
167
168 j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
169 + (result[i].subGroupLocalId >> 5);
170 if (j < sizeof(hit) / 4)
171 {
172 cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
173 if ((hit[j] & b) != 0)
174 {
175 log_error("ERROR: get_sub_group_local_id() repeated a result "
176 "in the same sub group\n");
177 return -1;
178 }
179 hit[j] |= b;
180 }
181 }
182
183 return 0;
184 }
185
test_work_item_functions(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)186 int test_work_item_functions(cl_device_id device, cl_context context,
187 cl_command_queue queue, int num_elements,
188 bool useCoreSubgroups)
189 {
190 static const size_t lsize = 200;
191 int error;
192 int i, j, k, q, r, nw;
193 int maxwgs;
194 cl_uint ensg;
195 size_t global;
196 size_t local;
197 get_test_data result[lsize * 6];
198 clProgramWrapper program;
199 clKernelWrapper kernel;
200 clMemWrapper out;
201 std::stringstream kernel_sstr;
202 if (useCoreSubgroups)
203 {
204 kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
205 }
206 kernel_sstr
207 << "\n"
208 "\n"
209 "typedef struct {\n"
210 " uint subGroupSize;\n"
211 " uint maxSubGroupSize;\n"
212 " uint numSubGroups;\n"
213 " uint enqNumSubGroups;\n"
214 " uint subGroupId;\n"
215 " uint subGroupLocalId;\n"
216 "} get_test_data;\n"
217 "\n"
218 "__kernel void get_test( __global get_test_data *outData )\n"
219 "{\n"
220 " int gid = get_global_id( 0 );\n"
221 " outData[gid].subGroupSize = get_sub_group_size();\n"
222 " outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
223 " outData[gid].numSubGroups = get_num_sub_groups();\n"
224 " outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
225 " outData[gid].subGroupId = get_sub_group_id();\n"
226 " outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
227 "}";
228 const std::string &kernel_str = kernel_sstr.str();
229 const char *kernel_src = kernel_str.c_str();
230 error = create_single_kernel_helper_with_build_options(
231 context, &program, &kernel, 1, &kernel_src, "get_test",
232 "-cl-std=CL2.0");
233 if (error != 0) return error;
234
235 error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
236 if (error != 0) return error;
237
238 maxwgs = (int)local;
239
240 // Limit it a bit so we have muliple work groups
241 // Ideally this will still be large enough to give us multiple subgroups
242 if (local > lsize) local = lsize;
243
244 // Create our buffer
245 out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL,
246 &error);
247 test_error(error, "clCreateBuffer failed");
248
249 // Set argument
250 error = clSetKernelArg(kernel, 0, sizeof(out), &out);
251 test_error(error, "clSetKernelArg failed");
252
253 global = local * 5;
254
255 // Make sure we have a flexible range
256 global += 3 * local / 4;
257
258 // Collect the data
259 memset((void *)&result, 0xf0, sizeof(result));
260
261 error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result),
262 (void *)&result, 0, NULL, NULL);
263 test_error(error, "clEnqueueWriteBuffer failed");
264
265 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
266 NULL, NULL);
267 test_error(error, "clEnqueueNDRangeKernel failed");
268
269 error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
270 (void *)&result, 0, NULL, NULL);
271 test_error(error, "clEnqueueReadBuffer failed");
272
273 error = clFinish(queue);
274 test_error(error, "clFinish failed");
275
276 nw = (int)local;
277 ensg = result[0].enqNumSubGroups;
278
279 // Check the first group
280 error = check_group(result, nw, ensg, maxwgs);
281 if (error) return error;
282
283 q = (int)global / nw;
284 r = (int)global % nw;
285
286 // Check the remaining work groups including the last if it is the same size
287 for (k = 1; k < q; ++k)
288 {
289 for (j = 0; j < nw; ++j)
290 {
291 i = k * nw + j;
292 if (!(result[i] == result[i - nw]))
293 {
294 log_error("ERROR: sub group mapping is not identical for all "
295 "work groups\n");
296 return -1;
297 }
298 }
299 }
300
301 // Check the last group if it wasn't the same size
302 if (r != 0)
303 {
304 error = check_group(result + q * nw, r, ensg, maxwgs);
305 if (error) return error;
306 }
307
308 return 0;
309 }
310
test_work_item_functions_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)311 int test_work_item_functions_core(cl_device_id device, cl_context context,
312 cl_command_queue queue, int num_elements)
313 {
314 return test_work_item_functions(device, context, queue, num_elements, true);
315 }
316
test_work_item_functions_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)317 int test_work_item_functions_ext(cl_device_id device, cl_context context,
318 cl_command_queue queue, int num_elements)
319 {
320 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
321
322 if (!hasExtension)
323 {
324 log_info(
325 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
326 return TEST_SKIPPED_ITSELF;
327 }
328
329 return test_work_item_functions(device, context, queue, num_elements,
330 false);
331 }