1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/conversions.h"
19 #include "harness/typeWrappers.h"
20
21
22 // These need to stay in sync with the kernel source below
23 #define NUM_LOC 49
24 #define INST_LOC_MASK 0x7f
25 #define INST_OP_SHIFT 0
26 #define INST_OP_MASK 0xf
27 #define INST_LOC_SHIFT 4
28 #define INST_VAL_SHIFT 12
29 #define INST_VAL_MASK 0x7ffff
30 #define INST_END 0x0
31 #define INST_STORE 0x1
32 #define INST_WAIT 0x2
33 #define INST_COUNT 0x3
34
35 static const char *ifp_source =
36 "#define NUM_LOC 49\n"
37 "#define INST_LOC_MASK 0x7f\n"
38 "#define INST_OP_SHIFT 0\n"
39 "#define INST_OP_MASK 0xf\n"
40 "#define INST_LOC_SHIFT 4\n"
41 "#define INST_VAL_SHIFT 12\n"
42 "#define INST_VAL_MASK 0x7ffff\n"
43 "#define INST_END 0x0\n"
44 "#define INST_STORE 0x1\n"
45 "#define INST_WAIT 0x2\n"
46 "#define INST_COUNT 0x3\n"
47 "\n"
48 "__kernel void\n"
49 "test_ifp(const __global int *in, __global int4 *xy, __global int *out)\n"
50 "{\n"
51 " __local atomic_int loc[NUM_LOC];\n"
52 "\n"
53 " // Don't run if there is only one sub group\n"
54 " if (get_num_sub_groups() == 1)\n"
55 " return;\n"
56 "\n"
57 " // First initialize loc[]\n"
58 " int lid = (int)get_local_id(0);\n"
59 "\n"
60 " if (lid < NUM_LOC)\n"
61 " atomic_init(loc+lid, 0);\n"
62 "\n"
63 " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
64 "\n"
65 " // Compute pointer to this sub group's \"instructions\"\n"
66 " const __global int *pc = in +\n"
67 " ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
68 " (int)get_sub_group_id()) *\n"
69 " (NUM_LOC+1);\n"
70 "\n"
71 " // Set up to \"run\"\n"
72 " bool ok = (int)get_sub_group_local_id() == 0;\n"
73 " bool run = true;\n"
74 "\n"
75 " while (run) {\n"
76 " int inst = *pc++;\n"
77 " int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
78 " int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
79 " int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
80 "\n"
81 " switch (iop) {\n"
82 " case INST_STORE:\n"
83 " if (ok)\n"
84 " atomic_store(loc+iloc, ival);\n"
85 " break;\n"
86 " case INST_WAIT:\n"
87 " if (ok) {\n"
88 " while (atomic_load(loc+iloc) != ival)\n"
89 " ;\n"
90 " }\n"
91 " break;\n"
92 " case INST_COUNT:\n"
93 " if (ok) {\n"
94 " int i;\n"
95 " for (i=0;i<ival;++i)\n"
96 " atomic_fetch_add(loc+iloc, 1);\n"
97 " }\n"
98 " break;\n"
99 " case INST_END:\n"
100 " run = false;\n"
101 " break;\n"
102 " }\n"
103 "\n"
104 " sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
105 " }\n"
106 "\n"
107 " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
108 "\n"
109 " // Save this group's result\n"
110 " __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
111 " if (lid < NUM_LOC)\n"
112 " op[lid] = atomic_load(loc+lid);\n"
113 "}\n";
114
115
116 // Independent forward progress stuff
117 // Note:
118 // Output needs num_groups * NUM_LOC elements
119 // local_size must be > NUM_LOC
120 // Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
121
inst(int op,int loc,int val)122 static inline int inst(int op, int loc, int val)
123 {
124 return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
125 | (op << INST_OP_SHIFT);
126 }
127
gen_insts(cl_int * x,cl_int * p,int n)128 void gen_insts(cl_int *x, cl_int *p, int n)
129 {
130 int i, j0, j1;
131 int val;
132 int ii[NUM_LOC];
133
134 // Create a random permutation of 0...NUM_LOC-1
135 ii[0] = 0;
136 for (i = 1; i < NUM_LOC; ++i)
137 {
138 j0 = random_in_range(0, i, gMTdata);
139 if (j0 != i) ii[i] = ii[j0];
140 ii[j0] = i;
141 }
142
143 // Initialize "instruction pointers"
144 memset(p, 0, n * 4);
145
146 for (i = 0; i < NUM_LOC; ++i)
147 {
148 // Randomly choose 2 different sub groups
149 // One does a random amount of work, and the other waits for it
150 j0 = random_in_range(0, n - 1, gMTdata);
151
152 do
153 {
154 j1 = random_in_range(0, n - 1, gMTdata);
155 } while (j1 == j0);
156
157 // Randomly choose a wait value and assign "instructions"
158 val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
159 x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
160 x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
161 ++p[j0];
162 ++p[j1];
163 }
164
165 // Last "inst" for each sub group is END
166 for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
167 }
168
169 // Execute one group's "instructions"
run_insts(cl_int * x,cl_int * p,int n)170 void run_insts(cl_int *x, cl_int *p, int n)
171 {
172 int i, nend;
173 bool scont;
174 cl_int loc[NUM_LOC];
175
176 // Initialize result and "instruction pointers"
177 memset(loc, 0, sizeof(loc));
178 memset(p, 0, 4 * n);
179
180 // Repetitively loop over subgroups with each executing "instructions" until
181 // blocked The loop terminates when all subgroups have hit the "END
182 // instruction"
183 do
184 {
185 nend = 0;
186 for (i = 0; i < n; ++i)
187 {
188 do
189 {
190 cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
191 cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
192 cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
193 cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
194 scont = false;
195
196 switch (iop)
197 {
198 case INST_STORE:
199 loc[iloc] = ival;
200 ++p[i];
201 scont = true;
202 break;
203 case INST_WAIT:
204 if (loc[iloc] == ival)
205 {
206 ++p[i];
207 scont = true;
208 }
209 break;
210 case INST_COUNT:
211 loc[iloc] += ival;
212 ++p[i];
213 scont = true;
214 break;
215 case INST_END: ++nend; break;
216 }
217 } while (scont);
218 }
219 } while (nend < n);
220
221 // Return result, reusing "p"
222 memcpy(p, loc, sizeof(loc));
223 }
224
225
226 struct IFP
227 {
genIFP228 static void gen(cl_int *x, cl_int *t, cl_int *,
229 const WorkGroupParams &test_params)
230 {
231 int k;
232 int nw = test_params.local_workgroup_size;
233 int ns = test_params.subgroup_size;
234 int ng = test_params.global_workgroup_size;
235 int nj = (nw + ns - 1) / ns;
236 ng = ng / nw;
237
238 // We need at least 2 sub groups per group for this test
239 if (nj == 1) return;
240
241 for (k = 0; k < ng; ++k)
242 {
243 gen_insts(x, t, nj);
244 x += nj * (NUM_LOC + 1);
245 }
246 }
247
chkIFP248 static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
249 const WorkGroupParams &test_params)
250 {
251 int i, k;
252 int nw = test_params.local_workgroup_size;
253 int ns = test_params.subgroup_size;
254 int ng = test_params.global_workgroup_size;
255 int nj = (nw + ns - 1) / ns;
256 ng = ng / nw;
257
258 // We need at least 2 sub groups per group for this tes
259 if (nj == 1) return 0;
260
261 log_info(" independent forward progress...\n");
262
263 for (k = 0; k < ng; ++k)
264 {
265 run_insts(x, t, nj);
266 for (i = 0; i < NUM_LOC; ++i)
267 {
268 if (t[i] != y[i])
269 {
270 log_error(
271 "ERROR: mismatch at element %d in work group %d\n", i,
272 k);
273 return -1;
274 }
275 }
276 x += nj * (NUM_LOC + 1);
277 y += NUM_LOC;
278 }
279
280 return 0;
281 }
282 };
283
test_ifp(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)284 int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
285 int num_elements, bool useCoreSubgroups)
286 {
287 int error = TEST_PASS;
288
289 // Global/local work group sizes
290 // Adjust these individually below if desired/needed
291 constexpr size_t global_work_size = 2000;
292 constexpr size_t local_work_size = 200;
293 WorkGroupParams test_params(global_work_size, local_work_size);
294 test_params.use_core_subgroups = useCoreSubgroups;
295 test_params.dynsc = NUM_LOC + 1;
296 error = test<cl_int, IFP>::run(device, context, queue, num_elements,
297 "test_ifp", ifp_source, test_params);
298 return error;
299 }
300
checkIFPSupport(cl_device_id device,bool & ifpSupport)301 static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport)
302 {
303 cl_uint ifp_supported;
304 cl_uint error;
305 error = clGetDeviceInfo(device,
306 CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
307 sizeof(ifp_supported), &ifp_supported, NULL);
308 if (error != CL_SUCCESS)
309 {
310 print_error(
311 error,
312 "Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
313 "capability");
314 return TEST_FAIL;
315 }
316 // skip testing ifp
317 if (ifp_supported != 1)
318 {
319 log_info("INDEPENDENT FORWARD PROGRESS not supported...\n");
320 ifpSupport = false;
321 }
322 else
323 {
324 log_info("INDEPENDENT FORWARD PROGRESS supported...\n");
325 ifpSupport = true;
326 }
327 return TEST_PASS;
328 }
329
test_ifp_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)330 int test_ifp_core(cl_device_id device, cl_context context,
331 cl_command_queue queue, int num_elements)
332 {
333 bool ifpSupport = true;
334 test_status error;
335 error = checkIFPSupport(device, ifpSupport);
336 if (error != TEST_PASS)
337 {
338 return error;
339 }
340 if (ifpSupport == false)
341 {
342 log_info("Independed forward progress skipped.\n");
343 return TEST_SKIPPED_ITSELF;
344 }
345
346 return test_ifp(device, context, queue, num_elements, true);
347 }
348
test_ifp_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)349 int test_ifp_ext(cl_device_id device, cl_context context,
350 cl_command_queue queue, int num_elements)
351 {
352 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
353 bool ifpSupport = true;
354
355 if (!hasExtension)
356 {
357 log_info(
358 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
359 return TEST_SKIPPED_ITSELF;
360 }
361 // ifp only in subgroup functions tests:
362 test_status error;
363 error = checkIFPSupport(device, ifpSupport);
364 if (error != TEST_PASS)
365 {
366 return error;
367 }
368 if (ifpSupport == false)
369 {
370 log_info(
371 "Error reason: the extension cl_khr_subgroups requires that "
372 "Independed forward progress has to be supported by device.\n");
373 return TEST_FAIL;
374 }
375 return test_ifp(device, context, queue, num_elements, false);
376 }