1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/conversions.h"
19 #include "harness/typeWrappers.h"
20
21
22 // These need to stay in sync with the kernel source below
23 #define NUM_LOC 49
24 #define INST_LOC_MASK 0x7f
25 #define INST_OP_SHIFT 0
26 #define INST_OP_MASK 0xf
27 #define INST_LOC_SHIFT 4
28 #define INST_VAL_SHIFT 12
29 #define INST_VAL_MASK 0x7ffff
30 #define INST_END 0x0
31 #define INST_STORE 0x1
32 #define INST_WAIT 0x2
33 #define INST_COUNT 0x3
34
35 static const char *ifp_source =
36 "#define NUM_LOC 49\n"
37 "#define INST_LOC_MASK 0x7f\n"
38 "#define INST_OP_SHIFT 0\n"
39 "#define INST_OP_MASK 0xf\n"
40 "#define INST_LOC_SHIFT 4\n"
41 "#define INST_VAL_SHIFT 12\n"
42 "#define INST_VAL_MASK 0x7ffff\n"
43 "#define INST_END 0x0\n"
44 "#define INST_STORE 0x1\n"
45 "#define INST_WAIT 0x2\n"
46 "#define INST_COUNT 0x3\n"
47 "\n"
48 "__kernel void\n"
49 "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
50 "{\n"
51 " __local atomic_int loc[NUM_LOC];\n"
52 "\n"
53 " // Don't run if there is only one sub group\n"
54 " if (get_num_sub_groups() == 1)\n"
55 " return;\n"
56 "\n"
57 " // First initialize loc[]\n"
58 " int lid = (int)get_local_id(0);\n"
59 "\n"
60 " if (lid < NUM_LOC)\n"
61 " atomic_init(loc+lid, 0);\n"
62 "\n"
63 " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
64 "\n"
65 " // Compute pointer to this sub group's \"instructions\"\n"
66 " const __global int *pc = in +\n"
67 " ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
68 " (int)get_sub_group_id()) *\n"
69 " (NUM_LOC+1);\n"
70 "\n"
71 " // Set up to \"run\"\n"
72 " bool ok = (int)get_sub_group_local_id() == 0;\n"
73 " bool run = true;\n"
74 "\n"
75 " while (run) {\n"
76 " int inst = *pc++;\n"
77 " int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
78 " int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
79 " int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
80 "\n"
81 " switch (iop) {\n"
82 " case INST_STORE:\n"
83 " if (ok)\n"
84 " atomic_store(loc+iloc, ival);\n"
85 " break;\n"
86 " case INST_WAIT:\n"
87 " if (ok) {\n"
88 " while (atomic_load(loc+iloc) != ival)\n"
89 " ;\n"
90 " }\n"
91 " break;\n"
92 " case INST_COUNT:\n"
93 " if (ok) {\n"
94 " int i;\n"
95 " for (i=0;i<ival;++i)\n"
96 " atomic_fetch_add(loc+iloc, 1);\n"
97 " }\n"
98 " break;\n"
99 " case INST_END:\n"
100 " run = false;\n"
101 " break;\n"
102 " }\n"
103 "\n"
104 " sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
105 " }\n"
106 "\n"
107 " work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
108 "\n"
109 " // Save this group's result\n"
110 " __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
111 " if (lid < NUM_LOC)\n"
112 " op[lid] = atomic_load(loc+lid);\n"
113 "}\n";
114
115
116 // Independent forward progress stuff
117 // Note:
118 // Output needs num_groups * NUM_LOC elements
119 // local_size must be > NUM_LOC
120 // Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
121
inst(int op,int loc,int val)122 static inline int inst(int op, int loc, int val)
123 {
124 return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
125 | (op << INST_OP_SHIFT);
126 }
127
gen_insts(cl_int * x,cl_int * p,int n)128 void gen_insts(cl_int *x, cl_int *p, int n)
129 {
130 int i, j0, j1;
131 int val;
132 int ii[NUM_LOC];
133
134 // Create a random permutation of 0...NUM_LOC-1
135 ii[0] = 0;
136 for (i = 1; i < NUM_LOC; ++i)
137 {
138 j0 = random_in_range(0, i, gMTdata);
139 if (j0 != i) ii[i] = ii[j0];
140 ii[j0] = i;
141 }
142
143 // Initialize "instruction pointers"
144 memset(p, 0, n * 4);
145
146 for (i = 0; i < NUM_LOC; ++i)
147 {
148 // Randomly choose 2 different sub groups
149 // One does a random amount of work, and the other waits for it
150 j0 = random_in_range(0, n - 1, gMTdata);
151
152 do
153 {
154 j1 = random_in_range(0, n - 1, gMTdata);
155 } while (j1 == j0);
156
157 // Randomly choose a wait value and assign "instructions"
158 val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
159 x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
160 x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
161 ++p[j0];
162 ++p[j1];
163 }
164
165 // Last "inst" for each sub group is END
166 for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
167 }
168
169 // Execute one group's "instructions"
run_insts(cl_int * x,cl_int * p,int n)170 void run_insts(cl_int *x, cl_int *p, int n)
171 {
172 int i, nend;
173 bool scont;
174 cl_int loc[NUM_LOC];
175
176 // Initialize result and "instruction pointers"
177 memset(loc, 0, sizeof(loc));
178 memset(p, 0, 4 * n);
179
180 // Repetitively loop over subgroups with each executing "instructions" until
181 // blocked The loop terminates when all subgroups have hit the "END
182 // instruction"
183 do
184 {
185 nend = 0;
186 for (i = 0; i < n; ++i)
187 {
188 do
189 {
190 cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
191 cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
192 cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
193 cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
194 scont = false;
195
196 switch (iop)
197 {
198 case INST_STORE:
199 loc[iloc] = ival;
200 ++p[i];
201 scont = true;
202 break;
203 case INST_WAIT:
204 if (loc[iloc] == ival)
205 {
206 ++p[i];
207 scont = true;
208 }
209 break;
210 case INST_COUNT:
211 loc[iloc] += ival;
212 ++p[i];
213 scont = true;
214 break;
215 case INST_END: ++nend; break;
216 }
217 } while (scont);
218 }
219 } while (nend < n);
220
221 // Return result, reusing "p"
222 memcpy(p, loc, sizeof(loc));
223 }
224
225
226 struct IFP
227 {
genIFP228 static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
229 {
230 int k;
231 int nj = (nw + ns - 1) / ns;
232
233 // We need at least 2 sub groups per group for this test
234 if (nj == 1) return;
235
236 for (k = 0; k < ng; ++k)
237 {
238 gen_insts(x, t, nj);
239 x += nj * (NUM_LOC + 1);
240 }
241 }
242
chkIFP243 static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
244 int nw, int ng)
245 {
246 int i, k;
247 int nj = (nw + ns - 1) / ns;
248
249 // We need at least 2 sub groups per group for this tes
250 if (nj == 1) return 0;
251
252 log_info(" independent forward progress...\n");
253
254 for (k = 0; k < ng; ++k)
255 {
256 run_insts(x, t, nj);
257 for (i = 0; i < NUM_LOC; ++i)
258 {
259 if (t[i] != y[i])
260 {
261 log_error(
262 "ERROR: mismatch at element %d in work group %d\n", i,
263 k);
264 return -1;
265 }
266 }
267 x += nj * (NUM_LOC + 1);
268 y += NUM_LOC;
269 }
270
271 return 0;
272 }
273 };
274
test_ifp(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)275 int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
276 int num_elements, bool useCoreSubgroups)
277 {
278 int error;
279
280 // Adjust these individually below if desired/needed
281 #define G 2000
282 #define L 200
283 error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
284 "test_ifp", ifp_source, NUM_LOC + 1,
285 useCoreSubgroups);
286 return error;
287 }
288
checkIFPSupport(cl_device_id device,bool & ifpSupport)289 static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport)
290 {
291 cl_uint ifp_supported;
292 cl_uint error;
293 error = clGetDeviceInfo(device,
294 CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
295 sizeof(ifp_supported), &ifp_supported, NULL);
296 if (error != CL_SUCCESS)
297 {
298 print_error(
299 error,
300 "Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
301 "capability");
302 return TEST_FAIL;
303 }
304 // skip testing ifp
305 if (ifp_supported != 1)
306 {
307 log_info("INDEPENDENT FORWARD PROGRESS not supported...\n");
308 ifpSupport = false;
309 }
310 else
311 {
312 log_info("INDEPENDENT FORWARD PROGRESS supported...\n");
313 ifpSupport = true;
314 }
315 return TEST_PASS;
316 }
317
test_ifp_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)318 int test_ifp_core(cl_device_id device, cl_context context,
319 cl_command_queue queue, int num_elements)
320 {
321 bool ifpSupport = true;
322 test_status error;
323 error = checkIFPSupport(device, ifpSupport);
324 if (error != TEST_PASS)
325 {
326 return error;
327 }
328 if (ifpSupport == false)
329 {
330 log_info("Independed forward progress skipped.\n");
331 return TEST_SKIPPED_ITSELF;
332 }
333
334 return test_ifp(device, context, queue, num_elements, true);
335 }
336
test_ifp_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)337 int test_ifp_ext(cl_device_id device, cl_context context,
338 cl_command_queue queue, int num_elements)
339 {
340 bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
341 bool ifpSupport = true;
342
343 if (!hasExtension)
344 {
345 log_info(
346 "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
347 return TEST_SKIPPED_ITSELF;
348 }
349 // ifp only in subgroup functions tests:
350 test_status error;
351 error = checkIFPSupport(device, ifpSupport);
352 if (error != TEST_PASS)
353 {
354 return error;
355 }
356 if (ifpSupport == false)
357 {
358 log_info(
359 "Error reason: the extension cl_khr_subgroups requires that "
360 "Independed forward progress has to be supported by device.\n");
361 return TEST_FAIL;
362 }
363 return test_ifp(device, context, queue, num_elements, false);
364 }