• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/conversions.h"
19 #include "harness/typeWrappers.h"
20 
21 
22 // These need to stay in sync with the kernel source below
23 #define NUM_LOC 49
24 #define INST_LOC_MASK 0x7f
25 #define INST_OP_SHIFT 0
26 #define INST_OP_MASK 0xf
27 #define INST_LOC_SHIFT 4
28 #define INST_VAL_SHIFT 12
29 #define INST_VAL_MASK 0x7ffff
30 #define INST_END 0x0
31 #define INST_STORE 0x1
32 #define INST_WAIT 0x2
33 #define INST_COUNT 0x3
34 
35 static const char *ifp_source =
36     "#define NUM_LOC 49\n"
37     "#define INST_LOC_MASK 0x7f\n"
38     "#define INST_OP_SHIFT 0\n"
39     "#define INST_OP_MASK 0xf\n"
40     "#define INST_LOC_SHIFT 4\n"
41     "#define INST_VAL_SHIFT 12\n"
42     "#define INST_VAL_MASK 0x7ffff\n"
43     "#define INST_END 0x0\n"
44     "#define INST_STORE 0x1\n"
45     "#define INST_WAIT 0x2\n"
46     "#define INST_COUNT 0x3\n"
47     "\n"
48     "__kernel void\n"
49     "test_ifp(const __global int *in, __global int4 *xy, __global int *out)\n"
50     "{\n"
51     "    __local atomic_int loc[NUM_LOC];\n"
52     "\n"
53     "    // Don't run if there is only one sub group\n"
54     "    if (get_num_sub_groups() == 1)\n"
55     "        return;\n"
56     "\n"
57     "    // First initialize loc[]\n"
58     "    int lid = (int)get_local_id(0);\n"
59     "\n"
60     "    if (lid < NUM_LOC)\n"
61     "        atomic_init(loc+lid, 0);\n"
62     "\n"
63     "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
64     "\n"
65     "    // Compute pointer to this sub group's \"instructions\"\n"
66     "    const __global int *pc = in +\n"
67     "        ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
68     "         (int)get_sub_group_id()) *\n"
69     "        (NUM_LOC+1);\n"
70     "\n"
71     "    // Set up to \"run\"\n"
72     "    bool ok = (int)get_sub_group_local_id() == 0;\n"
73     "    bool run = true;\n"
74     "\n"
75     "    while (run) {\n"
76     "        int inst = *pc++;\n"
77     "        int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
78     "        int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
79     "        int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
80     "\n"
81     "        switch (iop) {\n"
82     "        case INST_STORE:\n"
83     "            if (ok)\n"
84     "                atomic_store(loc+iloc, ival);\n"
85     "            break;\n"
86     "        case INST_WAIT:\n"
87     "            if (ok) {\n"
88     "                while (atomic_load(loc+iloc) != ival)\n"
89     "                    ;\n"
90     "            }\n"
91     "            break;\n"
92     "        case INST_COUNT:\n"
93     "            if (ok) {\n"
94     "                int i;\n"
95     "                for (i=0;i<ival;++i)\n"
96     "                    atomic_fetch_add(loc+iloc, 1);\n"
97     "            }\n"
98     "            break;\n"
99     "        case INST_END:\n"
100     "            run = false;\n"
101     "            break;\n"
102     "        }\n"
103     "\n"
104     "        sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
105     "    }\n"
106     "\n"
107     "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
108     "\n"
109     "    // Save this group's result\n"
110     "    __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
111     "    if (lid < NUM_LOC)\n"
112     "        op[lid] = atomic_load(loc+lid);\n"
113     "}\n";
114 
115 
116 // Independent forward progress stuff
117 // Note:
118 //   Output needs num_groups * NUM_LOC elements
119 //   local_size must be > NUM_LOC
120 //   Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
121 
inst(int op,int loc,int val)122 static inline int inst(int op, int loc, int val)
123 {
124     return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
125         | (op << INST_OP_SHIFT);
126 }
127 
gen_insts(cl_int * x,cl_int * p,int n)128 void gen_insts(cl_int *x, cl_int *p, int n)
129 {
130     int i, j0, j1;
131     int val;
132     int ii[NUM_LOC];
133 
134     // Create a random permutation of 0...NUM_LOC-1
135     ii[0] = 0;
136     for (i = 1; i < NUM_LOC; ++i)
137     {
138         j0 = random_in_range(0, i, gMTdata);
139         if (j0 != i) ii[i] = ii[j0];
140         ii[j0] = i;
141     }
142 
143     // Initialize "instruction pointers"
144     memset(p, 0, n * 4);
145 
146     for (i = 0; i < NUM_LOC; ++i)
147     {
148         // Randomly choose 2 different sub groups
149         // One does a random amount of work, and the other waits for it
150         j0 = random_in_range(0, n - 1, gMTdata);
151 
152         do
153         {
154             j1 = random_in_range(0, n - 1, gMTdata);
155         } while (j1 == j0);
156 
157         // Randomly choose a wait value and assign "instructions"
158         val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
159         x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
160         x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
161         ++p[j0];
162         ++p[j1];
163     }
164 
165     // Last "inst" for each sub group is END
166     for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
167 }
168 
169 // Execute one group's "instructions"
run_insts(cl_int * x,cl_int * p,int n)170 void run_insts(cl_int *x, cl_int *p, int n)
171 {
172     int i, nend;
173     bool scont;
174     cl_int loc[NUM_LOC];
175 
176     // Initialize result and "instruction pointers"
177     memset(loc, 0, sizeof(loc));
178     memset(p, 0, 4 * n);
179 
180     // Repetitively loop over subgroups with each executing "instructions" until
181     // blocked The loop terminates when all subgroups have hit the "END
182     // instruction"
183     do
184     {
185         nend = 0;
186         for (i = 0; i < n; ++i)
187         {
188             do
189             {
190                 cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
191                 cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
192                 cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
193                 cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
194                 scont = false;
195 
196                 switch (iop)
197                 {
198                     case INST_STORE:
199                         loc[iloc] = ival;
200                         ++p[i];
201                         scont = true;
202                         break;
203                     case INST_WAIT:
204                         if (loc[iloc] == ival)
205                         {
206                             ++p[i];
207                             scont = true;
208                         }
209                         break;
210                     case INST_COUNT:
211                         loc[iloc] += ival;
212                         ++p[i];
213                         scont = true;
214                         break;
215                     case INST_END: ++nend; break;
216                 }
217             } while (scont);
218         }
219     } while (nend < n);
220 
221     // Return result, reusing "p"
222     memcpy(p, loc, sizeof(loc));
223 }
224 
225 
226 struct IFP
227 {
genIFP228     static void gen(cl_int *x, cl_int *t, cl_int *,
229                     const WorkGroupParams &test_params)
230     {
231         int k;
232         int nw = test_params.local_workgroup_size;
233         int ns = test_params.subgroup_size;
234         int ng = test_params.global_workgroup_size;
235         int nj = (nw + ns - 1) / ns;
236         ng = ng / nw;
237 
238         // We need at least 2 sub groups per group for this test
239         if (nj == 1) return;
240 
241         for (k = 0; k < ng; ++k)
242         {
243             gen_insts(x, t, nj);
244             x += nj * (NUM_LOC + 1);
245         }
246     }
247 
chkIFP248     static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *,
249                    const WorkGroupParams &test_params)
250     {
251         int i, k;
252         int nw = test_params.local_workgroup_size;
253         int ns = test_params.subgroup_size;
254         int ng = test_params.global_workgroup_size;
255         int nj = (nw + ns - 1) / ns;
256         ng = ng / nw;
257 
258         // We need at least 2 sub groups per group for this tes
259         if (nj == 1) return 0;
260 
261         log_info("  independent forward progress...\n");
262 
263         for (k = 0; k < ng; ++k)
264         {
265             run_insts(x, t, nj);
266             for (i = 0; i < NUM_LOC; ++i)
267             {
268                 if (t[i] != y[i])
269                 {
270                     log_error(
271                         "ERROR: mismatch at element %d in work group %d\n", i,
272                         k);
273                     return -1;
274                 }
275             }
276             x += nj * (NUM_LOC + 1);
277             y += NUM_LOC;
278         }
279 
280         return 0;
281     }
282 };
283 
test_ifp(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)284 int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
285              int num_elements, bool useCoreSubgroups)
286 {
287     int error = TEST_PASS;
288 
289     // Global/local work group sizes
290     // Adjust these individually below if desired/needed
291     constexpr size_t global_work_size = 2000;
292     constexpr size_t local_work_size = 200;
293     WorkGroupParams test_params(global_work_size, local_work_size);
294     test_params.use_core_subgroups = useCoreSubgroups;
295     test_params.dynsc = NUM_LOC + 1;
296     error = test<cl_int, IFP>::run(device, context, queue, num_elements,
297                                    "test_ifp", ifp_source, test_params);
298     return error;
299 }
300 
checkIFPSupport(cl_device_id device,bool & ifpSupport)301 static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport)
302 {
303     cl_uint ifp_supported;
304     cl_uint error;
305     error = clGetDeviceInfo(device,
306                             CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
307                             sizeof(ifp_supported), &ifp_supported, NULL);
308     if (error != CL_SUCCESS)
309     {
310         print_error(
311             error,
312             "Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
313             "capability");
314         return TEST_FAIL;
315     }
316     // skip testing ifp
317     if (ifp_supported != 1)
318     {
319         log_info("INDEPENDENT FORWARD PROGRESS not supported...\n");
320         ifpSupport = false;
321     }
322     else
323     {
324         log_info("INDEPENDENT FORWARD PROGRESS supported...\n");
325         ifpSupport = true;
326     }
327     return TEST_PASS;
328 }
329 
test_ifp_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)330 int test_ifp_core(cl_device_id device, cl_context context,
331                   cl_command_queue queue, int num_elements)
332 {
333     bool ifpSupport = true;
334     test_status error;
335     error = checkIFPSupport(device, ifpSupport);
336     if (error != TEST_PASS)
337     {
338         return error;
339     }
340     if (ifpSupport == false)
341     {
342         log_info("Independed forward progress skipped.\n");
343         return TEST_SKIPPED_ITSELF;
344     }
345 
346     return test_ifp(device, context, queue, num_elements, true);
347 }
348 
test_ifp_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)349 int test_ifp_ext(cl_device_id device, cl_context context,
350                  cl_command_queue queue, int num_elements)
351 {
352     bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
353     bool ifpSupport = true;
354 
355     if (!hasExtension)
356     {
357         log_info(
358             "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
359         return TEST_SKIPPED_ITSELF;
360     }
361     // ifp only in subgroup functions tests:
362     test_status error;
363     error = checkIFPSupport(device, ifpSupport);
364     if (error != TEST_PASS)
365     {
366         return error;
367     }
368     if (ifpSupport == false)
369     {
370         log_info(
371             "Error reason: the extension cl_khr_subgroups requires that "
372             "Independed forward progress has to be supported by device.\n");
373         return TEST_FAIL;
374     }
375     return test_ifp(device, context, queue, num_elements, false);
376 }