• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "procs.h"
17 #include "subhelpers.h"
18 #include "harness/conversions.h"
19 #include "harness/typeWrappers.h"
20 
21 
22 // These need to stay in sync with the kernel source below
23 #define NUM_LOC 49
24 #define INST_LOC_MASK 0x7f
25 #define INST_OP_SHIFT 0
26 #define INST_OP_MASK 0xf
27 #define INST_LOC_SHIFT 4
28 #define INST_VAL_SHIFT 12
29 #define INST_VAL_MASK 0x7ffff
30 #define INST_END 0x0
31 #define INST_STORE 0x1
32 #define INST_WAIT 0x2
33 #define INST_COUNT 0x3
34 
35 static const char *ifp_source =
36     "#define NUM_LOC 49\n"
37     "#define INST_LOC_MASK 0x7f\n"
38     "#define INST_OP_SHIFT 0\n"
39     "#define INST_OP_MASK 0xf\n"
40     "#define INST_LOC_SHIFT 4\n"
41     "#define INST_VAL_SHIFT 12\n"
42     "#define INST_VAL_MASK 0x7ffff\n"
43     "#define INST_END 0x0\n"
44     "#define INST_STORE 0x1\n"
45     "#define INST_WAIT 0x2\n"
46     "#define INST_COUNT 0x3\n"
47     "\n"
48     "__kernel void\n"
49     "test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n"
50     "{\n"
51     "    __local atomic_int loc[NUM_LOC];\n"
52     "\n"
53     "    // Don't run if there is only one sub group\n"
54     "    if (get_num_sub_groups() == 1)\n"
55     "        return;\n"
56     "\n"
57     "    // First initialize loc[]\n"
58     "    int lid = (int)get_local_id(0);\n"
59     "\n"
60     "    if (lid < NUM_LOC)\n"
61     "        atomic_init(loc+lid, 0);\n"
62     "\n"
63     "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
64     "\n"
65     "    // Compute pointer to this sub group's \"instructions\"\n"
66     "    const __global int *pc = in +\n"
67     "        ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n"
68     "         (int)get_sub_group_id()) *\n"
69     "        (NUM_LOC+1);\n"
70     "\n"
71     "    // Set up to \"run\"\n"
72     "    bool ok = (int)get_sub_group_local_id() == 0;\n"
73     "    bool run = true;\n"
74     "\n"
75     "    while (run) {\n"
76     "        int inst = *pc++;\n"
77     "        int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n"
78     "        int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n"
79     "        int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n"
80     "\n"
81     "        switch (iop) {\n"
82     "        case INST_STORE:\n"
83     "            if (ok)\n"
84     "                atomic_store(loc+iloc, ival);\n"
85     "            break;\n"
86     "        case INST_WAIT:\n"
87     "            if (ok) {\n"
88     "                while (atomic_load(loc+iloc) != ival)\n"
89     "                    ;\n"
90     "            }\n"
91     "            break;\n"
92     "        case INST_COUNT:\n"
93     "            if (ok) {\n"
94     "                int i;\n"
95     "                for (i=0;i<ival;++i)\n"
96     "                    atomic_fetch_add(loc+iloc, 1);\n"
97     "            }\n"
98     "            break;\n"
99     "        case INST_END:\n"
100     "            run = false;\n"
101     "            break;\n"
102     "        }\n"
103     "\n"
104     "        sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
105     "    }\n"
106     "\n"
107     "    work_group_barrier(CLK_LOCAL_MEM_FENCE);\n"
108     "\n"
109     "    // Save this group's result\n"
110     "    __global int *op = out + (int)get_group_id(0)*NUM_LOC;\n"
111     "    if (lid < NUM_LOC)\n"
112     "        op[lid] = atomic_load(loc+lid);\n"
113     "}\n";
114 
115 
116 // Independent forward progress stuff
117 // Note:
118 //   Output needs num_groups * NUM_LOC elements
119 //   local_size must be > NUM_LOC
120 //   Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements
121 
inst(int op,int loc,int val)122 static inline int inst(int op, int loc, int val)
123 {
124     return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT)
125         | (op << INST_OP_SHIFT);
126 }
127 
gen_insts(cl_int * x,cl_int * p,int n)128 void gen_insts(cl_int *x, cl_int *p, int n)
129 {
130     int i, j0, j1;
131     int val;
132     int ii[NUM_LOC];
133 
134     // Create a random permutation of 0...NUM_LOC-1
135     ii[0] = 0;
136     for (i = 1; i < NUM_LOC; ++i)
137     {
138         j0 = random_in_range(0, i, gMTdata);
139         if (j0 != i) ii[i] = ii[j0];
140         ii[j0] = i;
141     }
142 
143     // Initialize "instruction pointers"
144     memset(p, 0, n * 4);
145 
146     for (i = 0; i < NUM_LOC; ++i)
147     {
148         // Randomly choose 2 different sub groups
149         // One does a random amount of work, and the other waits for it
150         j0 = random_in_range(0, n - 1, gMTdata);
151 
152         do
153         {
154             j1 = random_in_range(0, n - 1, gMTdata);
155         } while (j1 == j0);
156 
157         // Randomly choose a wait value and assign "instructions"
158         val = random_in_range(100, 200 + 10 * NUM_LOC, gMTdata);
159         x[j0 * (NUM_LOC + 1) + p[j0]] = inst(INST_COUNT, ii[i], val);
160         x[j1 * (NUM_LOC + 1) + p[j1]] = inst(INST_WAIT, ii[i], val);
161         ++p[j0];
162         ++p[j1];
163     }
164 
165     // Last "inst" for each sub group is END
166     for (i = 0; i < n; ++i) x[i * (NUM_LOC + 1) + p[i]] = inst(INST_END, 0, 0);
167 }
168 
169 // Execute one group's "instructions"
run_insts(cl_int * x,cl_int * p,int n)170 void run_insts(cl_int *x, cl_int *p, int n)
171 {
172     int i, nend;
173     bool scont;
174     cl_int loc[NUM_LOC];
175 
176     // Initialize result and "instruction pointers"
177     memset(loc, 0, sizeof(loc));
178     memset(p, 0, 4 * n);
179 
180     // Repetitively loop over subgroups with each executing "instructions" until
181     // blocked The loop terminates when all subgroups have hit the "END
182     // instruction"
183     do
184     {
185         nend = 0;
186         for (i = 0; i < n; ++i)
187         {
188             do
189             {
190                 cl_int inst = x[i * (NUM_LOC + 1) + p[i]];
191                 cl_int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;
192                 cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;
193                 cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;
194                 scont = false;
195 
196                 switch (iop)
197                 {
198                     case INST_STORE:
199                         loc[iloc] = ival;
200                         ++p[i];
201                         scont = true;
202                         break;
203                     case INST_WAIT:
204                         if (loc[iloc] == ival)
205                         {
206                             ++p[i];
207                             scont = true;
208                         }
209                         break;
210                     case INST_COUNT:
211                         loc[iloc] += ival;
212                         ++p[i];
213                         scont = true;
214                         break;
215                     case INST_END: ++nend; break;
216                 }
217             } while (scont);
218         }
219     } while (nend < n);
220 
221     // Return result, reusing "p"
222     memcpy(p, loc, sizeof(loc));
223 }
224 
225 
226 struct IFP
227 {
genIFP228     static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng)
229     {
230         int k;
231         int nj = (nw + ns - 1) / ns;
232 
233         // We need at least 2 sub groups per group for this test
234         if (nj == 1) return;
235 
236         for (k = 0; k < ng; ++k)
237         {
238             gen_insts(x, t, nj);
239             x += nj * (NUM_LOC + 1);
240         }
241     }
242 
chkIFP243     static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, int ns,
244                    int nw, int ng)
245     {
246         int i, k;
247         int nj = (nw + ns - 1) / ns;
248 
249         // We need at least 2 sub groups per group for this tes
250         if (nj == 1) return 0;
251 
252         log_info("  independent forward progress...\n");
253 
254         for (k = 0; k < ng; ++k)
255         {
256             run_insts(x, t, nj);
257             for (i = 0; i < NUM_LOC; ++i)
258             {
259                 if (t[i] != y[i])
260                 {
261                     log_error(
262                         "ERROR: mismatch at element %d in work group %d\n", i,
263                         k);
264                     return -1;
265                 }
266             }
267             x += nj * (NUM_LOC + 1);
268             y += NUM_LOC;
269         }
270 
271         return 0;
272     }
273 };
274 
test_ifp(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,bool useCoreSubgroups)275 int test_ifp(cl_device_id device, cl_context context, cl_command_queue queue,
276              int num_elements, bool useCoreSubgroups)
277 {
278     int error;
279 
280     // Adjust these individually below if desired/needed
281 #define G 2000
282 #define L 200
283     error = test<cl_int, IFP, G, L>::run(device, context, queue, num_elements,
284                                          "test_ifp", ifp_source, NUM_LOC + 1,
285                                          useCoreSubgroups);
286     return error;
287 }
288 
checkIFPSupport(cl_device_id device,bool & ifpSupport)289 static test_status checkIFPSupport(cl_device_id device, bool &ifpSupport)
290 {
291     cl_uint ifp_supported;
292     cl_uint error;
293     error = clGetDeviceInfo(device,
294                             CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
295                             sizeof(ifp_supported), &ifp_supported, NULL);
296     if (error != CL_SUCCESS)
297     {
298         print_error(
299             error,
300             "Unable to get CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS "
301             "capability");
302         return TEST_FAIL;
303     }
304     // skip testing ifp
305     if (ifp_supported != 1)
306     {
307         log_info("INDEPENDENT FORWARD PROGRESS not supported...\n");
308         ifpSupport = false;
309     }
310     else
311     {
312         log_info("INDEPENDENT FORWARD PROGRESS supported...\n");
313         ifpSupport = true;
314     }
315     return TEST_PASS;
316 }
317 
test_ifp_core(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)318 int test_ifp_core(cl_device_id device, cl_context context,
319                   cl_command_queue queue, int num_elements)
320 {
321     bool ifpSupport = true;
322     test_status error;
323     error = checkIFPSupport(device, ifpSupport);
324     if (error != TEST_PASS)
325     {
326         return error;
327     }
328     if (ifpSupport == false)
329     {
330         log_info("Independed forward progress skipped.\n");
331         return TEST_SKIPPED_ITSELF;
332     }
333 
334     return test_ifp(device, context, queue, num_elements, true);
335 }
336 
test_ifp_ext(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)337 int test_ifp_ext(cl_device_id device, cl_context context,
338                  cl_command_queue queue, int num_elements)
339 {
340     bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
341     bool ifpSupport = true;
342 
343     if (!hasExtension)
344     {
345         log_info(
346             "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
347         return TEST_SKIPPED_ITSELF;
348     }
349     // ifp only in subgroup functions tests:
350     test_status error;
351     error = checkIFPSupport(device, ifpSupport);
352     if (error != TEST_PASS)
353     {
354         return error;
355     }
356     if (ifpSupport == false)
357     {
358         log_info(
359             "Error reason: the extension cl_khr_subgroups requires that "
360             "Independed forward progress has to be supported by device.\n");
361         return TEST_FAIL;
362     }
363     return test_ifp(device, context, queue, num_elements, false);
364 }