• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/conversions.h"
18 
19 // clang-format off
20 const char *atomic_index_source =
21     "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
22     "// Counter keeps track of which index in counts we are using.\n"
23     "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
24     "// At the end of this we should have all thread IDs in some random location in counts\n"
25     "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
26     "// will be missing some.\n"
27     "\n"
28     "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
29     "    int tid = get_global_id(0);\n"
30     "    \n"
31     "    int counter_to_use = atom_add(counter, 1);\n"
32     "    counts[counter_to_use] = tid;\n"
33     "}";
34 // clang-format on
35 
test_atomic_add_index(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)36 int test_atomic_add_index(cl_device_id deviceID, cl_context context,
37                           cl_command_queue queue, int num_elements)
38 {
39     clProgramWrapper program;
40     clKernelWrapper kernel;
41     clMemWrapper counter, counters;
42     size_t numGlobalThreads, numLocalThreads;
43     int fail = 0, err;
44 
45     /* Check if atomics are supported. */
46     if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
47     {
48         log_info("Base atomics not supported "
49                  "(cl_khr_global_int32_base_atomics). Skipping test.\n");
50         return 0;
51     }
52 
53     //===== add_index test
54     // The index test replicates what particles does.
55     // It uses one memory location to keep track of the current index and then
56     // each thread does an atomic add to it to get its new location. The threads
57     // then write to their assigned location. At the end we check to make sure
58     // that each thread's ID shows up exactly once in the output.
59 
60     numGlobalThreads = 2048;
61 
62     if (create_single_kernel_helper(context, &program, &kernel, 1,
63                                     &atomic_index_source, "add_index_test"))
64         return -1;
65 
66     if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
67                                        &numLocalThreads))
68         return -1;
69 
70     log_info("Execute global_threads:%d local_threads:%d\n",
71              (int)numGlobalThreads, (int)numLocalThreads);
72 
73     // Create the counter that will keep track of where each thread writes.
74     counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1,
75                              NULL, NULL);
76     // Create the counters that will hold the results of each thread writing
77     // its ID into a (hopefully) unique location.
78     counters = clCreateBuffer(context, CL_MEM_READ_WRITE,
79                               sizeof(cl_int) * numGlobalThreads, NULL, NULL);
80 
81     // Reset all those locations to -1 to indciate they have not been used.
82     cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
83     if (values == NULL)
84     {
85         log_error(
86             "add_index_test FAILED to allocate memory for initial values.\n");
87         fail = 1;
88     }
89     else
90     {
91         memset(values, -1, numLocalThreads);
92         unsigned int i = 0;
93         for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
94         int init = 0;
95         err = clEnqueueWriteBuffer(queue, counters, true, 0,
96                                    numGlobalThreads * sizeof(cl_int), values, 0,
97                                    NULL, NULL);
98         err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
99                                     &init, 0, NULL, NULL);
100         if (err)
101         {
102             log_error(
103                 "add_index_test FAILED to write initial values to arrays: %d\n",
104                 err);
105             fail = 1;
106         }
107         else
108         {
109             err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
110             err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
111             if (err)
112             {
113                 log_error("add_index_test FAILED to set kernel arguments: %d\n",
114                           err);
115                 fail = 1;
116             }
117             else
118             {
119                 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
120                                              &numGlobalThreads,
121                                              &numLocalThreads, 0, NULL, NULL);
122                 if (err)
123                 {
124                     log_error("add_index_test FAILED to execute kernel: %d\n",
125                               err);
126                     fail = 1;
127                 }
128                 else
129                 {
130                     err = clEnqueueReadBuffer(queue, counters, true, 0,
131                                               sizeof(cl_int) * numGlobalThreads,
132                                               values, 0, NULL, NULL);
133                     if (err)
134                     {
135                         log_error(
136                             "add_index_test FAILED to read back results: %d\n",
137                             err);
138                         fail = 1;
139                     }
140                     else
141                     {
142                         unsigned int looking_for, index;
143                         for (looking_for = 0; looking_for < numGlobalThreads;
144                              looking_for++)
145                         {
146                             int instances_found = 0;
147                             for (index = 0; index < numGlobalThreads; index++)
148                             {
149                                 if (values[index] == (int)looking_for)
150                                     instances_found++;
151                             }
152                             if (instances_found != 1)
153                             {
154                                 log_error(
155                                     "add_index_test FAILED: wrong number of "
156                                     "instances (%d!=1) for counter %d.\n",
157                                     instances_found, looking_for);
158                                 fail = 1;
159                             }
160                         }
161                     }
162                 }
163             }
164         }
165         if (!fail)
166         {
167             log_info(
168                 "add_index_test passed. Each thread used exactly one index.\n");
169         }
170         free(values);
171     }
172     return fail;
173 }
174 
175 // clang-format off
176 const char *add_index_bin_kernel[] = {
177     "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
178     "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
179     "// using an atomic add to keep track of the current location to write into in each bin.\n"
180     "// This is the same as the memory update for the particles demo.\n"
181     "\n"
182     "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
183     "    int tid = get_global_id(0);\n"
184     "\n"
185     "    int location = bin_assignments[tid];\n"
186     "    int counter = atom_add(&bin_counters[location], 1);\n"
187     "    bins[location*max_counts_per_bin + counter] = tid;\n"
188     "}" };
189 // clang-format on
190 
191 // This test assigns a bunch of values to bins and then tries to put them in the
192 // bins in parallel using an atomic add to keep track of the current location to
193 // write into in each bin. This is the same as the memory update for the
194 // particles demo.
add_index_bin_test(size_t * global_threads,cl_command_queue queue,cl_context context,MTdata d)195 int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
196                        cl_context context, MTdata d)
197 {
198     int number_of_items = (int)global_threads[0];
199     size_t local_threads[1];
200     int divisor = 12;
201     int number_of_bins = number_of_items / divisor;
202     int max_counts_per_bin = divisor * 2;
203 
204     int fail = 0;
205     int err;
206 
207     clProgramWrapper program;
208     clKernelWrapper kernel;
209 
210     //  log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
211     //  items per bin (bins is %d long).\n",
212     //           number_of_items, number_of_bins, max_counts_per_bin,
213     //           number_of_bins*max_counts_per_bin);
214 
215     //===== add_index_bin test
216     // The index test replicates what particles does.
217     err =
218         create_single_kernel_helper(context, &program, &kernel, 1,
219                                     add_index_bin_kernel, "add_index_bin_test");
220     test_error(err, "Unable to create testing kernel");
221 
222     if (get_max_common_work_group_size(context, kernel, global_threads[0],
223                                        &local_threads[0]))
224         return -1;
225 
226     log_info("Execute global_threads:%d local_threads:%d\n",
227              (int)global_threads[0], (int)local_threads[0]);
228 
229     // Allocate our storage
230     cl_mem bin_counters =
231         clCreateBuffer(context, CL_MEM_READ_WRITE,
232                        sizeof(cl_int) * number_of_bins, NULL, NULL);
233     cl_mem bins = clCreateBuffer(
234         context, CL_MEM_READ_WRITE,
235         sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
236     cl_mem bin_assignments =
237         clCreateBuffer(context, CL_MEM_READ_ONLY,
238                        sizeof(cl_int) * number_of_items, NULL, NULL);
239 
240     if (bin_counters == NULL)
241     {
242         log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
243         return -1;
244     }
245     if (bins == NULL)
246     {
247         log_error("add_index_bin_test FAILED to allocate bins.\n");
248         return -1;
249     }
250     if (bin_assignments == NULL)
251     {
252         log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
253         return -1;
254     }
255 
256     // Initialize our storage
257     cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
258     if (!l_bin_counts)
259     {
260         log_error("add_index_bin_test FAILED to allocate initial values for "
261                   "bin_counters.\n");
262         return -1;
263     }
264     int i;
265     for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
266     err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
267                                sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
268                                NULL, NULL);
269     if (err)
270     {
271         log_error("add_index_bin_test FAILED to set initial values for "
272                   "bin_counters: %d\n",
273                   err);
274         return -1;
275     }
276 
277     cl_int *values =
278         (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
279     if (!values)
280     {
281         log_error(
282             "add_index_bin_test FAILED to allocate initial values for bins.\n");
283         return -1;
284     }
285     for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
286     err = clEnqueueWriteBuffer(queue, bins, true, 0,
287                                sizeof(cl_int) * number_of_bins
288                                    * max_counts_per_bin,
289                                values, 0, NULL, NULL);
290     if (err)
291     {
292         log_error(
293             "add_index_bin_test FAILED to set initial values for bins: %d\n",
294             err);
295         return -1;
296     }
297     free(values);
298 
299     cl_int *l_bin_assignments =
300         (cl_int *)malloc(sizeof(cl_int) * number_of_items);
301     if (!l_bin_assignments)
302     {
303         log_error("add_index_bin_test FAILED to allocate initial values for "
304                   "l_bin_assignments.\n");
305         return -1;
306     }
307     for (i = 0; i < number_of_items; i++)
308     {
309         int bin = random_in_range(0, number_of_bins - 1, d);
310         while (l_bin_counts[bin] >= max_counts_per_bin)
311         {
312             bin = random_in_range(0, number_of_bins - 1, d);
313         }
314         if (bin >= number_of_bins)
315             log_error("add_index_bin_test internal error generating bin "
316                       "assignments: bin %d >= number_of_bins %d.\n",
317                       bin, number_of_bins);
318         if (l_bin_counts[bin] + 1 > max_counts_per_bin)
319             log_error(
320                 "add_index_bin_test internal error generating bin assignments: "
321                 "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
322                 bin, l_bin_counts[bin], max_counts_per_bin);
323         l_bin_counts[bin]++;
324         l_bin_assignments[i] = bin;
325         //     log_info("item %d assigned to bin %d (%d items)\n", i, bin,
326         //     l_bin_counts[bin]);
327     }
328     err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
329                                sizeof(cl_int) * number_of_items,
330                                l_bin_assignments, 0, NULL, NULL);
331     if (err)
332     {
333         log_error("add_index_bin_test FAILED to set initial values for "
334                   "bin_assignments: %d\n",
335                   err);
336         return -1;
337     }
338     // Setup the kernel
339     err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
340     err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
341     err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
342     err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
343                           &max_counts_per_bin);
344     if (err)
345     {
346         log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
347                   err);
348         fail = 1;
349         return -1;
350     }
351 
352     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
353                                  local_threads, 0, NULL, NULL);
354     if (err)
355     {
356         log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
357         fail = 1;
358     }
359 
360     cl_int *final_bin_assignments =
361         (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
362     if (!final_bin_assignments)
363     {
364         log_error("add_index_bin_test FAILED to allocate initial values for "
365                   "final_bin_assignments.\n");
366         return -1;
367     }
368     err = clEnqueueReadBuffer(queue, bins, true, 0,
369                               sizeof(cl_int) * number_of_bins
370                                   * max_counts_per_bin,
371                               final_bin_assignments, 0, NULL, NULL);
372     if (err)
373     {
374         log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
375         fail = 1;
376     }
377 
378     cl_int *final_bin_counts =
379         (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
380     if (!final_bin_counts)
381     {
382         log_error("add_index_bin_test FAILED to allocate initial values for "
383                   "final_bin_counts.\n");
384         return -1;
385     }
386     err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
387                               sizeof(cl_int) * number_of_bins, final_bin_counts,
388                               0, NULL, NULL);
389     if (err)
390     {
391         log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
392                   err);
393         fail = 1;
394     }
395 
396     // Verification.
397     int errors = 0;
398     int current_bin;
399     int search;
400     //  Print out all the contents of the bins.
401     //  for (current_bin=0; current_bin<number_of_bins; current_bin++)
402     //        for (search=0; search<max_counts_per_bin; search++)
403     //      log_info("[bin %d, entry %d] = %d\n", current_bin, search,
404     //      final_bin_assignments[current_bin*max_counts_per_bin+search]);
405 
406     // First verify that there are the correct number in each bin.
407     for (current_bin = 0; current_bin < number_of_bins; current_bin++)
408     {
409         int expected_number = l_bin_counts[current_bin];
410         int actual_number = final_bin_counts[current_bin];
411         if (expected_number != actual_number)
412         {
413             log_error("add_index_bin_test FAILED: bin %d reported %d entries "
414                       "when %d were expected.\n",
415                       current_bin, actual_number, expected_number);
416             errors++;
417         }
418         for (search = 0; search < expected_number; search++)
419         {
420             if (final_bin_assignments[current_bin * max_counts_per_bin + search]
421                 == -1)
422             {
423                 log_error("add_index_bin_test FAILED: bin %d had no entry at "
424                           "position %d when it should have had %d entries.\n",
425                           current_bin, search, expected_number);
426                 errors++;
427             }
428         }
429         for (search = expected_number; search < max_counts_per_bin; search++)
430         {
431             if (final_bin_assignments[current_bin * max_counts_per_bin + search]
432                 != -1)
433             {
434                 log_error(
435                     "add_index_bin_test FAILED: bin %d had an extra entry at "
436                     "position %d when it should have had only %d entries.\n",
437                     current_bin, search, expected_number);
438                 errors++;
439             }
440         }
441     }
442     // Now verify that the correct ones are in each bin
443     int index;
444     for (index = 0; index < number_of_items; index++)
445     {
446         int expected_bin = l_bin_assignments[index];
447         int found_it = 0;
448         for (search = 0; search < l_bin_counts[expected_bin]; search++)
449         {
450             if (final_bin_assignments[expected_bin * max_counts_per_bin
451                                       + search]
452                 == index)
453             {
454                 found_it = 1;
455             }
456         }
457         if (found_it == 0)
458         {
459             log_error(
460                 "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
461                 index, expected_bin);
462             errors++;
463         }
464     }
465     free(l_bin_counts);
466     free(l_bin_assignments);
467     free(final_bin_assignments);
468     free(final_bin_counts);
469     clReleaseMemObject(bin_counters);
470     clReleaseMemObject(bins);
471     clReleaseMemObject(bin_assignments);
472     if (errors == 0)
473     {
474         log_info("add_index_bin_test passed. Each item was put in the correct "
475                  "bin in parallel.\n");
476         return 0;
477     }
478     else
479     {
480         log_error("add_index_bin_test FAILED: %d errors.\n", errors);
481         return -1;
482     }
483 }
484 
test_atomic_add_index_bin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)485 int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
486                               cl_command_queue queue, int num_elements)
487 {
488     //===== add_index_bin test
489     size_t numGlobalThreads = 2048;
490     int iteration = 0;
491     int err, failed = 0;
492     MTdata d = init_genrand(gRandomSeed);
493 
494     /* Check if atomics are supported. */
495     if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
496     {
497         log_info("Base atomics not supported "
498                  "(cl_khr_global_int32_base_atomics). Skipping test.\n");
499         free_mtdata(d);
500         return 0;
501     }
502 
503     for (iteration = 0; iteration < 10; iteration++)
504     {
505         log_info("add_index_bin_test with %d elements:\n",
506                  (int)numGlobalThreads);
507         err = add_index_bin_test(&numGlobalThreads, queue, context, d);
508         if (err)
509         {
510             failed++;
511             break;
512         }
513         numGlobalThreads *= 2;
514     }
515     free_mtdata(d);
516     return failed;
517 }
518