//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/conversions.h"

// clang-format off
const char *atomic_index_source =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "// Counter keeps track of which index in counts we are using.\n"
    "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
    "// At the end of this we should have all thread IDs in some random location in counts\n"
    "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
    "// will be missing some.\n"
    "\n"
    "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
    "    int tid = get_global_id(0);\n"
    "\n"
    "    int counter_to_use = atom_add(counter, 1);\n"
    "    counts[counter_to_use] = tid;\n"
    "}";
// clang-format on
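
// A minimal host-side sketch of the invariant the kernel above is meant to
// establish, assuming atom_add hands every work-item a unique index: after the
// kernel runs, "counts" holds every thread ID exactly once, i.e. it is a
// permutation of 0..N-1. The helper is illustrative only (it is not referenced
// by the tests below and its name is hypothetical); it mirrors the
// verification loop in test_atomic_add_index.
#if 0
static int counts_is_permutation(const cl_int *counts, size_t n)
{
    char *seen = (char *)calloc(n, 1);
    if (seen == NULL) return 0;
    int ok = 1;
    for (size_t i = 0; i < n; i++)
    {
        // An out-of-range or repeated thread ID means an atomic update was
        // lost.
        if (counts[i] < 0 || (size_t)counts[i] >= n || seen[counts[i]])
        {
            ok = 0;
            break;
        }
        seen[counts[i]] = 1;
    }
    free(seen);
    return ok;
}
#endif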

int test_atomic_add_index(cl_device_id deviceID, cl_context context,
                          cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper counter, counters;
    size_t numGlobalThreads, numLocalThreads;
    int fail = 0, err;

    /* Check if atomics are supported. */
    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
    {
        log_info("Base atomics not supported "
                 "(cl_khr_global_int32_base_atomics). Skipping test.\n");
        return 0;
    }

    //===== add_index test
    // The index test replicates what the particles demo does.
    // It uses one memory location to keep track of the current index and then
    // each thread does an atomic add to it to get its new location. The
    // threads then write to their assigned location. At the end we check to
    // make sure that each thread's ID shows up exactly once in the output.

    numGlobalThreads = 2048;

    if (create_single_kernel_helper(context, &program, &kernel, 1,
                                    &atomic_index_source, "add_index_test"))
        return -1;

    if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
                                       &numLocalThreads))
        return -1;

    log_info("Execute global_threads:%d local_threads:%d\n",
             (int)numGlobalThreads, (int)numLocalThreads);

    // Create the counter that will keep track of where each thread writes.
    counter = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1,
                             NULL, NULL);
    // Create the counters that will hold the results of each thread writing
    // its ID into a (hopefully) unique location.
    counters = clCreateBuffer(context, CL_MEM_READ_WRITE,
                              sizeof(cl_int) * numGlobalThreads, NULL, NULL);

    // Reset all those locations to -1 to indicate they have not been used.
    cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
    if (values == NULL)
    {
        log_error(
            "add_index_test FAILED to allocate memory for initial values.\n");
        fail = 1;
    }
    else
    {
        for (unsigned int i = 0; i < numGlobalThreads; i++) values[i] = -1;
        int init = 0;
        err = clEnqueueWriteBuffer(queue, counters, true, 0,
                                   numGlobalThreads * sizeof(cl_int), values, 0,
                                   NULL, NULL);
        err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
                                    &init, 0, NULL, NULL);
        if (err)
        {
            log_error(
                "add_index_test FAILED to write initial values to arrays: %d\n",
                err);
            fail = 1;
        }
        else
        {
            err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
            err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
            if (err)
            {
                log_error("add_index_test FAILED to set kernel arguments: %d\n",
                          err);
                fail = 1;
            }
            else
            {
                err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
                                             &numGlobalThreads,
                                             &numLocalThreads, 0, NULL, NULL);
                if (err)
                {
                    log_error("add_index_test FAILED to execute kernel: %d\n",
                              err);
                    fail = 1;
                }
                else
                {
                    err = clEnqueueReadBuffer(queue, counters, true, 0,
                                              sizeof(cl_int) * numGlobalThreads,
                                              values, 0, NULL, NULL);
                    if (err)
                    {
                        log_error(
                            "add_index_test FAILED to read back results: %d\n",
                            err);
                        fail = 1;
                    }
                    else
                    {
                        unsigned int looking_for, index;
                        for (looking_for = 0; looking_for < numGlobalThreads;
                             looking_for++)
                        {
                            int instances_found = 0;
                            for (index = 0; index < numGlobalThreads; index++)
                            {
                                if (values[index] == (int)looking_for)
                                    instances_found++;
                            }
                            if (instances_found != 1)
                            {
                                log_error(
                                    "add_index_test FAILED: wrong number of "
                                    "instances (%d!=1) for counter %d.\n",
                                    instances_found, looking_for);
                                fail = 1;
                            }
                        }
                    }
                }
            }
        }
        if (!fail)
        {
            log_info(
                "add_index_test passed. Each thread used exactly one index.\n");
        }
        free(values);
    }
    return fail;
}

// clang-format off
const char *add_index_bin_kernel[] = {
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
    "// using an atomic add to keep track of the current location to write into in each bin.\n"
    "// This is the same as the memory update for the particles demo.\n"
    "\n"
    "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
    "    int tid = get_global_id(0);\n"
    "\n"
    "    int location = bin_assignments[tid];\n"
    "    int counter = atom_add(&bin_counters[location], 1);\n"
    "    bins[location*max_counts_per_bin + counter] = tid;\n"
    "}" };
// clang-format on
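
// A minimal sequential sketch of what the kernel above computes, assuming the
// per-bin atom_add is atomic: each item claims the next free slot in its
// assigned bin, so the first bin_counters[b] slots of bin b end up holding
// exactly the items assigned to it. The helper is illustrative only (it is
// not called by the test and its name is hypothetical).
#if 0
static void add_index_bin_reference(const cl_int *bin_assignments,
                                    cl_int *bins, cl_int *bin_counters,
                                    int number_of_items, int max_counts_per_bin)
{
    for (int tid = 0; tid < number_of_items; tid++)
    {
        int location = bin_assignments[tid];
        // atom_add returns the pre-increment value, which gives each item a
        // unique slot within its bin.
        int counter = bin_counters[location]++;
        bins[location * max_counts_per_bin + counter] = tid;
    }
}
#endif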

// This test assigns a bunch of values to bins and then tries to put them in the
// bins in parallel using an atomic add to keep track of the current location to
// write into in each bin. This is the same as the memory update for the
// particles demo.
int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
                       cl_context context, MTdata d)
{
    int number_of_items = (int)global_threads[0];
    size_t local_threads[1];
    int divisor = 12;
    int number_of_bins = number_of_items / divisor;
    int max_counts_per_bin = divisor * 2;

    int fail = 0;
    int err;

    clProgramWrapper program;
    clKernelWrapper kernel;

    // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
    // items per bin (bins is %d long).\n",
    //          number_of_items, number_of_bins, max_counts_per_bin,
    //          number_of_bins*max_counts_per_bin);

    //===== add_index_bin test
    // The add_index_bin test replicates the memory update from the particles
    // demo.
    err =
        create_single_kernel_helper(context, &program, &kernel, 1,
                                    add_index_bin_kernel, "add_index_bin_test");
    test_error(err, "Unable to create testing kernel");

    if (get_max_common_work_group_size(context, kernel, global_threads[0],
                                       &local_threads[0]))
        return -1;

    log_info("Execute global_threads:%d local_threads:%d\n",
             (int)global_threads[0], (int)local_threads[0]);

    // Allocate our storage
    cl_mem bin_counters =
        clCreateBuffer(context, CL_MEM_READ_WRITE,
                       sizeof(cl_int) * number_of_bins, NULL, NULL);
    cl_mem bins = clCreateBuffer(
        context, CL_MEM_READ_WRITE,
        sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
    cl_mem bin_assignments =
        clCreateBuffer(context, CL_MEM_READ_ONLY,
                       sizeof(cl_int) * number_of_items, NULL, NULL);

    if (bin_counters == NULL)
    {
        log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
        return -1;
    }
    if (bins == NULL)
    {
        log_error("add_index_bin_test FAILED to allocate bins.\n");
        return -1;
    }
    if (bin_assignments == NULL)
    {
        log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
        return -1;
    }

    // Initialize our storage
    cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
    if (!l_bin_counts)
    {
        log_error("add_index_bin_test FAILED to allocate initial values for "
                  "bin_counters.\n");
        return -1;
    }
    int i;
    for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
    err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
                               sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
                               NULL, NULL);
    if (err)
    {
        log_error("add_index_bin_test FAILED to set initial values for "
                  "bin_counters: %d\n",
                  err);
        return -1;
    }

    cl_int *values =
        (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
    if (!values)
    {
        log_error(
            "add_index_bin_test FAILED to allocate initial values for bins.\n");
        return -1;
    }
    for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
    err = clEnqueueWriteBuffer(queue, bins, true, 0,
                               sizeof(cl_int) * number_of_bins
                                   * max_counts_per_bin,
                               values, 0, NULL, NULL);
    if (err)
    {
        log_error(
            "add_index_bin_test FAILED to set initial values for bins: %d\n",
            err);
        return -1;
    }
    free(values);

    cl_int *l_bin_assignments =
        (cl_int *)malloc(sizeof(cl_int) * number_of_items);
    if (!l_bin_assignments)
    {
        log_error("add_index_bin_test FAILED to allocate initial values for "
                  "l_bin_assignments.\n");
        return -1;
    }
    for (i = 0; i < number_of_items; i++)
    {
        int bin = random_in_range(0, number_of_bins - 1, d);
        while (l_bin_counts[bin] >= max_counts_per_bin)
        {
            bin = random_in_range(0, number_of_bins - 1, d);
        }
        if (bin >= number_of_bins)
            log_error("add_index_bin_test internal error generating bin "
                      "assignments: bin %d >= number_of_bins %d.\n",
                      bin, number_of_bins);
        if (l_bin_counts[bin] + 1 > max_counts_per_bin)
            log_error(
                "add_index_bin_test internal error generating bin assignments: "
                "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
                bin, l_bin_counts[bin], max_counts_per_bin);
        l_bin_counts[bin]++;
        l_bin_assignments[i] = bin;
        // log_info("item %d assigned to bin %d (%d items)\n", i, bin,
        //          l_bin_counts[bin]);
    }
    err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
                               sizeof(cl_int) * number_of_items,
                               l_bin_assignments, 0, NULL, NULL);
    if (err)
    {
        log_error("add_index_bin_test FAILED to set initial values for "
                  "bin_assignments: %d\n",
                  err);
        return -1;
    }
    // Setup the kernel
    err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
    err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
    err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
    err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
                          &max_counts_per_bin);
    if (err)
    {
        log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
                  err);
        fail = 1;
        return -1;
    }

    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
                                 local_threads, 0, NULL, NULL);
    if (err)
    {
        log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
        fail = 1;
    }

    cl_int *final_bin_assignments =
        (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
    if (!final_bin_assignments)
    {
        log_error("add_index_bin_test FAILED to allocate initial values for "
                  "final_bin_assignments.\n");
        return -1;
    }
    err = clEnqueueReadBuffer(queue, bins, true, 0,
                              sizeof(cl_int) * number_of_bins
                                  * max_counts_per_bin,
                              final_bin_assignments, 0, NULL, NULL);
    if (err)
    {
        log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
        fail = 1;
    }

    cl_int *final_bin_counts =
        (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
    if (!final_bin_counts)
    {
        log_error("add_index_bin_test FAILED to allocate initial values for "
                  "final_bin_counts.\n");
        return -1;
    }
    err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
                              sizeof(cl_int) * number_of_bins, final_bin_counts,
                              0, NULL, NULL);
    if (err)
    {
        log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
                  err);
        fail = 1;
    }

    // Verification.
    int errors = 0;
    int current_bin;
    int search;
    // Print out all the contents of the bins.
    // for (current_bin=0; current_bin<number_of_bins; current_bin++)
    //     for (search=0; search<max_counts_per_bin; search++)
    //         log_info("[bin %d, entry %d] = %d\n", current_bin, search,
    //                  final_bin_assignments[current_bin*max_counts_per_bin+search]);

    // First verify that there are the correct number in each bin.
    for (current_bin = 0; current_bin < number_of_bins; current_bin++)
    {
        int expected_number = l_bin_counts[current_bin];
        int actual_number = final_bin_counts[current_bin];
        if (expected_number != actual_number)
        {
            log_error("add_index_bin_test FAILED: bin %d reported %d entries "
                      "when %d were expected.\n",
                      current_bin, actual_number, expected_number);
            errors++;
        }
        for (search = 0; search < expected_number; search++)
        {
            if (final_bin_assignments[current_bin * max_counts_per_bin + search]
                == -1)
            {
                log_error("add_index_bin_test FAILED: bin %d had no entry at "
                          "position %d when it should have had %d entries.\n",
                          current_bin, search, expected_number);
                errors++;
            }
        }
        for (search = expected_number; search < max_counts_per_bin; search++)
        {
            if (final_bin_assignments[current_bin * max_counts_per_bin + search]
                != -1)
            {
                log_error(
                    "add_index_bin_test FAILED: bin %d had an extra entry at "
                    "position %d when it should have had only %d entries.\n",
                    current_bin, search, expected_number);
                errors++;
            }
        }
    }
    // Now verify that the correct ones are in each bin
    int index;
    for (index = 0; index < number_of_items; index++)
    {
        int expected_bin = l_bin_assignments[index];
        int found_it = 0;
        for (search = 0; search < l_bin_counts[expected_bin]; search++)
        {
            if (final_bin_assignments[expected_bin * max_counts_per_bin
                                      + search]
                == index)
            {
                found_it = 1;
            }
        }
        if (found_it == 0)
        {
            log_error(
                "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
                index, expected_bin);
            errors++;
        }
    }
    free(l_bin_counts);
    free(l_bin_assignments);
    free(final_bin_assignments);
    free(final_bin_counts);
    clReleaseMemObject(bin_counters);
    clReleaseMemObject(bins);
    clReleaseMemObject(bin_assignments);
    if (errors == 0)
    {
        log_info("add_index_bin_test passed. Each item was put in the correct "
                 "bin in parallel.\n");
        return 0;
    }
    else
    {
        log_error("add_index_bin_test FAILED: %d errors.\n", errors);
        return -1;
    }
}

int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, int num_elements)
{
    //===== add_index_bin test
    size_t numGlobalThreads = 2048;
    int iteration = 0;
    int err, failed = 0;
    MTdata d = init_genrand(gRandomSeed);

    /* Check if atomics are supported. */
    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
    {
        log_info("Base atomics not supported "
                 "(cl_khr_global_int32_base_atomics). Skipping test.\n");
        free_mtdata(d);
        return 0;
    }

    for (iteration = 0; iteration < 10; iteration++)
    {
        log_info("add_index_bin_test with %d elements:\n",
                 (int)numGlobalThreads);
        err = add_index_bin_test(&numGlobalThreads, queue, context, d);
        if (err)
        {
            failed++;
            break;
        }
        numGlobalThreads *= 2;
    }
    free_mtdata(d);
    return failed;
}