• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/conversions.h"
18 
19 const char * atomic_index_source =
20 "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
21 "// Counter keeps track of which index in counts we are using.\n"
22 "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
23 "// At the end of this we should have all thread IDs in some random location in counts\n"
24 "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
25 "// will be missing some.\n"
26 "\n"
27 "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
28 "    int tid = get_global_id(0);\n"
29 "    \n"
30 "    int counter_to_use = atom_add(counter, 1);\n"
31 "    counts[counter_to_use] = tid;\n"
32 "}";
33 
test_atomic_add_index(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)34 int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
35 {
36     clProgramWrapper program;
37     clKernelWrapper kernel;
38     clMemWrapper counter, counters;
39     size_t numGlobalThreads, numLocalThreads;
40     int fail = 0, succeed = 0, err;
41 
42   /* Check if atomics are supported. */
43   if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
44     log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
45     return 0;
46   }
47 
48     //===== add_index test
49     // The index test replicates what particles does.
50     // It uses one memory location to keep track of the current index and then each thread
51     // does an atomic add to it to get its new location. The threads then write to their
52     // assigned location. At the end we check to make sure that each thread's ID shows up
53     // exactly once in the output.
54 
55     numGlobalThreads = 2048;
56 
57     if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
58         return -1;
59 
60     if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
61         return -1;
62 
63     log_info("Execute global_threads:%d local_threads:%d\n",
64              (int)numGlobalThreads, (int)numLocalThreads);
65 
66     // Create the counter that will keep track of where each thread writes.
67     counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
68                                    sizeof(cl_int) * 1, NULL, NULL);
69     // Create the counters that will hold the results of each thread writing
70     // its ID into a (hopefully) unique location.
71     counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
72                                     sizeof(cl_int) * numGlobalThreads, NULL, NULL);
73 
74     // Reset all those locations to -1 to indciate they have not been used.
75     cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
76     if (values == NULL) {
77         log_error("add_index_test FAILED to allocate memory for initial values.\n");
78         fail = 1; succeed = -1;
79     } else {
80         memset(values, -1, numLocalThreads);
81         unsigned int i=0;
82         for (i=0; i<numGlobalThreads; i++)
83             values[i] = -1;
84         int init=0;
85         err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
86         err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
87         if (err) {
88             log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
89             fail=1; succeed=-1;
90         } else {
91             err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
92             err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
93             if (err) {
94                 log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
95                 fail=1; succeed=-1;
96             } else {
97                 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
98                 if (err) {
99                     log_error("add_index_test FAILED to execute kernel: %d\n", err);
100                     fail=1; succeed=-1;
101                 } else {
102                     err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
103                     if (err) {
104                         log_error("add_index_test FAILED to read back results: %d\n", err);
105                         fail = 1; succeed=-1;
106                     } else {
107                         unsigned int looking_for, index;
108                         for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
109                             int instances_found=0;
110                             for (index=0; index<numGlobalThreads; index++) {
111                                 if (values[index]==(int)looking_for)
112                                     instances_found++;
113                             }
114                             if (instances_found != 1) {
115                                 log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
116                                 fail = 1; succeed=-1;
117                             }
118                         }
119                     }
120                 }
121             }
122         }
123         if (!fail) {
124             log_info("add_index_test passed. Each thread used exactly one index.\n");
125         }
126         free(values);
127     }
128     return fail;
129 }
130 
131 const char *add_index_bin_kernel[] = {
132 "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
133 "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
134 "// using an atomic add to keep track of the current location to write into in each bin.\n"
135 "// This is the same as the memory update for the particles demo.\n"
136 "\n"
137 "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
138 "    int tid = get_global_id(0);\n"
139 "\n"
140 "    int location = bin_assignments[tid];\n"
141 "    int counter = atom_add(&bin_counters[location], 1);\n"
142 "    bins[location*max_counts_per_bin + counter] = tid;\n"
143 "}" };
144 
145 // This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
146 // using an atomic add to keep track of the current location to write into in each bin.
147 // This is the same as the memory update for the particles demo.
add_index_bin_test(size_t * global_threads,cl_command_queue queue,cl_context context,MTdata d)148 int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
149 {
150     int number_of_items = (int)global_threads[0];
151     size_t local_threads[1];
152     int divisor = 12;
153     int number_of_bins = number_of_items/divisor;
154     int max_counts_per_bin = divisor*2;
155 
156     int fail = 0;
157     int succeed = 0;
158     int err;
159 
160     clProgramWrapper program;
161     clKernelWrapper kernel;
162 
163     //  log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
164     //           number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
165 
166     //===== add_index_bin test
167     // The index test replicates what particles does.
168     err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
169     test_error( err, "Unable to create testing kernel" );
170 
171     if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
172         return -1;
173 
174     log_info("Execute global_threads:%d local_threads:%d\n",
175              (int)global_threads[0], (int)local_threads[0]);
176 
177     // Allocate our storage
178     cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
179                                         sizeof(cl_int) * number_of_bins, NULL, NULL);
180     cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
181                                 sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
182     cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
183                                            sizeof(cl_int) * number_of_items, NULL, NULL);
184 
185     if (bin_counters == NULL) {
186         log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
187         return -1;
188     }
189     if (bins == NULL) {
190         log_error("add_index_bin_test FAILED to allocate bins.\n");
191         return -1;
192     }
193     if (bin_assignments == NULL) {
194         log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
195         return -1;
196     }
197 
198     // Initialize our storage
199     cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
200     if (!l_bin_counts) {
201         log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
202         return -1;
203     }
204     int i;
205     for (i=0; i<number_of_bins; i++)
206         l_bin_counts[i] = 0;
207     err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
208     if (err) {
209         log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
210         return -1;
211     }
212 
213     cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
214     if (!values) {
215         log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
216         return -1;
217     }
218     for (i=0; i<number_of_bins*max_counts_per_bin; i++)
219         values[i] = -1;
220     err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
221     if (err) {
222         log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
223         return -1;
224     }
225     free(values);
226 
227     cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
228     if (!l_bin_assignments) {
229         log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
230         return -1;
231     }
232     for (i=0; i<number_of_items; i++) {
233         int bin = random_in_range(0, number_of_bins-1, d);
234         while (l_bin_counts[bin] >= max_counts_per_bin) {
235             bin = random_in_range(0, number_of_bins-1, d);
236         }
237         if (bin >= number_of_bins)
238             log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
239         if (l_bin_counts[bin]+1 > max_counts_per_bin)
240             log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
241         l_bin_counts[bin]++;
242         l_bin_assignments[i] = bin;
243         //     log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
244     }
245     err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
246     if (err) {
247         log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
248         return -1;
249     }
250     // Setup the kernel
251     err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
252     err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
253     err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
254     err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
255     if (err) {
256         log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
257         fail=1; succeed=-1;
258         return -1;
259     }
260 
261     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
262     if (err) {
263         log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
264         fail=1; succeed=-1;
265     }
266 
267     cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
268     if (!final_bin_assignments) {
269         log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
270         return -1;
271     }
272     err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
273     if (err) {
274         log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
275         fail = 1; succeed=-1;
276     }
277 
278     cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
279     if (!final_bin_counts) {
280         log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
281         return -1;
282     }
283     err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
284     if (err) {
285         log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
286         fail = 1; succeed=-1;
287     }
288 
289     // Verification.
290     int errors=0;
291     int current_bin;
292     int search;
293     //  Print out all the contents of the bins.
294     //  for (current_bin=0; current_bin<number_of_bins; current_bin++)
295     //        for (search=0; search<max_counts_per_bin; search++)
296     //      log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
297 
298     // First verify that there are the correct number in each bin.
299     for (current_bin=0; current_bin<number_of_bins; current_bin++) {
300         int expected_number = l_bin_counts[current_bin];
301         int actual_number = final_bin_counts[current_bin];
302         if (expected_number != actual_number) {
303             log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
304             errors++;
305         }
306         for (search=0; search<expected_number; search++) {
307             if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
308                 log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
309                 errors++;
310             }
311         }
312         for (search=expected_number; search<max_counts_per_bin; search++) {
313             if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
314                 log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
315                 errors++;
316             }
317         }
318     }
319     // Now verify that the correct ones are in each bin
320     int index;
321     for (index=0; index<number_of_items; index++) {
322         int expected_bin = l_bin_assignments[index];
323         int found_it = 0;
324         for (search=0; search<l_bin_counts[expected_bin]; search++) {
325             if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
326                 found_it = 1;
327             }
328         }
329         if (found_it == 0) {
330             log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
331             errors++;
332         }
333     }
334     free(l_bin_counts);
335     free(l_bin_assignments);
336     free(final_bin_assignments);
337     free(final_bin_counts);
338     clReleaseMemObject(bin_counters);
339     clReleaseMemObject(bins);
340     clReleaseMemObject(bin_assignments);
341     if (errors == 0) {
342         log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
343         return 0;
344     } else {
345         log_error("add_index_bin_test FAILED: %d errors.\n", errors);
346         return -1;
347     }
348 }
349 
test_atomic_add_index_bin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)350 int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
351 {
352     //===== add_index_bin test
353     size_t numGlobalThreads = 2048;
354     int iteration=0;
355     int err, failed = 0;
356     MTdata d = init_genrand( gRandomSeed );
357 
358   /* Check if atomics are supported. */
359   if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
360     log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
361     free_mtdata( d );
362     return 0;
363   }
364 
365     for(iteration=0; iteration<10; iteration++) {
366         log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
367         err = add_index_bin_test(&numGlobalThreads,  queue,  context, d);
368         if (err) {
369             failed++;
370             break;
371         }
372         numGlobalThreads*=2;
373     }
374     free_mtdata( d );
375     return failed;
376 }
377 
378 
379