1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/conversions.h"
18 #ifndef _WIN32
19 #include <unistd.h>
20 #endif
21
22 #define INT_TEST_VALUE 402258822
23 #define LONG_TEST_VALUE 515154531254381446LL
24
25
// Two-fragment kernel wrapper for GLOBAL-memory atomic tests. Fragment [0]
// opens the kernel: destMemory is the atomically-updated target and oldValues
// records each work-item's returned "old" value. The caller's per-test core
// is spliced in between, and fragment [1] closes the kernel body.
const char *atomic_global_pattern[] = {
    "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    ,
    "\n"
    "}\n" };
34
// Two-fragment kernel wrapper for LOCAL-memory atomic tests. Fragment [0]
// seeds the __local destination from the global finalDest buffer (every
// work-item redundantly writes the same values) and barriers before the
// caller's atomic core runs; fragment [1] barriers again and copies the
// local results back out to finalDest for host-side validation.
const char *atomic_local_pattern[] = {
    "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int dstItemIdx;\n"
    "\n"
    " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
    " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
    " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    ,
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    " // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
    " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
    " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
    "}\n" };
52
53
54 #define TEST_COUNT 128 * 1024
55
56
// Per-test callback bundle. A test supplies only the callbacks relevant to
// its data types/validation strategy and leaves the rest NULL:
//  - ExpectedValue*Fn: compute the value each destination item should hold
//    after all work-items ran (NULL = no total-value check).
//  - GenerateRefs*Fn:  fill the input/reference buffer with random operands
//    (NULL = kernel input buffer starts uninitialized/unused).
//  - VerifyRefs*Fn:    validate the per-work-item "old value" array
//    (NULL = no per-thread check).
struct TestFns
{
    // Initial destination value for 32-bit tests
    cl_int mIntStartValue;
    // Initial destination value for 64-bit tests
    cl_long mLongStartValue;

    // Number of destination items to allocate/check (NULL = assume 1)
    size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );

    // Integer versions
    cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
    void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
    bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );

    // Long versions
    cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
    void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
    bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );

    // Float versions
    cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
    void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
    bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
};
79
check_atomic_support(cl_device_id device,bool extended,bool isLocal,ExplicitType dataType)80 bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
81 {
82 const char *extensionNames[8] = {
83 "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
84 "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics",
85 "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics",
86 "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one
87 };
88
89 size_t index = 0;
90 if( extended )
91 index += 1;
92 if( isLocal )
93 index += 2;
94
95 Version version = get_device_cl_version(device);
96
97 switch (dataType)
98 {
99 case kInt:
100 case kUInt:
101 if( version >= Version(1,1) )
102 return 1;
103 break;
104 case kLong:
105 case kULong:
106 index += 4;
107 break;
108 case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions
109 return version >= Version(1,1);
110 default:
111 log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
112 return 0;
113 }
114
115 return is_extension_available( device, extensionNames[index] );
116 }
117
// Builds and runs one atomic-op test kernel, then validates the results.
//
// The kernel source is assembled from four fragments: an extension pragma,
// a header (global or local pattern), the caller-supplied programCore, and a
// closing footer. Validation uses whichever callbacks TestFns provides:
// ExpectedValue*Fn compares each destination item against a computed total,
// and/or VerifyRefs*Fn checks the per-work-item "old value" array. Finally
// the kernel is re-run with a single work-item to confirm the atomic returned
// the original (pre-op) destination value.
//
// Returns 0 on pass (an unsupported device/type combination also counts as a
// pass), -1 on failure.
int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
                         TestFns testFns,
                         bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    size_t threads[1];
    clMemWrapper streams[2];
    void *refValues, *startRefValues;
    size_t threadSize, groupSize;
    const char *programLines[4];
    char pragma[ 512 ];
    char programHeader[ 512 ];
    MTdata d;
    size_t typeSize = get_explicit_type_size( dataType );


    // Verify we can run first
    bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
    if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
    {
        // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
        if( dataType == kFloat )
            log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
        else if( !isUnsigned )
            log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
        // Since we don't support the operation, they implicitly pass
        return 0;
    }
    else
    {
        if( dataType == kFloat )
            log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
        else
            log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
                      (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
    }

    //// Set up the kernel code

    // Create the pragma line for this kernel
    // (64-bit atomics use cl_khr_int64_*; 32-bit ones split by local/global)
    bool isLong = ( dataType == kLong || dataType == kULong );
    sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
             isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
             extended ? "extended" : "base" );

    // Now create the program header
    const char *typeName = get_explicit_type_name( dataType );
    if( isLocal )
        sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
    else
        sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );

    // Set up our entire program now
    programLines[ 0 ] = pragma;
    programLines[ 1 ] = programHeader;
    programLines[ 2 ] = programCore;
    programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];

    if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
    {
        return -1;
    }

    //// Set up to actually run
    threadSize = num_elements;

    error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
    test_error( error, "Unable to get thread group max size" );

    if( matchGroupSize )
        // HACK because xchg and cmpxchg apparently are limited by hardware
        threadSize = groupSize;

    if( isLocal )
    {
        // For local-memory tests, clamp to a single work-group that the
        // device/kernel combination can actually launch.
        size_t maxSizes[3] = {0, 0, 0};
        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0);
        test_error( error, "Unable to obtain max work item sizes for the device" );

        size_t workSize;
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
        test_error( error, "Unable to obtain max work group size for device and kernel combo" );

        // "workSize" is limited to that of the first dimension as only a 1DRange is executed.
        if( maxSizes[0] < workSize )
        {
            workSize = maxSizes[0];
        }

        threadSize = groupSize = workSize;
    }


    log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );

    // NOTE(review): malloc results here are not checked, and the early
    // `return -1` failure paths below leak refValues/startRefValues/destItems.
    // Tolerated because the test harness exits shortly after a failure.
    refValues = (cl_int *)malloc( typeSize * threadSize );

    if( testFns.GenerateRefsIntFn != NULL )
    {
        // We have a ref generator provided
        d = init_genrand( gRandomSeed );
        startRefValues = malloc( typeSize * threadSize );
        if( typeSize == 4 )
            testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
        else
            testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
        free_mtdata(d);
        d = NULL;
    }
    else
        startRefValues = NULL;

    // If we're given a num_results function, we need to determine how many result objects we need. If
    // we don't have it, we assume it's just 1
    size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;

    char * destItems = new char[ typeSize * numDestItems ];
    // NOTE(review): operator new throws on failure, so this NULL check is
    // dead code; an allocation failure would propagate as an exception.
    if( destItems == NULL )
    {
        log_error( "ERROR: Unable to allocate memory!\n" );
        return -1;
    }
    // Seed every destination item with the test's starting value.
    void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, destItems, NULL);
    if (!streams[0])
    {
        log_error("ERROR: Creating output array failed!\n");
        return -1;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(( startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE )), typeSize * threadSize, startRefValues, NULL);
    if (!streams[1])
    {
        log_error("ERROR: Creating reference array failed!\n");
        return -1;
    }

    /* Set the arguments */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    if( isLocal )
    {
        // Local-pattern kernels take two extra args: the __local scratch
        // buffer and its item count.
        error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
        test_error( error, "Unable to set indexed local kernel argument" );

        cl_int numDestItemsInt = (cl_int)numDestItems;
        error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
        test_error( error, "Unable to set indexed kernel argument" );
    }

    /* Run the kernel */
    threads[0] = threadSize;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to read result value!" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
    // have one, it's because we're comparing ref values only
    if( testFns.ExpectedValueIntFn != NULL )
    {
        for( size_t i = 0; i < numDestItems; i++ )
        {
            char expected[ 8 ];
            cl_int intVal;
            cl_long longVal;
            if( typeSize == 4 )
            {
                // Int version
                intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
                memcpy( expected, &intVal, sizeof( intVal ) );
            }
            else
            {
                // Long version
                longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
                memcpy( expected, &longVal, sizeof( longVal ) );
            }

            if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
            {
                // Dump every start/result pair for diagnosis. Reusing `i` for
                // the dump loop is safe only because we return immediately after.
                // NOTE(review): "%ld" with a size_t argument is technically
                // mismatched on LLP64 platforms — would be %zu.
                if( typeSize == 4 )
                {
                    cl_int *outValue = (cl_int *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
                    cl_int *startRefs = (cl_int *)startRefValues;
                    cl_int *refs = (cl_int *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %d\n", i, refs[i] );
                    }
                }
                else
                {
                    cl_long *outValue = (cl_long *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
                    cl_long *startRefs = (cl_long *)startRefValues;
                    cl_long *refs = (cl_long *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %lld\n", i, refs[i] );
                    }
                }
                return -1;
            }
        }
    }

    if( testFns.VerifyRefsIntFn != NULL )
    {
        /* Use the verify function to also check the results */
        // NOTE(review): `!fn(...) != 0` parses as `(!fn(...)) != 0`, which is
        // just `!fn(...)` — the trailing `!= 0` is redundant but harmless.
        if( dataType == kFloat )
        {
            cl_float *outValue = (cl_float *)destItems;
            if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else if( typeSize == 4 )
        {
            cl_int *outValue = (cl_int *)destItems;
            if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else
        {
            cl_long *outValue = (cl_long *)destItems;
            if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
    }
    else if( testFns.ExpectedValueIntFn == NULL )
    {
        log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
        return -1;
    }


    /* Re-write the starting value */
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );
    error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to write starting values!" );

    /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
    threads[0] = 1;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    // The single work-item's recorded "old" value must equal the (restarted)
    // destination's pre-op value.
    if( memcmp( refValues, destItems, typeSize ) != 0 )
    {
        if( typeSize == 4 )
        {
            cl_int *s = (cl_int *)destItems;
            cl_int *r = (cl_int *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                       " (should have been %d, returned %d)!\n", *s, *r );
        }
        else
        {
            cl_long *s = (cl_long *)destItems;
            cl_long *r = (cl_long *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                       " (should have been %lld, returned %lld)!\n", *s, *r );
        }
        return -1;
    }

    delete [] destItems;
    free( refValues );
    if( startRefValues != NULL )
        free( startRefValues );

    return 0;
}
421
test_atomic_function_set(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,const char * programCore,TestFns testFns,bool extended,bool matchGroupSize,bool usingAtomicPrefix)422 int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
423 TestFns testFns,
424 bool extended, bool matchGroupSize, bool usingAtomicPrefix )
425 {
426 log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");
427
428 int errors = 0;
429 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
430 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
431 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
432 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );
433
434 // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix.
435 // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
436 if (!usingAtomicPrefix) {
437 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
438 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
439 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
440 errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
441 }
442
443 return errors;
444 }
445
446 #pragma mark ---- add
447
// Kernel body for the add test, 1.0 "atom_" spelling: each work-item records
// the pre-add value once, then adds three more times, so the destination
// accumulates (tid + 3) * 4 per work-item.
const char atom_add_core[] =
    " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n";

// Same test body using the OpenCL 1.1 core "atomic_" spelling.
const char atomic_add_core[] =
    " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n";
459
test_atomic_add_result_int(size_t size,cl_int * startRefValues,size_t whichDestValue)460 cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
461 {
462 cl_int total = 0;
463 for( size_t i = 0; i < size; i++ )
464 total += ( (cl_int)i + 3 ) * 4;
465 return total;
466 }
467
test_atomic_add_result_long(size_t size,cl_long * startRefValues,size_t whichDestValue)468 cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
469 {
470 cl_long total = 0;
471 for( size_t i = 0; i < size; i++ )
472 total += ( ( i + 3 ) * 4 );
473 return total;
474 }
475
test_atomic_add(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)476 int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
477 {
478 TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };
479
480 if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
481 return -1;
482 if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
483 return -1;
484 return 0;
485 }
486
487 #pragma mark ---- sub
488
// Kernel body for the sub test, 1.0 "atom_" spelling: each work-item
// subtracts (tid + 3) once and records the pre-subtract value.
const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";

// Same test body using the OpenCL 1.1 core "atomic_" spelling.
const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
492
test_atomic_sub_result_int(size_t size,cl_int * startRefValues,size_t whichDestValue)493 cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
494 {
495 cl_int total = INT_TEST_VALUE;
496 for( size_t i = 0; i < size; i++ )
497 total -= (cl_int)i + 3;
498 return total;
499 }
500
test_atomic_sub_result_long(size_t size,cl_long * startRefValues,size_t whichDestValue)501 cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
502 {
503 cl_long total = LONG_TEST_VALUE;
504 for( size_t i = 0; i < size; i++ )
505 total -= i + 3;
506 return total;
507 }
508
test_atomic_sub(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)509 int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
510 {
511 TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };
512
513 if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
514 return -1;
515 if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
516 return -1;
517 return 0;
518 }
519
520 #pragma mark ---- xchg
521
// Kernel body for the xchg test, 1.0 "atom_" spelling: each work-item swaps
// its tid into the destination and records the value it displaced.
const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";

// OpenCL 1.1 core "atomic_" spelling of the same body.
const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
// Same source text; the %s type substitution in the kernel pattern makes this
// the float variant (tid is implicitly converted).
const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
526
test_atomic_xchg_verify_int(size_t size,cl_int * refValues,cl_int finalValue)527 bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
528 {
529 /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
530 char *valids;
531 size_t i;
532 char originalValidCount = 0;
533
534 valids = (char *)malloc( sizeof( char ) * size );
535 memset( valids, 0, sizeof( char ) * size );
536
537 for( i = 0; i < size; i++ )
538 {
539 if( refValues[ i ] == INT_TEST_VALUE )
540 {
541 // Special initial value
542 originalValidCount++;
543 continue;
544 }
545 if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
546 {
547 log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
548 return false;
549 }
550 valids[ refValues[ i ] ] ++;
551 }
552
553 /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
554 the final value outputted */
555 if( valids[ finalValue ] > 0 )
556 {
557 log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
558 return false;
559 }
560 else
561 valids[ finalValue ] = 1; // So the following loop will be okay
562
563 /* Now check that every entry has one and only one count */
564 if( originalValidCount != 1 )
565 {
566 log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
567 return false;
568 }
569 for( i = 0; i < size; i++ )
570 {
571 if( valids[ i ] != 1 )
572 {
573 log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
574 for( size_t j = 0; j < size; j++ )
575 log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
576 return false;
577 }
578 }
579
580 free( valids );
581 return true;
582 }
583
test_atomic_xchg_verify_long(size_t size,cl_long * refValues,cl_long finalValue)584 bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
585 {
586 /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
587 char *valids;
588 size_t i;
589 char originalValidCount = 0;
590
591 valids = (char *)malloc( sizeof( char ) * size );
592 memset( valids, 0, sizeof( char ) * size );
593
594 for( i = 0; i < size; i++ )
595 {
596 if( refValues[ i ] == LONG_TEST_VALUE )
597 {
598 // Special initial value
599 originalValidCount++;
600 continue;
601 }
602 if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
603 {
604 log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
605 return false;
606 }
607 valids[ refValues[ i ] ] ++;
608 }
609
610 /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
611 the final value outputted */
612 if( valids[ finalValue ] > 0 )
613 {
614 log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
615 return false;
616 }
617 else
618 valids[ finalValue ] = 1; // So the following loop will be okay
619
620 /* Now check that every entry has one and only one count */
621 if( originalValidCount != 1 )
622 {
623 log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
624 return false;
625 }
626 for( i = 0; i < size; i++ )
627 {
628 if( valids[ i ] != 1 )
629 {
630 log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
631 for( size_t j = 0; j < size; j++ )
632 log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
633 return false;
634 }
635 }
636
637 free( valids );
638 return true;
639 }
640
test_atomic_xchg_verify_float(size_t size,cl_float * refValues,cl_float finalValue)641 bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
642 {
643 /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
644 char *valids;
645 size_t i;
646 char originalValidCount = 0;
647
648 valids = (char *)malloc( sizeof( char ) * size );
649 memset( valids, 0, sizeof( char ) * size );
650
651 for( i = 0; i < size; i++ )
652 {
653 cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
654 if( *intRefValue == INT_TEST_VALUE )
655 {
656 // Special initial value
657 originalValidCount++;
658 continue;
659 }
660 if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
661 {
662 log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
663 return false;
664 }
665 valids[ (int)refValues[ i ] ] ++;
666 }
667
668 /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
669 the final value outputted */
670 if( valids[ (int)finalValue ] > 0 )
671 {
672 log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
673 return false;
674 }
675 else
676 valids[ (int)finalValue ] = 1; // So the following loop will be okay
677
678 /* Now check that every entry has one and only one count */
679 if( originalValidCount != 1 )
680 {
681 log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
682 return false;
683 }
684 for( i = 0; i < size; i++ )
685 {
686 if( valids[ i ] != 1 )
687 {
688 log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
689 for( size_t j = 0; j < size; j++ )
690 log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
691 return false;
692 }
693 }
694
695 free( valids );
696 return true;
697 }
698
test_atomic_xchg(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)699 int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
700 {
701 TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };
702
703 int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
704 errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );
705
706 errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
707 errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );
708
709 return errors;
710 }
711
712
713 #pragma mark ---- min
714
// Kernel body for the min test, 1.0 "atom_" spelling: each work-item applies
// min with its pre-generated random operand and records the displaced value.
const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";

// OpenCL 1.1 core "atomic_" spelling of the same body.
const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
718
test_atomic_min_result_int(size_t size,cl_int * startRefValues,size_t whichDestValue)719 cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
720 {
721 cl_int total = 0x7fffffffL;
722 for( size_t i = 0; i < size; i++ )
723 {
724 if( startRefValues[ i ] < total )
725 total = startRefValues[ i ];
726 }
727 return total;
728 }
729
test_atomic_min_gen_int(size_t size,cl_int * startRefValues,MTdata d)730 void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
731 {
732 for( size_t i = 0; i < size; i++ )
733 startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
734 }
735
test_atomic_min_result_long(size_t size,cl_long * startRefValues,size_t whichDestValue)736 cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
737 {
738 cl_long total = 0x7fffffffffffffffLL;
739 for( size_t i = 0; i < size; i++ )
740 {
741 if( startRefValues[ i ] < total )
742 total = startRefValues[ i ];
743 }
744 return total;
745 }
746
test_atomic_min_gen_long(size_t size,cl_long * startRefValues,MTdata d)747 void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
748 {
749 for( size_t i = 0; i < size; i++ )
750 startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
751 }
752
test_atomic_min(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)753 int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
754 {
755 TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };
756
757 if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
758 return -1;
759 if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
760 return -1;
761 return 0;
762 }
763
764
765 #pragma mark ---- max
766
// Kernel body for the max test, 1.0 "atom_" spelling: each work-item applies
// max with its pre-generated random operand and records the displaced value.
const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";

// OpenCL 1.1 core "atomic_" spelling of the same body.
const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
770
test_atomic_max_result_int(size_t size,cl_int * startRefValues,size_t whichDestValue)771 cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
772 {
773 cl_int total = 0;
774 for( size_t i = 0; i < size; i++ )
775 {
776 if( startRefValues[ i ] > total )
777 total = startRefValues[ i ];
778 }
779 return total;
780 }
781
test_atomic_max_gen_int(size_t size,cl_int * startRefValues,MTdata d)782 void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
783 {
784 for( size_t i = 0; i < size; i++ )
785 startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
786 }
787
test_atomic_max_result_long(size_t size,cl_long * startRefValues,size_t whichDestValue)788 cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
789 {
790 cl_long total = 0;
791 for( size_t i = 0; i < size; i++ )
792 {
793 if( startRefValues[ i ] > total )
794 total = startRefValues[ i ];
795 }
796 return total;
797 }
798
test_atomic_max_gen_long(size_t size,cl_long * startRefValues,MTdata d)799 void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
800 {
801 for( size_t i = 0; i < size; i++ )
802 startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
803 }
804
/* Driver: run the atomic max test over both the atom_ and atomic_ spellings. */
int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );
    return failed ? -1 : 0;
}
815
816
#pragma mark ---- inc

/* Kernel core for atomic increment (no operand): every work-item bumps the
   single destination word once. */
const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";

/* Same operation via the atomic_-prefixed (core built-in) spelling. */
const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
822
/* Host reference for 32-bit inc: the seed value plus one per work-item. */
cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    return (cl_int)size + INT_TEST_VALUE;
}
827
/* Host reference for 64-bit inc: the seed value plus one per work-item.
   Cast size explicitly so the addition is done in signed cl_long rather than
   implicitly-converted unsigned size_t arithmetic (matches the int variant). */
cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    return LONG_TEST_VALUE + (cl_long)size;
}
832
/* Driver: run the atomic inc test over both the atom_ and atomic_ spellings.
   inc takes no operand, so no start-value generator is supplied. */
int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );
    return failed ? -1 : 0;
}
843
844
#pragma mark ---- dec

/* Kernel core for atomic decrement (no operand): every work-item decrements
   the single destination word once. */
const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";

/* Same operation via the atomic_-prefixed (core built-in) spelling. */
const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
850
/* Host reference for 32-bit dec: the seed value minus one per work-item. */
cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    return INT_TEST_VALUE - (cl_int)size;
}
855
/* Host reference for 64-bit dec: the seed value minus one per work-item.
   Cast size explicitly so the subtraction is done in signed cl_long rather
   than implicitly-converted unsigned size_t arithmetic (matches the int
   variant). */
cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    return LONG_TEST_VALUE - (cl_long)size;
}
860
/* Driver: run the atomic dec test over both the atom_ and atomic_ spellings. */
int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );
    return failed ? -1 : 0;
}
871
872
#pragma mark ---- cmpxchg

/* We test cmpxchg by implementing (the long way) atom_add */
/* Each work-item loops until its compare-and-swap succeeds, adding (tid + 2)
   to the destination; the expected final value therefore matches an add. */
const char atom_cmpxchg_core[] =
" int oldValue, origValue, newValue;\n"
" do { \n"
" origValue = destMemory[0];\n"
" newValue = origValue + tid + 2;\n"
" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
" } while( oldValue != origValue );\n"
" oldValues[tid] = oldValue;\n"
;

/* 64-bit variant of the CAS-retry add loop (long destination/operands). */
const char atom_cmpxchg64_core[] =
" long oldValue, origValue, newValue;\n"
" do { \n"
" origValue = destMemory[0];\n"
" newValue = origValue + tid + 2;\n"
" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
" } while( oldValue != origValue );\n"
" oldValues[tid] = oldValue;\n"
;

/* 32-bit CAS-retry add loop via the atomic_-prefixed core built-in. */
const char atomic_cmpxchg_core[] =
" int oldValue, origValue, newValue;\n"
" do { \n"
" origValue = destMemory[0];\n"
" newValue = origValue + tid + 2;\n"
" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
" } while( oldValue != origValue );\n"
" oldValues[tid] = oldValue;\n"
;
905
/* Host reference for the 32-bit CAS loop: work-item tid contributes (tid + 2)
   on top of the seed value. */
cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int expected = INT_TEST_VALUE;
    size_t tid;

    for( tid = 0; tid < size; tid++ )
        expected += (cl_int)tid + 2;
    return expected;
}
913
/* Host reference for the 64-bit CAS loop: work-item tid contributes (tid + 2)
   on top of the seed value. */
cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long expected = LONG_TEST_VALUE;
    size_t tid;

    for( tid = 0; tid < size; tid++ )
        expected += tid + 2;
    return expected;
}
921
/* Driver: validate cmpxchg by using it to build an add, across both
   spellings, 32/64-bit widths, signed/unsigned, global and local memory. */
int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };

    int failures = 0;

    log_info(" Testing atom_ functions...\n");
    /* 32-bit extension spelling: global then local memory, signed then unsigned. */
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );

    /* 64-bit extension spelling. */
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );

    log_info(" Testing atomic_ functions...\n");
    /* 32-bit core built-in spelling. */
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
    failures |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );

    return failures ? -1 : 0;
}
950
951 #pragma mark -------- Bitwise functions
952
/* Bitwise tests use one destination item per group of (bits-in-type)
   work-items; return that count, rounded up. */
size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
{
    const size_t bitsPerItem = 8 * get_explicit_type_size( dataType );

    return ( threadCount + ( bitsPerItem - 1 ) ) / bitsPerItem;
}
959
#pragma mark ---- and

/* Each work-item AND-clears exactly one bit of destMemory[tid / numBits]
   (destinations start all-ones), giving a deterministic final bit pattern. */
const char atom_and_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
;

/* Same operation via the atomic_-prefixed (core built-in) spelling. */
const char atomic_and_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
;
977
978
/* Host reference for 32-bit atomic-and: every fully covered destination item
   has all 32 bits cleared (0); only the last item may keep bits that no
   work-item existed to clear. */
cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0;

    // Last item doesn't get and'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = (cl_int)0xffffffffL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= (cl_int)~( 1U << i ); // 1U: left-shifting signed 1 by 31 is undefined behavior

    return bits;
}
993
/* Host reference for 64-bit atomic-and: every fully covered destination item
   has all 64 bits cleared (0); only the last item may keep bits that no
   work-item existed to clear. */
cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0;

    // Last item doesn't get and'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long bits = (cl_long)0xffffffffffffffffLL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= ~( 1LL << i ); // was `1 << i`: int shift is UB for i >= 32 and never clears bits 32..63

    return bits;
}
1008
/* Driver: run the atomic and test over both the atom_ and atomic_ spellings,
   starting from all-ones destination values. */
int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
        test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );
    return failed ? -1 : 0;
}
1020
1021
#pragma mark ---- or

/* Each work-item OR-sets exactly one bit of destMemory[tid / numBits]
   (destinations start at zero), giving a deterministic final bit pattern. */
const char atom_or_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
;

/* Same operation via the atomic_-prefixed (core built-in) spelling. */
const char atomic_or_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
;
1039
/* Host reference for 32-bit atomic-or: every fully covered destination item
   has all 32 bits set; only the last item may be partially set. */
cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0xffffffff;

    // Last item doesn't get or'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = 0;
    for( size_t i = 0; i < numBits; i++ )
        bits |= (cl_int)( 1U << i ); // 1U: left-shifting signed 1 by 31 is undefined behavior

    return bits;
}
1054
/* Host reference for 64-bit atomic-or: every fully covered destination item
   has all 64 bits set; only the last item may be partially set. */
cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0x0ffffffffffffffffLL;

    // Last item doesn't get or'ed on every bit, so we have to mask away
    size_t remainingBits = (size_t)size - whichResult * 64;
    cl_long pattern = 0;
    size_t bit;
    for( bit = 0; bit < remainingBits; bit++ )
        pattern |= ( 1LL << bit );

    return pattern;
}
1069
/* Driver: run the atomic or test over both the atom_ and atomic_ spellings,
   starting from zeroed destination values. */
int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );
    return failed ? -1 : 0;
}
1080
1081
#pragma mark ---- xor

/* All work-items xor a single destination word; work-item tid toggles bit
   (tid mod numBits), so the final value is the seed xor'ed deterministically.
   NOTE(review): `1 << bitIndex` shifts an int, but for 64-bit destMemory
   bitIndex can reach 63 while OpenCL masks shift counts to the left operand's
   width — the host reference uses 1LL << (i & 63); verify the 64-bit variant. */
const char atom_xor_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int bitIndex = tid & ( numBits - 1 );\n"
"\n"
" oldValues[tid] = atom_xor( &destMemory[0], 1 << bitIndex );\n"
;

/* Same operation via the atomic_-prefixed (core built-in) spelling. */
const char atomic_xor_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int bitIndex = tid & ( numBits - 1 );\n"
"\n"
" oldValues[tid] = atomic_xor( &destMemory[0], 1 << bitIndex );\n"
;
1097
/* Host reference for 32-bit atomic-xor: fold each work-item's single-bit
   toggle into the seed value. */
cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    cl_int total = 0x2f08ab41;
    for( size_t i = 0; i < size; i++ )
        total ^= (cl_int)( 1U << ( i & 31 ) ); // 1U: left-shifting signed 1 by 31 is undefined behavior
    return total;
}
1105
/* Host reference for 64-bit atomic-xor: fold each work-item's single-bit
   toggle into the seed value. */
cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    cl_long accum = 0x2f08ab418ba0541LL;
    size_t tid;
    for( tid = 0; tid < size; tid++ )
        accum ^= ( 1LL << ( tid & 63 ) );
    return accum;
}
1113
/* Driver: run the atomic xor test over both the atom_ and atomic_ spellings,
   seeding the destinations with fixed patterns. */
int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );
    return failed ? -1 : 0;
}
1124
1125
1126
1127
1128