1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/conversions.h"
18 
19 #include <algorithm>
20 
21 #define TEST_SIZE 512
22 
23 const char *singleParamIntegerKernelSourcePattern =
24 "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
25 "{\n"
26 "    int  tid = get_global_id(0);\n"
27 "    %s%s tmp = vload%s( tid, destValues );\n"
28 "    tmp %s= %s( vload%s( tid, sourceA ) );\n"
29 "    vstore%s( tmp, tid, destValues );\n"
30 "\n"
31 "}\n";
32 
33 const char *singleParamSingleSizeIntegerKernelSourcePattern =
34 "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
35 "{\n"
36 "    int  tid = get_global_id(0);\n"
37 "    destValues[tid] %s= %s( sourceA[tid] );\n"
38 "}\n";
39 
40 typedef bool (*singleParamIntegerVerifyFn)( void *source, void *destination, ExplicitType vecType );
41 static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType );
42 bool verify_integer_divideAssign( void *source, void *destination, ExplicitType vecType );
43 bool verify_integer_moduloAssign( void *source, void *destination, ExplicitType vecType );
44 
test_single_param_integer_kernel(cl_command_queue queue,cl_context context,const char * fnName,ExplicitType vecType,size_t vecSize,singleParamIntegerVerifyFn verifyFn,MTdata d,bool useOpKernel=false)45 int test_single_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
46                                   ExplicitType vecType, size_t vecSize, singleParamIntegerVerifyFn verifyFn,
47                                      MTdata d, bool useOpKernel = false )
48 {
49     clProgramWrapper program;
50     clKernelWrapper kernel;
51     clMemWrapper streams[2];
52     cl_long inDataA[TEST_SIZE * 16], outData[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], expected;
53     int error, i;
54     size_t threads[1], localThreads[1];
55     char kernelSource[10240];
56     char *programPtr;
57     char sizeName[4];
58 
59     if (! gHasLong && strstr(get_explicit_type_name(vecType),"long"))
60     {
61        log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecType) );
62        return CL_SUCCESS;
63     }
64 
65     /* Create the source */
66     if( vecSize == 1 )
67         sizeName[ 0 ] = 0;
68     else
69         sprintf( sizeName, "%d", (int)vecSize );
70 
71     if( vecSize == 1 )
72         sprintf( kernelSource, singleParamSingleSizeIntegerKernelSourcePattern,
73                 get_explicit_type_name( vecType ), get_explicit_type_name( vecType ),
74                 useOpKernel ? fnName : "", useOpKernel ? "" : fnName );
75     else
76         sprintf( kernelSource, singleParamIntegerKernelSourcePattern,
77                 get_explicit_type_name( vecType ), get_explicit_type_name( vecType ),
78                 get_explicit_type_name( vecType ), sizeName, sizeName,
79                 useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName,
80                 sizeName );
81 
82     /* Create kernels */
83     programPtr = kernelSource;
84     if (create_single_kernel_helper(context, &program, &kernel, 1,
85                                     (const char **)&programPtr, "sample_test"))
86     {
87         log_error("The program we attempted to compile was: \n%s\n", kernelSource);
88         return -1;
89     }
90 
91     /* Generate some streams */
92     generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA );
93 
94     streams[0] = clCreateBuffer(
95         context, CL_MEM_COPY_HOST_PTR,
96         get_explicit_type_size(vecType) * vecSize * TEST_SIZE, inDataA, NULL);
97     if( streams[0] == NULL )
98     {
99         log_error("ERROR: Creating input array A failed!\n");
100         return -1;
101     }
102 
103     if( useOpKernel )
104     {
105         // Op kernels use an r/w buffer for the second param, so we need to init it with data
106         generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB );
107     }
108     streams[1] = clCreateBuffer(
109         context, (CL_MEM_READ_WRITE | (useOpKernel ? CL_MEM_COPY_HOST_PTR : 0)),
110         get_explicit_type_size(vecType) * vecSize * TEST_SIZE,
111         (useOpKernel) ? &inDataB : NULL, NULL);
112     if( streams[1] == NULL )
113     {
114         log_error("ERROR: Creating output array failed!\n");
115         return -1;
116     }
117 
118     /* Assign streams and execute */
119     error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
120     test_error( error, "Unable to set indexed kernel arguments" );
121     error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
122     test_error( error, "Unable to set indexed kernel arguments" );
123 
124     /* Run the kernel */
125     threads[0] = TEST_SIZE;
126 
127     error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
128     test_error( error, "Unable to get work group size to use" );
129 
130     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
131     test_error( error, "Unable to execute test kernel" );
132 
133     memset(outData, 0xFF, get_explicit_type_size( vecType ) * TEST_SIZE * vecSize );
134 
135     /* Now get the results */
136     error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0,
137                                  get_explicit_type_size( vecType ) * TEST_SIZE * vecSize,
138                                  outData, 0, NULL, NULL );
139     test_error( error, "Unable to read output array!" );
140 
141     // deal with division by 0 -- any answer is allowed here
142     if( verifyFn == verify_integer_divideAssign || verifyFn == verify_integer_moduloAssign )
143         patchup_divide_results( outData, inDataA, inDataB, TEST_SIZE * vecSize, vecType );
144 
145     /* And verify! */
146     char *p = (char *)outData;
147     char *in = (char *)inDataA;
148     char *in2 = (char *)inDataB;
149     for( i = 0; i < (int)TEST_SIZE; i++ )
150     {
151         for( size_t j = 0; j < vecSize; j++ )
152         {
153             if( useOpKernel )
154                 memcpy( &expected, in2, get_explicit_type_size( vecType ) );
155 
156             verifyFn( in, &expected, vecType );
157             if( memcmp( &expected, p, get_explicit_type_size( vecType ) ) != 0 )
158             {
159                 switch( get_explicit_type_size( vecType ))
160                 {
161                     case 1:
162                         if( useOpKernel )
163                             log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x)\n",
164                                       (int)i, (int)j,
165                                       ((cl_uchar*)&expected)[0],
166                                       *( (cl_uchar *)p ),
167                                       *( (cl_uchar *)in ),
168                                       *( (cl_uchar *)in2 ) );
169                         else
170                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x)\n",
171                                   (int)i, (int)j,
172                                    ((cl_uchar*)&expected)[0],
173                                    *( (cl_uchar *)p ),
174                                    *( (cl_uchar *)in ) );
175                         break;
176 
177                     case 2:
178                         if( useOpKernel )
179                             log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x)\n",
180                                       (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ),
181                                       *( (cl_ushort *)in ), *( (cl_ushort *)in2 ) );
182                         else
183                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x)\n",
184                                   (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ),
185                                             *( (cl_ushort *)in ) );
186                         break;
187 
188                     case 4:
189                         if( useOpKernel )
190                             log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n",
191                                       (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ),
192                                       *( (cl_uint *)in ), *( (cl_uint *)in2 ) );
193                         else
194                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x)\n",
195                                   (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ),
196                                             *( (cl_uint *)in ) );
197                         break;
198 
199                     case 8:
200                         if( useOpKernel )
201                             log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n",
202                                       (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ),
203                                       *( (cl_ulong *)in ), *( (cl_ulong *)in2 ) );
204                         else
205                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx)\n",
206                                   (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ),
207                                             *( (cl_ulong *)in ) );
208                         break;
209                 }
210                 return -1;
211             }
212             p += get_explicit_type_size( vecType );
213             in += get_explicit_type_size( vecType );
214             in2 += get_explicit_type_size( vecType );
215         }
216     }
217 
218     return 0;
219 }
220 
test_single_param_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,singleParamIntegerVerifyFn verifyFn,bool useOpKernel=false)221 int test_single_param_integer_fn( cl_command_queue queue, cl_context context, const char *fnName, singleParamIntegerVerifyFn verifyFn, bool useOpKernel = false )
222 {
223     ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
224     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO 3 not tested
225     unsigned int index, typeIndex;
226     int retVal = 0;
227     RandomSeed seed(gRandomSeed );
228 
229     for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
230     {
231         if ((types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong)
232             continue;
233 
234         for( index = 0; vecSizes[ index ] != 0; index++ )
235         {
236             if( test_single_param_integer_kernel(queue, context, fnName, types[ typeIndex ], vecSizes[ index ], verifyFn, seed, useOpKernel ) != 0 )
237             {
238                 log_error( "   Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] );
239                 retVal = -1;
240             }
241         }
242     }
243 
244     return retVal;
245 }
246 
verify_integer_clz(void * source,void * destination,ExplicitType vecType)247 bool verify_integer_clz( void *source, void *destination, ExplicitType vecType )
248 {
249     cl_long testValue;
250     int count;
251     int typeBits;
252 
253     switch( vecType )
254     {
255         case kChar:
256             testValue = *( (cl_char *)source );
257             typeBits = 8 * sizeof( cl_char );
258             break;
259         case kUChar:
260             testValue = *( (cl_uchar *)source );
261             typeBits = 8 * sizeof( cl_uchar );
262             break;
263         case kShort:
264             testValue = *( (cl_short *)source );
265             typeBits = 8 * sizeof( cl_short );
266             break;
267         case kUShort:
268             testValue = *( (cl_ushort *)source );
269             typeBits = 8 * sizeof( cl_ushort );
270             break;
271         case kInt:
272             testValue = *( (cl_int *)source );
273             typeBits = 8 * sizeof( cl_int );
274             break;
275         case kUInt:
276             testValue = *( (cl_uint *)source );
277             typeBits = 8 * sizeof( cl_uint );
278             break;
279         case kLong:
280             testValue = *( (cl_long *)source );
281             typeBits = 8 * sizeof( cl_long );
282             break;
283         case kULong:
284             // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting
285             testValue = *( (cl_ulong *)source );
286             typeBits = 8 * sizeof( cl_ulong );
287             break;
288         default:
289             // Should never happen
290             return false;
291     }
292 
293     count = typeBits;
294     if( testValue )
295     {
296         testValue <<= 8 * sizeof( testValue ) - typeBits;
297         for( count = 0; 0 == (testValue & CL_LONG_MIN); count++ )
298             testValue <<= 1;
299     }
300 
301     switch( vecType )
302     {
303         case kChar:
304             *( (cl_char *)destination ) = count;
305             break;
306         case kUChar:
307             *( (cl_uchar *)destination ) = count;
308             break;
309         case kShort:
310             *( (cl_short *)destination ) = count;
311             break;
312         case kUShort:
313             *( (cl_ushort *)destination ) = count;
314             break;
315         case kInt:
316             *( (cl_int *)destination ) = count;
317             break;
318         case kUInt:
319             *( (cl_uint *)destination ) = count;
320             break;
321         case kLong:
322             *( (cl_long *)destination ) = count;
323             break;
324         case kULong:
325             *( (cl_ulong *)destination ) = count;
326             break;
327         default:
328             // Should never happen
329             return false;
330     }
331     return true;
332 }
333 
test_integer_clz(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)334 int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
335 {
336     return test_single_param_integer_fn( queue, context, "clz", verify_integer_clz );
337 }
338 
339 
verify_integer_ctz(void * source,void * destination,ExplicitType vecType)340 bool verify_integer_ctz( void *source, void *destination, ExplicitType vecType )
341 {
342   cl_long testValue;
343   int count;
344   int typeBits;
345 
346   switch( vecType )
347   {
348   case kChar:
349     testValue = *( (cl_char *)source );
350     typeBits = 8 * sizeof( cl_char );
351     break;
352   case kUChar:
353     testValue = *( (cl_uchar *)source );
354     typeBits = 8 * sizeof( cl_uchar );
355     break;
356   case kShort:
357     testValue = *( (cl_short *)source );
358     typeBits = 8 * sizeof( cl_short );
359     break;
360   case kUShort:
361     testValue = *( (cl_ushort *)source );
362     typeBits = 8 * sizeof( cl_ushort );
363     break;
364   case kInt:
365     testValue = *( (cl_int *)source );
366     typeBits = 8 * sizeof( cl_int );
367     break;
368   case kUInt:
369     testValue = *( (cl_uint *)source );
370     typeBits = 8 * sizeof( cl_uint );
371     break;
372   case kLong:
373     testValue = *( (cl_long *)source );
374     typeBits = 8 * sizeof( cl_long );
375     break;
376   case kULong:
377     // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting
378     testValue = *( (cl_ulong *)source );
379     typeBits = 8 * sizeof( cl_ulong );
380     break;
381   default:
382     // Should never happen
383     return false;
384   }
385 
386   if ( testValue == 0 )
387     count = typeBits;
388   else
389   {
390     for( count = 0; (0 == (testValue & 0x1)); count++ )
391       testValue >>= 1;
392   }
393 
394   switch( vecType )
395   {
396   case kChar:
397     *( (cl_char *)destination ) = count;
398     break;
399   case kUChar:
400     *( (cl_uchar *)destination ) = count;
401     break;
402   case kShort:
403     *( (cl_short *)destination ) = count;
404     break;
405   case kUShort:
406     *( (cl_ushort *)destination ) = count;
407     break;
408   case kInt:
409     *( (cl_int *)destination ) = count;
410     break;
411   case kUInt:
412     *( (cl_uint *)destination ) = count;
413     break;
414   case kLong:
415     *( (cl_long *)destination ) = count;
416     break;
417   case kULong:
418     *( (cl_ulong *)destination ) = count;
419     break;
420   default:
421     // Should never happen
422     return false;
423   }
424   return true;
425 }
426 
427 
test_integer_ctz(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)428 int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
429 {
430   return test_single_param_integer_fn( queue, context, "ctz", verify_integer_ctz );
431 }
432 
433 #define OP_CASE( op, sizeName, size ) \
434     case sizeName: \
435     {    \
436         cl_##size *d = (cl_##size *)destination; \
437         *d op##= *( (cl_##size *)source ); \
438         break; \
439     }
440 
441 #define OP_CASES( op ) \
442     switch( vecType ) \
443     {                    \
444         OP_CASE( op, kChar, char ) \
445         OP_CASE( op, kUChar, uchar ) \
446         OP_CASE( op, kShort, short ) \
447         OP_CASE( op, kUShort, ushort ) \
448         OP_CASE( op, kInt, int ) \
449         OP_CASE( op, kUInt, uint ) \
450         OP_CASE( op, kLong, long ) \
451         OP_CASE( op, kULong, ulong ) \
452         default: \
453             break; \
454     }
455 
456 #define OP_TEST( op, opName ) \
457     bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType )    \
458     {    \
459         OP_CASES( op )    \
460         return true; \
461     }    \
462     int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)    \
463     {    \
464         return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \
465     }
466 
467 OP_TEST( +, add )
468 OP_TEST( -, subtract )
469 OP_TEST( *, multiply )
470 OP_TEST( ^, exclusiveOr )
471 OP_TEST( |, or )
472 OP_TEST( &, and )
473 
474 #define OP_CASE_GUARD( op, sizeName, size ) \
475     case sizeName: \
476     {    \
477         cl_##size *d = (cl_##size *)destination; \
478         cl_##size *s = (cl_##size *)source;     \
479         if( *s == 0 )                           \
480             *d = -1;                            \
481         else                                    \
482             *d op##= *s;                        \
483         break; \
484     }
485 
486 #define OP_CASE_GUARD_SIGNED( op, sizeName, size, MIN_VAL ) \
487     case sizeName: \
488     {    \
489         cl_##size *d = (cl_##size *)destination; \
490         cl_##size *s = (cl_##size *)source;     \
491         if( *s == 0 || (*d == MIN_VAL && *s == -1))  \
492             *d = -1 - MIN_VAL;                  \
493         else                                    \
494             *d op##= *s;                        \
495         break; \
496     }
497 
498 #define OP_CASES_GUARD( op ) \
499     switch( vecType ) \
500     {                    \
501         OP_CASE_GUARD_SIGNED( op, kChar, char, CL_CHAR_MIN ) \
502         OP_CASE_GUARD( op, kUChar, uchar ) \
503         OP_CASE_GUARD_SIGNED( op, kShort, short, CL_SHRT_MIN ) \
504         OP_CASE_GUARD( op, kUShort, ushort ) \
505         OP_CASE_GUARD_SIGNED( op, kInt, int, CL_INT_MIN ) \
506         OP_CASE_GUARD( op, kUInt, uint ) \
507         OP_CASE_GUARD_SIGNED( op, kLong, long, CL_LONG_MIN ) \
508         OP_CASE_GUARD( op, kULong, ulong ) \
509         default: \
510             break; \
511     }
512 
513 #define OP_TEST_GUARD( op, opName ) \
514     bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType )    \
515     {    \
516         OP_CASES_GUARD( op )    \
517         return true;            \
518     }    \
519     int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)    \
520     {    \
521         return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \
522     }
523 
524 OP_TEST_GUARD( /, divide )
525 OP_TEST_GUARD( %, modulo )
526 
527 #define PATCH_CASE( _out, _src, _dest, _count, _cl_type )           \
528     {                                                               \
529         const _cl_type *denom = (const _cl_type* ) _src;            \
530         _cl_type *result = (_cl_type* ) _out;                       \
531         for( size_t i = 0; i < _count; i++ )                        \
532             if( denom[i] == 0 )                                     \
533                 result[i] = (_cl_type) -1;                          \
534     }
535 
536 #define PATCH_CASE_SIGNED( _out, _src, _dest, _count, _cl_type, _MIN_VAL )      \
537     {                                                                           \
538         const _cl_type *num = (const _cl_type* ) _dest;                         \
539         const _cl_type *denom = (const _cl_type* ) _src;                        \
540         _cl_type *result = (_cl_type* ) _out;                                   \
541         for( size_t i = 0; i < _count; i++ )                                    \
542             if( denom[i] == 0 || ( num[i] == _MIN_VAL && denom[i] == -1))       \
543                 result[i] = -1 - _MIN_VAL;                                      \
544     }
545 
patchup_divide_results(void * outData,const void * inDataA,const void * inDataB,size_t count,ExplicitType vecType)546 static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType )
547 {
548     switch( vecType )
549     {
550         case kChar:
551             PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_char, CL_CHAR_MIN )
552             break;
553         case kUChar:
554             PATCH_CASE( outData, inDataA, inDataB, count, cl_uchar )
555             break;
556         case kShort:
557             PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_short, CL_SHRT_MIN )
558             break;
559         case kUShort:
560             PATCH_CASE( outData, inDataA, inDataB, count, cl_ushort )
561             break;
562         case kInt:
563             PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_int, CL_INT_MIN )
564             break;
565         case kUInt:
566             PATCH_CASE( outData, inDataA, inDataB, count, cl_uint )
567             break;
568         case kLong:
569             PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_long, CL_LONG_MIN )
570             break;
571         case kULong:
572             PATCH_CASE( outData, inDataA, inDataB, count, cl_ulong )
573             break;
574         default:
575             log_error( "ERROR: internal test error -- unknown data type %d\n", vecType );
576             break;
577     }
578 }
579 
580 const char *twoParamIntegerKernelSourcePattern =
581 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *destValues)\n"
582 "{\n"
583 "    int  tid = get_global_id(0);\n"
584 "    %s%s sA = %s;\n"
585 "    %s%s sB = %s;\n"
586 "    %s%s dst = %s( sA, sB );\n"
587 "     %s;\n"
588 "\n"
589 "}\n";
590 
591 typedef bool (*twoParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *destination, ExplicitType vecType );
592 
build_load_statement(char * outString,size_t vecSize,const char * name)593 static char * build_load_statement( char *outString, size_t vecSize, const char *name )
594 {
595     if( vecSize != 3 )
596         sprintf( outString, "%s[ tid ]", name );
597     else
598         sprintf( outString, "vload3( tid, %s )", name );
599     return outString;
600 }
601 
build_store_statement(char * outString,size_t vecSize,const char * name,const char * srcName)602 static char * build_store_statement( char *outString, size_t vecSize, const char *name, const char *srcName )
603 {
604     if( vecSize != 3 )
605         sprintf( outString, "%s[ tid ] = %s", name, srcName );
606     else
607         sprintf( outString, "vstore3( %s, tid, %s )", srcName, name );
608     return outString;
609 }
610 
test_two_param_integer_kernel(cl_command_queue queue,cl_context context,const char * fnName,ExplicitType vecAType,ExplicitType vecBType,unsigned int vecSize,twoParamIntegerVerifyFn verifyFn,MTdata d)611 int test_two_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
612                                      ExplicitType vecAType, ExplicitType vecBType, unsigned int vecSize, twoParamIntegerVerifyFn verifyFn, MTdata d )
613 {
614     clProgramWrapper program;
615     clKernelWrapper kernel;
616     clMemWrapper streams[3];
617     cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected;
618     int error, i;
619     size_t threads[1], localThreads[1];
620     char kernelSource[10240];
621     char *programPtr;
622     char sizeName[4], paramSizeName[4];
623 
624     // embedded profiles don't support long/ulong datatypes
625     if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long"))
626     {
627        log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) );
628        return CL_SUCCESS;
629     }
630 
631     /* Create the source */
632     if( vecSize == 1 )
633         sizeName[ 0 ] = 0;
634     else
635         sprintf( sizeName, "%d", vecSize );
636     if( ( vecSize == 1 ) || ( vecSize == 3 ) )
637         paramSizeName[ 0 ] = 0;
638         else
639         sprintf( paramSizeName, "%d", vecSize );
640 
641     char sourceALoad[ 128 ], sourceBLoad[ 128 ], destStore[ 128 ];
642 
643     sprintf( kernelSource, twoParamIntegerKernelSourcePattern,
644                 get_explicit_type_name( vecAType ), paramSizeName,
645                 get_explicit_type_name( vecBType ), paramSizeName,
646                 get_explicit_type_name( vecAType ), paramSizeName,
647                 get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ),
648                 get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ),
649                 get_explicit_type_name( vecAType ), sizeName,
650                 fnName,
651                 build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" )
652                 );
653 
654     /* Create kernels */
655     programPtr = kernelSource;
656     if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
657     {
658         log_error("The program we attempted to compile was: \n%s\n", kernelSource);
659         return -1;
660     }
661 
662     /* Generate some streams */
663     generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
664     generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
665 
666     streams[0] = clCreateBuffer(
667         context, CL_MEM_COPY_HOST_PTR,
668         get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
669     if( streams[0] == NULL )
670     {
671         log_error("ERROR: Creating input array A failed!\n");
672         return -1;
673     }
674     streams[1] = clCreateBuffer(
675         context, CL_MEM_COPY_HOST_PTR,
676         get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
677     if( streams[1] == NULL )
678     {
679         log_error("ERROR: Creating input array B failed!\n");
680         return -1;
681     }
682     streams[2] = clCreateBuffer(
683         context, CL_MEM_READ_WRITE,
684         get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, NULL, NULL);
685     if( streams[2] == NULL )
686     {
687         log_error("ERROR: Creating output array failed!\n");
688         return -1;
689     }
690 
691     /* Assign streams and execute */
692     error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
693     test_error( error, "Unable to set indexed kernel arguments" );
694     error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
695     test_error( error, "Unable to set indexed kernel arguments" );
696     error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
697     test_error( error, "Unable to set indexed kernel arguments" );
698 
699     /* Run the kernel */
700     threads[0] = TEST_SIZE;
701 
702     error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
703     test_error( error, "Unable to get work group size to use" );
704 
705     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
706     test_error( error, "Unable to execute test kernel" );
707 
708     memset(outData, 0xFF, get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize);
709 
710     /* Now get the results */
711     error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0,
712                                  get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize, outData, 0,
713                                  NULL, NULL );
714     test_error( error, "Unable to read output array!" );
715 
716     /* And verify! */
717     char *inA = (char *)inDataA;
718     char *inB = (char *)inDataB;
719     char *out = (char *)outData;
720     for( i = 0; i < (int)TEST_SIZE; i++ )
721     {
722         for( size_t j = 0; j < vecSize; j++ )
723         {
724             bool test = verifyFn( inA, inB, &expected, vecAType );
725             if( test && ( memcmp( &expected, out, get_explicit_type_size( vecAType ) ) != 0 ) )
726             {
727                 switch( get_explicit_type_size( vecAType ))
728                 {
729                     case 1:
730                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x), TEST_SIZE %d\n",
731                                    (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ),
732                                    *( (cl_uchar *)inA ),
733                                    *( (cl_uchar *)inB ) ,
734                                    TEST_SIZE);
735                         break;
736 
737                     case 2:
738                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x), TEST_SIZE %d\n",
739                                    (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ),
740                                    *( (cl_ushort *)inA ),
741                                    *( (cl_ushort *)inB ),
742                                    TEST_SIZE);
743                         break;
744 
745                     case 4:
746                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n",
747                                   (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ),
748                                             *( (cl_uint *)inA ),
749                                             *( (cl_uint *)inB ) );
750                         break;
751 
752                     case 8:
753                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n",
754                                   (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ),
755                                             *( (cl_ulong *)inA ),
756                                             *( (cl_ulong *)inB ) );
757                         break;
758                 }
759                 return -1;
760             }
761             inA += get_explicit_type_size( vecAType );
762             inB += get_explicit_type_size( vecBType );
763             out += get_explicit_type_size( vecAType );
764         }
765     }
766 
767     return 0;
768 }
769 
test_two_param_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,twoParamIntegerVerifyFn verifyFn)770 int test_two_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn)
771 {
772     ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
773     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO : 3 not tested
774     unsigned int index, typeIndex;
775     int retVal = 0;
776     RandomSeed seed(gRandomSeed );
777 
778     for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
779     {
780         if (( types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong)
781             continue;
782 
783         for( index = 0; vecSizes[ index ] != 0; index++ )
784         {
785             if( test_two_param_integer_kernel(queue, context, fnName, types[ typeIndex ], types[ typeIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
786             {
787                 log_error( "   Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] );
788                 retVal = -1;
789             }
790         }
791     }
792 
793     return retVal;
794 }
795 
test_two_param_unmatched_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,twoParamIntegerVerifyFn verifyFn)796 int test_two_param_unmatched_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn)
797 {
798     ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
799     unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
800     unsigned int index, typeAIndex, typeBIndex;
801     int retVal = 0;
802     RandomSeed seed( gRandomSeed );
803 
804     for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ )
805     {
806         if (( types[ typeAIndex ] == kLong || types[ typeAIndex ] == kULong) && !gHasLong)
807             continue;
808 
809         for( typeBIndex = 0; types[ typeBIndex ] != kNumExplicitTypes; typeBIndex++ )
810         {
811             if (( types[ typeBIndex ] == kLong || types[ typeBIndex ] == kULong) && !gHasLong)
812                 continue;
813 
814             for( index = 0; vecSizes[ index ] != 0; index++ )
815             {
816                 if( test_two_param_integer_kernel( queue, context, fnName, types[ typeAIndex ], types[ typeBIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
817                 {
818                     log_error( "   Vector %s%d / %s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ],  get_explicit_type_name( types[ typeBIndex ] ), vecSizes[ index ] );
819                     retVal = -1;
820                 }
821             }
822         }
823     }
824 
825     return retVal;
826 }
827 
verify_integer_hadd(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)828 bool verify_integer_hadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
829 {
830     cl_long testValueA, testValueB, overflow;
831     cl_ulong uValueA, uValueB, uOverflow;
832 
833     switch( vecType )
834     {
835         case kChar:
836             testValueA = *( (cl_char *)sourceA );
837             testValueB = *( (cl_char *)sourceB );
838             *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB ) >> 1 );
839             break;
840         case kUChar:
841             testValueA = *( (cl_uchar *)sourceA );
842             testValueB = *( (cl_uchar *)sourceB );
843             *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB ) >> 1 );
844             break;
845         case kShort:
846             testValueA = *( (cl_short *)sourceA );
847             testValueB = *( (cl_short *)sourceB );
848             *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB ) >> 1 );
849             break;
850         case kUShort:
851             testValueA = *( (cl_ushort *)sourceA );
852             testValueB = *( (cl_ushort *)sourceB );
853             *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB ) >> 1 );
854             break;
855         case kInt:
856             testValueA = *( (cl_int *)sourceA );
857             testValueB = *( (cl_int *)sourceB );
858             *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB ) >> 1 );
859             break;
860         case kUInt:
861             testValueA = *( (cl_uint *)sourceA );
862             testValueB = *( (cl_uint *)sourceB );
863             *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB ) >> 1 );
864             break;
865         case kLong:
866             // The long way to avoid dropping bits
867             testValueA = *( (cl_long *)sourceA );
868             testValueB = *( (cl_long *)sourceB );
869             overflow = ( testValueA & 0x1 ) + ( testValueB & 0x1 );
870             *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + ( overflow >> 1 );
871             break;
872         case kULong:
873             // The long way to avoid dropping bits
874             uValueA = *( (cl_ulong *)sourceA );
875             uValueB = *( (cl_ulong *)sourceB );
876             uOverflow = ( uValueA & 0x1 ) + ( uValueB & 0x1 );
877             *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + ( uOverflow >> 1 );
878             break;
879         default:
880             // Should never happen
881             return false;
882     }
883     return true;
884 }
885 
test_integer_hadd(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)886 int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
887 {
888     return test_two_param_integer_fn( queue, context, "hadd", verify_integer_hadd );
889 }
890 
verify_integer_rhadd(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)891 bool verify_integer_rhadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
892 {
893     cl_long testValueA, testValueB, overflow;
894     cl_ulong uValueA, uValueB, uOverflow;
895 
896     switch( vecType )
897     {
898         case kChar:
899             testValueA = *( (cl_char *)sourceA );
900             testValueB = *( (cl_char *)sourceB );
901             *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB + 1 ) >> 1 );
902             break;
903         case kUChar:
904             testValueA = *( (cl_uchar *)sourceA );
905             testValueB = *( (cl_uchar *)sourceB );
906             *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB + 1 ) >> 1 );
907             break;
908         case kShort:
909             testValueA = *( (cl_short *)sourceA );
910             testValueB = *( (cl_short *)sourceB );
911             *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB + 1 ) >> 1 );
912             break;
913         case kUShort:
914             testValueA = *( (cl_ushort *)sourceA );
915             testValueB = *( (cl_ushort *)sourceB );
916             *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB + 1 ) >> 1 );
917             break;
918         case kInt:
919             testValueA = *( (cl_int *)sourceA );
920             testValueB = *( (cl_int *)sourceB );
921             *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB + 1 ) >> 1 );
922             break;
923         case kUInt:
924             testValueA = *( (cl_uint *)sourceA );
925             testValueB = *( (cl_uint *)sourceB );
926             *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB + 1 ) >> 1 );
927             break;
928         case kLong:
929             // The long way to avoid dropping bits
930             testValueA = *( (cl_long *)sourceA );
931             testValueB = *( (cl_long *)sourceB );
932             overflow = ( testValueA | testValueB ) & 0x1;
933             *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + overflow;
934             break;
935         case kULong:
936             // The long way to avoid dropping bits
937             uValueA = *( (cl_ulong *)sourceA );
938             uValueB = *( (cl_ulong *)sourceB );
939             uOverflow = ( uValueA | uValueB ) & 0x1;
940             *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + uOverflow;
941             break;
942         default:
943             // Should never happen
944             return false;
945     }
946     return true;
947 }
948 
test_integer_rhadd(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)949 int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
950 {
951     return test_two_param_integer_fn( queue, context, "rhadd", verify_integer_rhadd );
952 }
953 
954 #define MIN_CASE( type, const ) \
955     case const : \
956     {            \
957         cl_##type valueA = *( (cl_##type *)sourceA ); \
958         cl_##type valueB = *( (cl_##type *)sourceB ); \
959         *( (cl_##type *)destination ) = (cl_##type)( valueB < valueA ? valueB : valueA ); \
960         break; \
961     }
962 
verify_integer_min(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)963 bool verify_integer_min( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
964 {
965     switch( vecType )
966     {
967         MIN_CASE( char, kChar )
968         MIN_CASE( uchar, kUChar )
969         MIN_CASE( short, kShort )
970         MIN_CASE( ushort, kUShort )
971         MIN_CASE( int, kInt )
972         MIN_CASE( uint, kUInt )
973         MIN_CASE( long, kLong )
974         MIN_CASE( ulong, kULong )
975         default:
976             // Should never happen
977             return false;
978     }
979     return true;
980 }
981 
test_integer_min(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)982 int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
983 {
984     return test_two_param_integer_fn( queue, context, "min", verify_integer_min);
985 }
986 
987 #define MAX_CASE( type, const ) \
988     case const : \
989     {            \
990         cl_##type valueA = *( (cl_##type *)sourceA ); \
991         cl_##type valueB = *( (cl_##type *)sourceB ); \
992         *( (cl_##type *)destination ) = (cl_##type)( valueA < valueB ? valueB : valueA ); \
993         break; \
994     }
995 
verify_integer_max(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)996 bool verify_integer_max( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
997 {
998     switch( vecType )
999     {
1000             MAX_CASE( char, kChar )
1001             MAX_CASE( uchar, kUChar )
1002             MAX_CASE( short, kShort )
1003             MAX_CASE( ushort, kUShort )
1004             MAX_CASE( int, kInt )
1005             MAX_CASE( uint, kUInt )
1006             MAX_CASE( long, kLong )
1007             MAX_CASE( ulong, kULong )
1008         default:
1009             // Should never happen
1010             return false;
1011     }
1012     return true;
1013 }
1014 
test_integer_max(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1015 int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1016 {
1017     return test_two_param_integer_fn( queue, context, "max", verify_integer_max );
1018 }
1019 
1020 
multiply_unsigned_64_by_64(cl_ulong sourceA,cl_ulong sourceB,cl_ulong & destLow,cl_ulong & destHi)1021 void multiply_unsigned_64_by_64( cl_ulong sourceA, cl_ulong sourceB, cl_ulong &destLow, cl_ulong &destHi )
1022 {
1023     cl_ulong lowA, lowB;
1024     cl_ulong highA, highB;
1025 
1026     // Split up the values
1027     lowA = sourceA & 0xffffffff;
1028     highA = sourceA >> 32;
1029     lowB = sourceB & 0xffffffff;
1030     highB = sourceB >> 32;
1031 
1032     // Note that, with this split, our multiplication becomes:
1033     //     ( a * b )
1034     // = ( ( aHI << 32 + aLO ) * ( bHI << 32 + bLO ) ) >> 64
1035     // = ( ( aHI << 32 * bHI << 32 ) + ( aHI << 32 * bLO ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64
1036     // = ( ( aHI * bHI << 64 ) + ( aHI * bLO << 32 ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64
1037     // = ( aHI * bHI ) + ( aHI * bLO >> 32 ) + ( aLO * bHI >> 32 ) + ( aLO * bLO >> 64 )
1038 
1039     // Now, since each value is 32 bits, the max size of any multiplication is:
1040     // ( 2 ^ 32 - 1 ) * ( 2 ^ 32 - 1 ) = 2^64 - 4^32 + 1 = 2^64 - 2^33 + 1, which fits within 64 bits
1041     // Which means we can do each component within a 64-bit integer as necessary (each component above marked as AB1 - AB4)
1042     cl_ulong aHibHi = highA * highB;
1043     cl_ulong aHibLo = highA * lowB;
1044     cl_ulong aLobHi = lowA * highB;
1045     cl_ulong aLobLo = lowA * lowB;
1046 
1047     // Assemble terms.
1048     //  We note that in certain cases, sums of products cannot overflow:
1049     //
1050     //      The maximum product of two N-bit unsigned numbers is
1051     //
1052     //          (2**N-1)^2 = 2**2N - 2**(N+1) + 1
1053     //
1054     //      We note that we can add the maximum N-bit number to the 2N-bit product twice without overflow:
1055     //
1056     //          (2**N-1)^2 + 2*(2**N-1) = 2**2N - 2**(N+1) + 1 + 2**(N+1) - 2 = 2**2N - 1
1057     //
1058     //  If we breakdown the product of two numbers a,b into high and low halves of partial products as follows:
1059     //
1060     //                                              a.hi                a.lo
1061     // x                                            b.hi                b.lo
1062     //===============================================================================
1063     //  (b.hi*a.hi).hi      (b.hi*a.hi).lo
1064     //                      (b.lo*a.hi).hi      (b.lo*a.hi).lo
1065     //                      (b.hi*a.lo).hi      (b.hi*a.lo).lo
1066     // +                                        (b.lo*a.lo).hi      (b.lo*a.lo).lo
1067     //===============================================================================
1068     //
1069     // The (b.lo*a.lo).lo term cannot cause a carry, so we can ignore them for now.  We also know from above, that we can add (b.lo*a.lo).hi
1070     // and (b.hi*a.lo).lo to the 2N bit term [(b.lo*a.hi).hi + (b.lo*a.hi).lo] without overflow.  That takes care of all of the terms
1071     // on the right half that might carry.  Do that now.
1072     //
1073     cl_ulong aLobLoHi = aLobLo >> 32;
1074     cl_ulong aLobHiLo = aLobHi & 0xFFFFFFFFULL;
1075     aHibLo += aLobLoHi + aLobHiLo;
1076 
1077     // That leaves us with these terms:
1078     //
1079     //                                              a.hi                a.lo
1080     // x                                            b.hi                b.lo
1081     //===============================================================================
1082     //  (b.hi*a.hi).hi      (b.hi*a.hi).lo
1083     //                      (b.hi*a.lo).hi
1084     //                    [ (b.lo*a.hi).hi + (b.lo*a.hi).lo + other ]
1085     // +                                                                (b.lo*a.lo).lo
1086     //===============================================================================
1087 
1088     // All of the overflow potential from the right half has now been accumulated into the [ (b.lo*a.hi).hi + (b.lo*a.hi).lo ] 2N bit term.
1089     // We can safely separate into high and low parts. Per our rule above, we know we can accumulate the high part of that and (b.hi*a.lo).hi
1090     // into the 2N bit term (b.lo*a.hi) without carry.  The low part can be pieced together with (b.lo*a.lo).lo, to give the final low result
1091 
1092     destHi = aHibHi + (aHibLo >> 32 ) + (aLobHi >> 32);             // Cant overflow
1093     destLow = (aHibLo << 32) | ( aLobLo & 0xFFFFFFFFULL );
1094 }
1095 
multiply_signed_64_by_64(cl_long sourceA,cl_long sourceB,cl_ulong & destLow,cl_long & destHi)1096 void multiply_signed_64_by_64( cl_long sourceA, cl_long sourceB, cl_ulong &destLow, cl_long &destHi )
1097 {
1098     // Find sign of result
1099     cl_long aSign = sourceA >> 63;
1100     cl_long bSign = sourceB >> 63;
1101     cl_long resultSign = aSign ^ bSign;
1102 
1103     // take absolute values of the argument
1104     sourceA = (sourceA ^ aSign) - aSign;
1105     sourceB = (sourceB ^ bSign) - bSign;
1106 
1107     cl_ulong hi;
1108     multiply_unsigned_64_by_64( (cl_ulong) sourceA, (cl_ulong) sourceB, destLow, hi );
1109 
1110     // Fix the sign
1111     if( resultSign )
1112     {
1113         destLow ^= resultSign;
1114         hi  ^= resultSign;
1115         destLow -= resultSign;
1116 
1117         //carry if necessary
1118         if( 0 == destLow )
1119             hi -= resultSign;
1120     }
1121 
1122     destHi = (cl_long) hi;
1123 }
1124 
verify_integer_mul_hi(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)1125 bool verify_integer_mul_hi( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
1126 {
1127     cl_long testValueA, testValueB, highSigned;
1128     cl_ulong highUnsigned, lowHalf;
1129 
1130     switch( vecType )
1131     {
1132         case kChar:
1133             testValueA = *( (cl_char *)sourceA );
1134             testValueB = *( (cl_char *)sourceB );
1135             *( (cl_char *)destination ) = (cl_char)( ( testValueA * testValueB ) >> 8 );
1136             break;
1137         case kUChar:
1138             testValueA = *( (cl_uchar *)sourceA );
1139             testValueB = *( (cl_uchar *)sourceB );
1140             *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA * testValueB ) >> 8 );
1141             break;
1142         case kShort:
1143             testValueA = *( (cl_short *)sourceA );
1144             testValueB = *( (cl_short *)sourceB );
1145             *( (cl_short *)destination ) = (cl_short)( ( testValueA * testValueB ) >> 16 );
1146             break;
1147         case kUShort:
1148             testValueA = *( (cl_ushort *)sourceA );
1149             testValueB = *( (cl_ushort *)sourceB );
1150             *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA * testValueB ) >> 16 );
1151             break;
1152         case kInt:
1153             testValueA = *( (cl_int *)sourceA );
1154             testValueB = *( (cl_int *)sourceB );
1155             *( (cl_int *)destination ) = (cl_int)( ( testValueA * testValueB ) >> 32 );
1156             break;
1157         case kUInt:
1158             testValueA = *( (cl_uint *)sourceA );
1159             testValueB = *( (cl_uint *)sourceB );
1160             *( (cl_uint *)destination ) = (cl_uint)( ( testValueA * testValueB ) >> 32 );
1161             break;
1162         case kLong:
1163             testValueA = *( (cl_long *)sourceA );
1164             testValueB = *( (cl_long *)sourceB );
1165 
1166             multiply_signed_64_by_64( testValueA, testValueB, lowHalf, highSigned );
1167             *( (cl_long *)destination ) = highSigned;
1168             break;
1169         case kULong:
1170             testValueA = *( (cl_ulong *)sourceA );
1171             testValueB = *( (cl_ulong *)sourceB );
1172 
1173             multiply_unsigned_64_by_64( testValueA, testValueB, lowHalf, highUnsigned );
1174             *( (cl_ulong *)destination ) = highUnsigned;
1175             break;
1176         default:
1177             // Should never happen
1178             return false;
1179     }
1180     return true;
1181 }
1182 
test_integer_mul_hi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1183 int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1184 {
1185     return test_two_param_integer_fn( queue, context, "mul_hi", verify_integer_mul_hi );
1186 }
1187 
verify_integer_rotate(void * sourceA,void * sourceB,void * destination,ExplicitType vecType)1188 bool verify_integer_rotate( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
1189 {
1190     cl_ulong testValueA;
1191     char numBits;
1192 
1193     switch( vecType )
1194     {
1195         case kChar:
1196         case kUChar:
1197             testValueA = *( (cl_uchar *)sourceA );
1198             numBits = *( (cl_uchar *)sourceB );
1199             numBits &= 7;
1200             if ( numBits == 0 )
1201                 *( (cl_uchar *)destination ) =  (cl_uchar)testValueA;
1202             else
1203                 *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA << numBits ) | ( testValueA >> ( 8 - numBits ) ) );
1204             break;
1205         case kShort:
1206         case kUShort:
1207             testValueA = *( (cl_ushort *)sourceA );
1208             numBits = *( (cl_ushort *)sourceB );
1209             numBits &= 15;
1210             if ( numBits == 0 )
1211                 *( (cl_ushort *)destination ) =  (cl_ushort)testValueA;
1212             else
1213                 *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA << numBits ) | ( testValueA >> ( 16 - numBits ) ) );
1214             break;
1215         case kInt:
1216         case kUInt:
1217             testValueA = *( (cl_uint *)sourceA );
1218             numBits = *( (cl_uint *)sourceB );
1219             numBits &= 31;
1220             if ( numBits == 0 )
1221                 *( (cl_uint *)destination ) =  (cl_uint) testValueA;
1222             else
1223                 *( (cl_uint *)destination ) = (cl_uint)( ( testValueA << numBits ) | ( testValueA >> ( 32 - numBits ) ) );
1224             break;
1225         case kLong:
1226         case kULong:
1227             testValueA = *( (cl_ulong *)sourceA );
1228             numBits = *( (cl_ulong *)sourceB );
1229             numBits &= 63;
1230             if ( numBits == 0 )
1231                 *( (cl_ulong *)destination ) =  (cl_ulong)testValueA;
1232             else
1233                 *( (cl_ulong *)destination ) = (cl_ulong)( ( testValueA << numBits ) | ( testValueA >> ( 64 - numBits ) ) );
1234             break;
1235         default:
1236             // Should never happen
1237             log_error( "Unknown type encountered in verify_integer_rotate. Test failed. Aborting...\n" );
1238             abort();
1239             return false;
1240     }
1241     return true;
1242 }
1243 
test_integer_rotate(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1244 int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1245 {
1246     return test_two_param_integer_fn( queue, context, "rotate", verify_integer_rotate );
1247 }
1248 
1249 const char *threeParamIntegerKernelSourcePattern =
1250 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
1251 "{\n"
1252 "    int  tid = get_global_id(0);\n"
1253 "    %s%s sA = %s;\n"
1254 "    %s%s sB = %s;\n"
1255 "    %s%s sC = %s;\n"
1256 "    %s%s dst = %s( sA, sB, sC );\n"
1257 "     %s;\n"
1258 "\n"
1259 "}\n";
1260 
1261 typedef bool (*threeParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *sourceC, void *destination,
1262                                             ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType );
1263 
test_three_param_integer_kernel(cl_command_queue queue,cl_context context,const char * fnName,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType,unsigned int vecSize,threeParamIntegerVerifyFn verifyFn,MTdata d)1264 int test_three_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
1265                                   ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType,
1266                                     unsigned int vecSize, threeParamIntegerVerifyFn verifyFn, MTdata d )
1267 {
1268     clProgramWrapper program;
1269     clKernelWrapper kernel;
1270     clMemWrapper streams[4];
1271     cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], inDataC[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected;
1272     int error, i;
1273     size_t threads[1], localThreads[1];
1274     char kernelSource[10240];
1275     char *programPtr;
1276     char sizeName[4], paramSizeName[4];
1277 
1278     if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long"))
1279     {
1280         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) );
1281         return CL_SUCCESS;
1282     }
1283 
1284 
1285     /* Create the source */
1286     if( vecSize == 1 )
1287         sizeName[ 0 ] = 0;
1288     else
1289         sprintf( sizeName, "%d", vecSize );
1290     if( ( vecSize == 1 ) || ( vecSize == 3 ) )
1291         paramSizeName[ 0 ] = 0;
1292         else
1293         sprintf( paramSizeName, "%d", vecSize );
1294 
1295     char sourceALoad[ 128 ], sourceBLoad[ 128 ], sourceCLoad[ 128 ], destStore[ 128 ];
1296 
1297     sprintf( kernelSource, threeParamIntegerKernelSourcePattern,
1298             get_explicit_type_name( vecAType ), paramSizeName,
1299             get_explicit_type_name( vecBType ), paramSizeName,
1300             get_explicit_type_name( vecCType ), paramSizeName,
1301             get_explicit_type_name( destType ), paramSizeName,
1302             get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ),
1303             get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ),
1304             get_explicit_type_name( vecCType ), sizeName, build_load_statement( sourceCLoad, (size_t)vecSize, "sourceC" ),
1305             get_explicit_type_name( destType ), sizeName,
1306             fnName,
1307             build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" )
1308             );
1309 
1310     /* Create kernels */
1311     programPtr = kernelSource;
1312     if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
1313     {
1314     log_error("The program we attempted to compile was: \n%s\n", kernelSource);
1315         return -1;
1316     }
1317 
1318     /* Generate some streams */
1319     generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
1320     generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
1321     generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC );
1322 
1323     streams[0] = clCreateBuffer(
1324         context, CL_MEM_COPY_HOST_PTR,
1325         get_explicit_type_size(vecAType) * vecSize * TEST_SIZE, &inDataA, NULL);
1326     if( streams[0] == NULL )
1327     {
1328         log_error("ERROR: Creating input array A failed!\n");
1329         return -1;
1330     }
1331     streams[1] = clCreateBuffer(
1332         context, CL_MEM_COPY_HOST_PTR,
1333         get_explicit_type_size(vecBType) * vecSize * TEST_SIZE, &inDataB, NULL);
1334     if( streams[1] == NULL )
1335     {
1336         log_error("ERROR: Creating input array B failed!\n");
1337         return -1;
1338     }
1339     streams[2] = clCreateBuffer(
1340         context, CL_MEM_COPY_HOST_PTR,
1341         get_explicit_type_size(vecCType) * vecSize * TEST_SIZE, &inDataC, NULL);
1342     if( streams[2] == NULL )
1343     {
1344         log_error("ERROR: Creating input array C failed!\n");
1345         return -1;
1346     }
1347     streams[3] = clCreateBuffer(
1348         context, CL_MEM_READ_WRITE,
1349         get_explicit_type_size(destType) * vecSize * TEST_SIZE, NULL, NULL);
1350     if( streams[3] == NULL )
1351     {
1352         log_error("ERROR: Creating output array failed!\n");
1353         return -1;
1354     }
1355 
1356     /* Assign streams and execute */
1357     error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
1358     test_error( error, "Unable to set indexed kernel arguments" );
1359     error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
1360     test_error( error, "Unable to set indexed kernel arguments" );
1361     error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
1362     test_error( error, "Unable to set indexed kernel arguments" );
1363     error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
1364     test_error( error, "Unable to set indexed kernel arguments" );
1365 
1366     /* Run the kernel */
1367     threads[0] = TEST_SIZE;
1368 
1369     error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
1370     test_error( error, "Unable to get work group size to use" );
1371 
1372     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
1373     test_error( error, "Unable to execute test kernel" );
1374 
1375     memset(outData, 0xFF, get_explicit_type_size( destType ) * TEST_SIZE * vecSize);
1376 
1377     /* Now get the results */
1378     error = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, get_explicit_type_size( destType ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
1379     test_error( error, "Unable to read output array!" );
1380 
1381     /* And verify! */
1382     char *inA = (char *)inDataA;
1383     char *inB = (char *)inDataB;
1384     char *inC = (char *)inDataC;
1385     char *out = (char *)outData;
1386     for( i = 0; i < (int)TEST_SIZE; i++ )
1387     {
1388         for( size_t j = 0; j < vecSize; j++ )
1389         {
1390             bool test = verifyFn( inA, inB, inC, &expected, vecAType, vecBType, vecCType, destType );
1391             if( test && ( memcmp( &expected, out, get_explicit_type_size( destType ) ) != 0 ) )
1392             {
1393                 switch( get_explicit_type_size( vecAType ))
1394                 {
1395                     case 1:
1396                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x, 0x%2.2x)\n",
1397                                   (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ),
1398                                             *( (cl_uchar *)inA ),
1399                                             *( (cl_uchar *)inB ),
1400                                             *( (cl_uchar *)inC ) );
1401                         break;
1402 
1403                     case 2:
1404                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x, 0x%4.4x)\n",
1405                                   (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ),
1406                                             *( (cl_ushort *)inA ),
1407                                             *( (cl_ushort *)inB ),
1408                                             *( (cl_ushort *)inC ) );
1409                         break;
1410 
1411                     case 4:
1412                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x, 0x%8.8x)\n",
1413                                   (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ),
1414                                             *( (cl_uint *)inA ),
1415                                             *( (cl_uint *)inB ),
1416                                             *( (cl_uint *)inC ) );
1417                         break;
1418 
1419                     case 8:
1420                         log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx, 0x%16.16llx)\n",
1421                                   (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ),
1422                                             *( (cl_ulong *)inA ),
1423                                             *( (cl_ulong *)inB ),
1424                                             *( (cl_ulong *)inC ) );
1425                         break;
1426                 }
1427                 return -1;
1428             }
1429             inA += get_explicit_type_size( vecAType );
1430             inB += get_explicit_type_size( vecBType );
1431             inC += get_explicit_type_size( vecCType );
1432             out += get_explicit_type_size( destType );
1433         }
1434     }
1435 
1436     return 0;
1437 }
1438 
test_three_param_integer_fn(cl_command_queue queue,cl_context context,const char * fnName,threeParamIntegerVerifyFn verifyFn)1439 int test_three_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, threeParamIntegerVerifyFn verifyFn)
1440 {
1441     ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
1442     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
1443     unsigned int index, typeAIndex;
1444     int retVal = 0;
1445     RandomSeed seed(gRandomSeed);
1446 
1447     for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ )
1448     {
1449         if ((types[ typeAIndex ] == kLong || types[ typeAIndex] == kULong) && !gHasLong)
1450             continue;
1451 
1452         for( index = 0; vecSizes[ index ] != 0; index++ )
1453         {
1454             if( test_three_param_integer_kernel(queue, context, fnName, types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
1455             {
1456                 log_error( "   Vector %s%d,%s%d,%s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ],
1457                                                             get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] ,
1458                                                             get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] );
1459                 retVal = -1;
1460             }
1461         }
1462     }
1463 
1464     return retVal;
1465 }
1466 
verify_integer_clamp(void * sourceA,void * sourceB,void * sourceC,void * destination,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType)1467 bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *destination,
1468                         ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
1469 {
1470     if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
1471     {
1472         cl_ulong valueA, valueB, valueC;
1473 
1474         switch( vecAType )
1475         {
1476             case kULong:
1477                 valueA = ((cl_ulong*) sourceA)[0];
1478                 valueB = ((cl_ulong*) sourceB)[0];
1479                 valueC = ((cl_ulong*) sourceC)[0];
1480                 break;
1481             case kUInt:
1482                 valueA = ((cl_uint*) sourceA)[0];
1483                 valueB = ((cl_uint*) sourceB)[0];
1484                 valueC = ((cl_uint*) sourceC)[0];
1485                 break;
1486             case kUShort:
1487                 valueA = ((cl_ushort*) sourceA)[0];
1488                 valueB = ((cl_ushort*) sourceB)[0];
1489                 valueC = ((cl_ushort*) sourceC)[0];
1490                 break;
1491             case kUChar:
1492                 valueA = ((cl_uchar*) sourceA)[0];
1493                 valueB = ((cl_uchar*) sourceB)[0];
1494                 valueC = ((cl_uchar*) sourceC)[0];
1495                 break;
1496             default:
1497                 //error -- should never get here
1498                 abort();
1499                 break;
1500         }
1501 
1502 
1503         if(valueB > valueC) {
1504             return false; // results are undefined : let expected alone.
1505         }
1506 
1507         switch( vecAType )
1508         {
1509             case kULong:
1510                 ((cl_ulong *)destination)[0] =
1511                     std::max(std::min(valueA, valueC), valueB);
1512                 break;
1513             case kUInt:
1514                 ((cl_uint *)destination)[0] =
1515                     (cl_uint)(std::max(std::min(valueA, valueC), valueB));
1516                 break;
1517             case kUShort:
1518                 ((cl_ushort *)destination)[0] =
1519                     (cl_ushort)(std::max(std::min(valueA, valueC), valueB));
1520                 break;
1521             case kUChar:
1522                 ((cl_uchar *)destination)[0] =
1523                     (cl_uchar)(std::max(std::min(valueA, valueC), valueB));
1524                 break;
1525             default:
1526                 //error -- should never get here
1527                 abort();
1528                 break;
1529         }
1530 
1531 
1532 
1533 
1534     }
1535     else
1536     {
1537         cl_long valueA, valueB, valueC;
1538 
1539 
1540         switch( vecAType )
1541         {
1542             case kLong:
1543                 valueA = ((cl_long*) sourceA)[0];
1544                 valueB = ((cl_long*) sourceB)[0];
1545                 valueC = ((cl_long*) sourceC)[0];
1546                 break;
1547             case kInt:
1548                 valueA = ((cl_int*) sourceA)[0];
1549                 valueB = ((cl_int*) sourceB)[0];
1550                 valueC = ((cl_int*) sourceC)[0];
1551                 break;
1552             case kShort:
1553                 valueA = ((cl_short*) sourceA)[0];
1554                 valueB = ((cl_short*) sourceB)[0];
1555                 valueC = ((cl_short*) sourceC)[0];
1556                 break;
1557             case kChar:
1558                 valueA = ((cl_char*) sourceA)[0];
1559                 valueB = ((cl_char*) sourceB)[0];
1560                 valueC = ((cl_char*) sourceC)[0];
1561                 break;
1562             default:
1563                 //error -- should never get here
1564                 abort();
1565                 break;
1566         }
1567 
1568         if(valueB > valueC) {
1569             return false; // undefined behavior : leave "expected" alone
1570         }
1571 
1572         switch( vecAType )
1573         {
1574             case kLong:
1575                 ((cl_long *)destination)[0] =
1576                     std::max(std::min(valueA, valueC), valueB);
1577                 break;
1578             case kInt:
1579                 ((cl_int *)destination)[0] =
1580                     (cl_int)(std::max(std::min(valueA, valueC), valueB));
1581                 break;
1582             case kShort:
1583                 ((cl_short *)destination)[0] =
1584                     (cl_short)(std::max(std::min(valueA, valueC), valueB));
1585                 break;
1586             case kChar:
1587                 ((cl_char *)destination)[0] =
1588                     (cl_char)(std::max(std::min(valueA, valueC), valueB));
1589                 break;
1590             default:
1591                 //error -- should never get here
1592                 abort();
1593                 break;
1594         }
1595 
1596     }
1597     return true;
1598 }
1599 
test_integer_clamp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1600 int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1601 {
1602     return test_three_param_integer_fn( queue, context, "clamp", verify_integer_clamp );
1603 }
1604 
verify_integer_mad_sat(void * sourceA,void * sourceB,void * sourceC,void * destination,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType)1605 bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *destination,
1606                         ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
1607 {
1608     if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
1609     {
1610         cl_ulong valueA, valueB, valueC;
1611 
1612         switch( vecAType )
1613         {
1614             case kULong:
1615                 valueA = ((cl_ulong*) sourceA)[0];
1616                 valueB = ((cl_ulong*) sourceB)[0];
1617                 valueC = ((cl_ulong*) sourceC)[0];
1618                 break;
1619             case kUInt:
1620                 valueA = ((cl_uint*) sourceA)[0];
1621                 valueB = ((cl_uint*) sourceB)[0];
1622                 valueC = ((cl_uint*) sourceC)[0];
1623                 break;
1624             case kUShort:
1625                 valueA = ((cl_ushort*) sourceA)[0];
1626                 valueB = ((cl_ushort*) sourceB)[0];
1627                 valueC = ((cl_ushort*) sourceC)[0];
1628                 break;
1629             case kUChar:
1630                 valueA = ((cl_uchar*) sourceA)[0];
1631                 valueB = ((cl_uchar*) sourceB)[0];
1632                 valueC = ((cl_uchar*) sourceC)[0];
1633                 break;
1634             default:
1635                 //error -- should never get here
1636                 abort();
1637                 break;
1638         }
1639 
1640         cl_ulong multHi, multLo;
1641         multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi );
1642 
1643         multLo += valueC;
1644         multHi += multLo < valueC;  // carry if overflow
1645         if( multHi )
1646             multLo = 0xFFFFFFFFFFFFFFFFULL;
1647 
1648         switch( vecAType )
1649         {
1650             case kULong:
1651                 ((cl_ulong*) destination)[0] = multLo;
1652                 break;
1653             case kUInt:
1654                 ((cl_uint *)destination)[0] =
1655                     (cl_uint)std::min(multLo, (cl_ulong)CL_UINT_MAX);
1656                 break;
1657             case kUShort:
1658                 ((cl_ushort *)destination)[0] =
1659                     (cl_ushort)std::min(multLo, (cl_ulong)CL_USHRT_MAX);
1660                 break;
1661             case kUChar:
1662                 ((cl_uchar *)destination)[0] =
1663                     (cl_uchar)std::min(multLo, (cl_ulong)CL_UCHAR_MAX);
1664                 break;
1665             default:
1666                 //error -- should never get here
1667                 abort();
1668                 break;
1669         }
1670     }
1671     else
1672     {
1673         cl_long valueA, valueB, valueC;
1674 
1675         switch( vecAType )
1676         {
1677             case kLong:
1678                 valueA = ((cl_long*) sourceA)[0];
1679                 valueB = ((cl_long*) sourceB)[0];
1680                 valueC = ((cl_long*) sourceC)[0];
1681                 break;
1682             case kInt:
1683                 valueA = ((cl_int*) sourceA)[0];
1684                 valueB = ((cl_int*) sourceB)[0];
1685                 valueC = ((cl_int*) sourceC)[0];
1686                 break;
1687             case kShort:
1688                 valueA = ((cl_short*) sourceA)[0];
1689                 valueB = ((cl_short*) sourceB)[0];
1690                 valueC = ((cl_short*) sourceC)[0];
1691                 break;
1692             case kChar:
1693                 valueA = ((cl_char*) sourceA)[0];
1694                 valueB = ((cl_char*) sourceB)[0];
1695                 valueC = ((cl_char*) sourceC)[0];
1696                 break;
1697             default:
1698                 //error -- should never get here
1699                 abort();
1700                 break;
1701         }
1702 
1703         cl_long multHi;
1704         cl_ulong multLo;
1705         multiply_signed_64_by_64( valueA, valueB, multLo, multHi );
1706 
1707         cl_ulong sum = multLo + valueC;
1708         // carry if overflow
1709         if( valueC >= 0 )
1710         {
1711             if( multLo > sum )
1712             {
1713                 multHi++;
1714                 if( CL_LONG_MIN == multHi )
1715                 {
1716                     multHi = CL_LONG_MAX;
1717                     sum = CL_ULONG_MAX;
1718                 }
1719             }
1720         }
1721         else
1722         {
1723             if( multLo < sum )
1724             {
1725                 multHi--;
1726                 if( CL_LONG_MAX == multHi )
1727                 {
1728                     multHi = CL_LONG_MIN;
1729                     sum = 0;
1730                 }
1731             }
1732         }
1733 
1734         // saturate
1735         if( multHi > 0 )
1736             sum = CL_LONG_MAX;
1737         else if( multHi < -1 )
1738             sum = CL_LONG_MIN;
1739         cl_long result = (cl_long) sum;
1740 
1741         switch( vecAType )
1742         {
1743             case kLong:
1744                 ((cl_long*) destination)[0] = result;
1745                 break;
1746             case kInt:
1747                 result = std::min(result, (cl_long)CL_INT_MAX);
1748                 result = std::max(result, (cl_long)CL_INT_MIN);
1749                 ((cl_int*) destination)[0] = (cl_int) result;
1750                 break;
1751             case kShort:
1752                 result = std::min(result, (cl_long)CL_SHRT_MAX);
1753                 result = std::max(result, (cl_long)CL_SHRT_MIN);
1754                 ((cl_short*) destination)[0] = (cl_short) result;
1755                 break;
1756             case kChar:
1757                 result = std::min(result, (cl_long)CL_CHAR_MAX);
1758                 result = std::max(result, (cl_long)CL_CHAR_MIN);
1759                 ((cl_char*) destination)[0] = (cl_char) result;
1760                 break;
1761             default:
1762                 //error -- should never get here
1763                 abort();
1764                 break;
1765         }
1766     }
1767     return true;
1768 }
1769 
test_integer_mad_sat(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1770 int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1771 {
1772     return test_three_param_integer_fn( queue, context, "mad_sat", verify_integer_mad_sat );
1773 }
1774 
verify_integer_mad_hi(void * sourceA,void * sourceB,void * sourceC,void * destination,ExplicitType vecAType,ExplicitType vecBType,ExplicitType vecCType,ExplicitType destType)1775 bool verify_integer_mad_hi( void *sourceA, void *sourceB, void *sourceC, void *destination,
1776                             ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
1777 {
1778     if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
1779     {
1780         cl_ulong valueA, valueB, valueC;
1781 
1782         switch( vecAType )
1783         {
1784             case kULong:
1785                 valueA = ((cl_ulong*) sourceA)[0];
1786                 valueB = ((cl_ulong*) sourceB)[0];
1787                 valueC = ((cl_ulong*) sourceC)[0];
1788                 break;
1789             case kUInt:
1790                 valueA = ((cl_uint*) sourceA)[0];
1791                 valueB = ((cl_uint*) sourceB)[0];
1792                 valueC = ((cl_uint*) sourceC)[0];
1793                 break;
1794             case kUShort:
1795                 valueA = ((cl_ushort*) sourceA)[0];
1796                 valueB = ((cl_ushort*) sourceB)[0];
1797                 valueC = ((cl_ushort*) sourceC)[0];
1798                 break;
1799             case kUChar:
1800                 valueA = ((cl_uchar*) sourceA)[0];
1801                 valueB = ((cl_uchar*) sourceB)[0];
1802                 valueC = ((cl_uchar*) sourceC)[0];
1803                 break;
1804             default:
1805                 //error -- should never get here
1806                 abort();
1807                 break;
1808         }
1809 
1810         cl_ulong multHi, multLo;
1811         multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi );
1812 
1813         switch( vecAType )
1814         {
1815             case kULong:
1816                 ((cl_ulong*) destination)[0] = multHi + valueC;
1817                 break;
1818             case kUInt:
1819                 ((cl_uint*) destination)[0] = (cl_uint) (( multLo >> 32) + valueC );
1820                 break;
1821             case kUShort:
1822                 ((cl_ushort*) destination)[0] = (cl_ushort) (( multLo >> 16) + valueC );
1823                 break;
1824             case kUChar:
1825                 ((cl_uchar*) destination)[0] = (cl_uchar) (( multLo >> 8) + valueC );
1826                 break;
1827             default:
1828                 //error -- should never get here
1829                 abort();
1830                 break;
1831         }
1832     }
1833     else
1834     {
1835         cl_long valueA, valueB, valueC;
1836 
1837         switch( vecAType )
1838         {
1839             case kLong:
1840                 valueA = ((cl_long*) sourceA)[0];
1841                 valueB = ((cl_long*) sourceB)[0];
1842                 valueC = ((cl_long*) sourceC)[0];
1843                 break;
1844             case kInt:
1845                 valueA = ((cl_int*) sourceA)[0];
1846                 valueB = ((cl_int*) sourceB)[0];
1847                 valueC = ((cl_int*) sourceC)[0];
1848                 break;
1849             case kShort:
1850                 valueA = ((cl_short*) sourceA)[0];
1851                 valueB = ((cl_short*) sourceB)[0];
1852                 valueC = ((cl_short*) sourceC)[0];
1853                 break;
1854             case kChar:
1855                 valueA = ((cl_char*) sourceA)[0];
1856                 valueB = ((cl_char*) sourceB)[0];
1857                 valueC = ((cl_char*) sourceC)[0];
1858                 break;
1859             default:
1860                 //error -- should never get here
1861                 abort();
1862                 break;
1863         }
1864 
1865         cl_long multHi;
1866         cl_ulong multLo;
1867         multiply_signed_64_by_64( valueA, valueB, multLo, multHi );
1868 
1869         switch( vecAType )
1870         {
1871             case kLong:
1872                 ((cl_long*) destination)[0] = multHi + valueC;
1873                 break;
1874             case kInt:
1875                 ((cl_int*) destination)[0] = (cl_int) ((multLo >> 32) + valueC);
1876                 break;
1877             case kShort:
1878                 ((cl_short*) destination)[0] = (cl_int) ((multLo >> 16) + valueC);
1879                 break;
1880             case kChar:
1881                 ((cl_char*) destination)[0] = (cl_char) (cl_int) ((multLo >> 8) + valueC);
1882                 break;
1883             default:
1884                 //error -- should never get here
1885                 abort();
1886                 break;
1887         }
1888     }
1889     return true;
1890 }
1891 
test_integer_mad_hi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1892 int test_integer_mad_hi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1893 {
1894     return test_three_param_integer_fn( queue, context, "mad_hi", verify_integer_mad_hi );
1895 }
1896 
1897 
1898