• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "Utility.h"
17 
18 #include <cstdio>
19 #include <cstdlib>
20 #include <string>
21 #include <time.h>
22 #include "FunctionList.h"
23 #include "Sleep.h"
24 
25 #include "harness/errorHelpers.h"
26 #include "harness/kernelHelpers.h"
27 #include "harness/parseParameters.h"
28 
29 #if defined( __APPLE__ )
30     #include <sys/sysctl.h>
31     #include <sys/mman.h>
32     #include <libgen.h>
33     #include <sys/time.h>
34 #elif defined( __linux__ )
35     #include <unistd.h>
36     #include <sys/syscall.h>
37     #include <linux/sysctl.h>
38     #include <sys/param.h>
39 #endif
40 
41 #if defined (__linux__) || (defined WIN32 && defined __MINGW32__)
42 #include <sys/param.h>
43 #endif
44 
45 #include "harness/testHarness.h"
46 
47 #define kPageSize           4096
48 #define DOUBLE_REQUIRED_FEATURES    ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM  )
49 
50 const char      **gTestNames = NULL;
51 unsigned int    gTestNameCount = 0;
52 char            appName[ MAXPATHLEN ] = "";
53 cl_device_id    gDevice = NULL;
54 cl_context      gContext = NULL;
55 cl_command_queue gQueue = NULL;
56 static int32_t  gStartTestNumber;
57 static int32_t  gEndTestNumber;
58 int             gSkipCorrectnessTesting = 0;
59 int             gStopOnError = 0;
60 static bool     gSkipRestOfTests;
61 #if defined( __APPLE__ )
62 int             gMeasureTimes = 1;
63 #else
64 int             gMeasureTimes = 0;
65 #endif
66 int             gReportAverageTimes = 0;
67 int             gForceFTZ = 0;
68 int             gWimpyMode = 0;
69 int             gHasDouble = 0;
70 int             gTestFloat = 1;
71 //This flag should be 'ON' by default and it can be changed through the command line arguments.
72 volatile int             gTestFastRelaxed = 1;
73 /*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions.
74   The spec does not specify ULP for derived function.  The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled,
75   Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec.
76   Since there is no way of quering the device whether it is a derived or non-derived implementation according to OpenCL 2.0 spec then it has to be changed through a command line argument.
77 */
78 int             gFastRelaxedDerived = 1;
79 int             gToggleCorrectlyRoundedDivideSqrt = 0;
80 int             gDeviceILogb0 = 1;
81 int             gDeviceILogbNaN = 1;
82 int             gCheckTininessBeforeRounding = 1;
83 int             gIsInRTZMode = 0;
84 uint32_t        gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
85 uint32_t        gMinVectorSizeIndex = 0;
86 const char      *method[] = { "Best", "Average" };
87 void            *gIn = NULL;
88 void            *gIn2 = NULL;
89 void            *gIn3 = NULL;
90 void            *gOut_Ref = NULL;
91 void            *gOut[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
92 void            *gOut_Ref2 = NULL;
93 void            *gOut2[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
94 cl_mem          gInBuffer = NULL;
95 cl_mem          gInBuffer2 = NULL;
96 cl_mem          gInBuffer3 = NULL;
97 cl_mem          gOutBuffer[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
98 cl_mem          gOutBuffer2[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
99 uint32_t        gComputeDevices = 0;
100 uint32_t        gSimdSize = 1;
101 uint32_t        gDeviceFrequency = 0;
102 static MTdata   gMTdata;
103 cl_device_fp_config gFloatCapabilities = 0;
104 cl_device_fp_config gDoubleCapabilities = 0;
105 int             gWimpyReductionFactor = 32;
106 int             gWimpyBufferSize = BUFFER_SIZE;
107 int             gVerboseBruteForce = 0;
108 
109 static int ParseArgs( int argc, const char **argv );
110 static void PrintUsage( void );
111 static void PrintFunctions( void );
112 test_status InitCL( cl_device_id device );
113 static void ReleaseCL( void );
114 static int InitILogbConstants( void );
115 static int IsTininessDetectedBeforeRounding( void );
116 static int IsInRTZMode( void );         //expensive. Please check gIsInRTZMode global instead.
117 
118 
doTest(const char * name)119 int doTest( const char* name )
120 {
121     if( gSkipRestOfTests )
122     {
123         vlog( "Skipping function because of an earlier error.\n" );
124         return 1;
125     }
126 
127     int error = 0;
128     const Func* func_data = NULL;
129 
130     for( size_t i = 0; i < functionListCount; i++ )
131     {
132         const Func* const temp_func = functionList + i;
133         if( strcmp( temp_func->name, name ) == 0 )
134         {
135             if( i < gStartTestNumber || i > gEndTestNumber )
136             {
137                 vlog( "Skipping function #%d\n", i );
138                 return 0;
139             }
140 
141             func_data = temp_func;
142             break;
143         }
144     }
145 
146     if( func_data == NULL )
147     {
148         vlog( "Function '%s' doesn't exist!\n", name );
149         exit( EXIT_FAILURE );
150     }
151 
152     if( func_data->func.p == NULL )
153     {
154         vlog( "'%s' is missing implementation, skipping function.\n", func_data->name );
155         return 0;
156     }
157 
158     // if correctly rounded divide & sqrt are supported by the implementation
159     // then test it; otherwise skip the test
160     if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 )
161     {
162         if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 )
163         {
164             vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" );
165             return 0;
166         }
167     }
168 
169     {
170         extern int my_ilogb(double);
171         if( 0 == strcmp( "ilogb", func_data->name ) )
172         {
173             InitILogbConstants();
174         }
175 
176         if ( gTestFastRelaxed )
177         {
178             if( func_data->relaxed )
179             {
180                 gTestCount++;
181                 vlog( "%3d: ", gTestCount );
182                 if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
183                 {
184                     gFailCount++;
185                     error++;
186                     if( gStopOnError )
187                     {
188                         gSkipRestOfTests = true;
189                         return error;
190                     }
191                 }
192             }
193         }
194 
195         if( gTestFloat )
196         {
197             int testFastRelaxedTmp = gTestFastRelaxed;
198             gTestFastRelaxed = 0;
199 
200             gTestCount++;
201             vlog( "%3d: ", gTestCount );
202             if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata )  )
203             {
204                 gFailCount++;
205                 error++;
206                 if( gStopOnError )
207                 {
208                     gTestFastRelaxed = testFastRelaxedTmp;
209                     gSkipRestOfTests = true;
210                     return error;
211                 }
212             }
213             gTestFastRelaxed = testFastRelaxedTmp;
214         }
215 
216         if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p )
217         {
218             //Disable fast-relaxed-math for double precision floating-point
219             int testFastRelaxedTmp = gTestFastRelaxed;
220             gTestFastRelaxed = 0;
221 
222             gTestCount++;
223             vlog( "%3d: ", gTestCount );
224             if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata )  )
225             {
226                 gFailCount++;
227                 error++;
228                 if( gStopOnError )
229                 {
230                     gTestFastRelaxed = testFastRelaxedTmp;
231                     gSkipRestOfTests = true;
232                     return error;
233                 }
234             }
235 
236             //Re-enable testing fast-relaxed-math mode
237             gTestFastRelaxed = testFastRelaxedTmp;
238         }
239     }
240 
241     return error;
242 }
243 
test_acos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)244 int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
245 {
246     return doTest( "acos" );
247 }
test_acosh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)248 int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
249 {
250     return doTest( "acosh" );
251 }
test_acospi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)252 int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
253 {
254     return doTest( "acospi" );
255 }
test_asin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)256 int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
257 {
258     return doTest( "asin" );
259 }
test_asinh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)260 int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
261 {
262     return doTest( "asinh" );
263 }
test_asinpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)264 int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
265 {
266     return doTest( "asinpi" );
267 }
test_atan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)268 int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
269 {
270     return doTest( "atan" );
271 }
test_atanh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)272 int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
273 {
274     return doTest( "atanh" );
275 }
test_atanpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)276 int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
277 {
278     return doTest( "atanpi" );
279 }
test_atan2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)280 int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
281 {
282     return doTest( "atan2" );
283 }
test_atan2pi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)284 int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
285 {
286     return doTest( "atan2pi" );
287 }
test_cbrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)288 int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
289 {
290     return doTest( "cbrt" );
291 }
test_ceil(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)292 int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
293 {
294     return doTest( "ceil" );
295 }
test_copysign(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)296 int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
297 {
298     return doTest( "copysign" );
299 }
test_cos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)300 int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
301 {
302     return doTest( "cos" );
303 }
test_cosh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)304 int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
305 {
306     return doTest( "cosh" );
307 }
test_cospi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)308 int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
309 {
310     return doTest( "cospi" );
311 }
test_exp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)312 int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
313 {
314     return doTest( "exp" );
315 }
test_exp2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)316 int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
317 {
318     return doTest( "exp2" );
319 }
test_exp10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)320 int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
321 {
322     return doTest( "exp10" );
323 }
test_expm1(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)324 int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
325 {
326     return doTest( "expm1" );
327 }
test_fabs(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)328 int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
329 {
330     return doTest( "fabs" );
331 }
test_fdim(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)332 int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
333 {
334     return doTest( "fdim" );
335 }
test_floor(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)336 int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
337 {
338     return doTest( "floor" );
339 }
test_fma(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)340 int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
341 {
342     return doTest( "fma" );
343 }
test_fmax(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)344 int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
345 {
346     return doTest( "fmax" );
347 }
test_fmin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)348 int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
349 {
350     return doTest( "fmin" );
351 }
test_fmod(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)352 int test_fmod( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
353 {
354     return doTest( "fmod" );
355 }
test_fract(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)356 int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
357 {
358     return doTest( "fract" );
359 }
test_frexp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)360 int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
361 {
362     return doTest( "frexp" );
363 }
test_hypot(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)364 int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
365 {
366     return doTest( "hypot" );
367 }
test_ilogb(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)368 int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
369 {
370     return doTest( "ilogb" );
371 }
test_isequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)372 int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
373 {
374     return doTest( "isequal" );
375 }
test_isfinite(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)376 int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
377 {
378     return doTest( "isfinite" );
379 }
test_isgreater(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)380 int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
381 {
382     return doTest( "isgreater" );
383 }
test_isgreaterequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)384 int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
385 {
386     return doTest( "isgreaterequal" );
387 }
test_isinf(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)388 int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
389 {
390     return doTest( "isinf" );
391 }
test_isless(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)392 int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
393 {
394     return doTest( "isless" );
395 }
test_islessequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)396 int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
397 {
398     return doTest( "islessequal" );
399 }
test_islessgreater(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)400 int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
401 {
402     return doTest( "islessgreater" );
403 }
test_isnan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)404 int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
405 {
406     return doTest( "isnan" );
407 }
test_isnormal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)408 int test_isnormal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
409 {
410     return doTest( "isnormal" );
411 }
test_isnotequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)412 int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
413 {
414     return doTest( "isnotequal" );
415 }
test_isordered(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)416 int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
417 {
418     return doTest( "isordered" );
419 }
test_isunordered(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)420 int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
421 {
422     return doTest( "isunordered" );
423 }
test_ldexp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)424 int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
425 {
426     return doTest( "ldexp" );
427 }
test_lgamma(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)428 int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
429 {
430     return doTest( "lgamma" );
431 }
test_lgamma_r(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)432 int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
433 {
434     return doTest( "lgamma_r" );
435 }
test_log(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)436 int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
437 {
438     return doTest( "log" );
439 }
test_log2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)440 int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
441 {
442     return doTest( "log2" );
443 }
test_log10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)444 int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
445 {
446     return doTest( "log10" );
447 }
test_log1p(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)448 int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
449 {
450     return doTest( "log1p" );
451 }
test_logb(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)452 int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
453 {
454     return doTest( "logb" );
455 }
test_mad(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)456 int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
457 {
458     return doTest( "mad" );
459 }
test_maxmag(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)460 int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
461 {
462     return doTest( "maxmag" );
463 }
test_minmag(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)464 int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
465 {
466     return doTest( "minmag" );
467 }
test_modf(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)468 int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
469 {
470     return doTest( "modf" );
471 }
test_nan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)472 int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
473 {
474     return doTest( "nan" );
475 }
test_nextafter(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)476 int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
477 {
478     return doTest( "nextafter" );
479 }
test_pow(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)480 int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
481 {
482     return doTest( "pow" );
483 }
test_pown(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)484 int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
485 {
486     return doTest( "pown" );
487 }
test_powr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)488 int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
489 {
490     return doTest( "powr" );
491 }
test_remainder(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)492 int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
493 {
494     return doTest( "remainder" );
495 }
test_remquo(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)496 int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
497 {
498     return doTest( "remquo" );
499 }
test_rint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)500 int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
501 {
502     return doTest( "rint" );
503 }
test_rootn(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)504 int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
505 {
506     return doTest( "rootn" );
507 }
test_round(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)508 int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
509 {
510     return doTest( "round" );
511 }
test_rsqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)512 int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
513 {
514     return doTest( "rsqrt" );
515 }
test_signbit(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)516 int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
517 {
518     return doTest( "signbit" );
519 }
test_sin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)520 int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
521 {
522     return doTest( "sin" );
523 }
test_sincos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)524 int test_sincos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
525 {
526     return doTest( "sincos" );
527 }
test_sinh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)528 int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
529 {
530     return doTest( "sinh" );
531 }
test_sinpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)532 int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
533 {
534     return doTest( "sinpi" );
535 }
test_sqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)536 int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
537 {
538     return doTest( "sqrt" );
539 }
test_sqrt_cr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)540 int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
541 {
542     return doTest( "sqrt_cr" );
543 }
test_tan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)544 int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
545 {
546     return doTest( "tan" );
547 }
test_tanh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)548 int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
549 {
550     return doTest( "tanh" );
551 }
test_tanpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)552 int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
553 {
554     return doTest( "tanpi" );
555 }
test_trunc(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)556 int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
557 {
558     return doTest( "trunc" );
559 }
test_half_cos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)560 int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
561 {
562     return doTest( "half_cos" );
563 }
test_half_divide(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)564 int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
565 {
566     return doTest( "half_divide" );
567 }
test_half_exp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)568 int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
569 {
570     return doTest( "half_exp" );
571 }
test_half_exp2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)572 int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
573 {
574     return doTest( "half_exp2" );
575 }
test_half_exp10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)576 int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
577 {
578     return doTest( "half_exp10" );
579 }
test_half_log(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)580 int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
581 {
582     return doTest( "half_log" );
583 }
test_half_log2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)584 int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
585 {
586     return doTest( "half_log2" );
587 }
test_half_log10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)588 int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
589 {
590     return doTest( "half_log10" );
591 }
test_half_powr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)592 int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
593 {
594     return doTest( "half_powr" );
595 }
test_half_recip(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)596 int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
597 {
598     return doTest( "half_recip" );
599 }
test_half_rsqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)600 int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
601 {
602     return doTest( "half_rsqrt" );
603 }
test_half_sin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)604 int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
605 {
606     return doTest( "half_sin" );
607 }
test_half_sqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)608 int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
609 {
610     return doTest( "half_sqrt" );
611 }
test_half_tan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)612 int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
613 {
614     return doTest( "half_tan" );
615 }
test_add(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)616 int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
617 {
618     return doTest( "add" );
619 }
test_subtract(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)620 int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
621 {
622     return doTest( "subtract" );
623 }
test_divide(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)624 int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
625 {
626     return doTest( "divide" );
627 }
test_divide_cr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)628 int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
629 {
630     return doTest( "divide_cr" );
631 }
test_multiply(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)632 int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
633 {
634     return doTest( "multiply" );
635 }
test_assignment(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)636 int test_assignment( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
637 {
638     return doTest( "assignment" );
639 }
test_not(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)640 int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
641 {
642     return doTest( "not" );
643 }
644 
645 test_definition test_list[] = {
646     ADD_TEST( acos ),
647     ADD_TEST( acosh ),
648     ADD_TEST( acospi ),
649     ADD_TEST( asin ),
650     ADD_TEST( asinh ),
651     ADD_TEST( asinpi ),
652     ADD_TEST( atan ),
653     ADD_TEST( atanh ),
654     ADD_TEST( atanpi ),
655     ADD_TEST( atan2 ),
656     ADD_TEST( atan2pi ),
657     ADD_TEST( cbrt ),
658     ADD_TEST( ceil ),
659     ADD_TEST( copysign ),
660     ADD_TEST( cos ),
661     ADD_TEST( cosh ),
662     ADD_TEST( cospi ),
663     ADD_TEST( exp ),
664     ADD_TEST( exp2 ),
665     ADD_TEST( exp10 ),
666     ADD_TEST( expm1 ),
667     ADD_TEST( fabs ),
668     ADD_TEST( fdim ),
669     ADD_TEST( floor ),
670     ADD_TEST( fma ),
671     ADD_TEST( fmax ),
672     ADD_TEST( fmin ),
673     ADD_TEST( fmod ),
674     ADD_TEST( fract ),
675     ADD_TEST( frexp ),
676     ADD_TEST( hypot ),
677     ADD_TEST( ilogb ),
678     ADD_TEST( isequal ),
679     ADD_TEST( isfinite ),
680     ADD_TEST( isgreater ),
681     ADD_TEST( isgreaterequal ),
682     ADD_TEST( isinf ),
683     ADD_TEST( isless ),
684     ADD_TEST( islessequal ),
685     ADD_TEST( islessgreater ),
686     ADD_TEST( isnan ),
687     ADD_TEST( isnormal ),
688     ADD_TEST( isnotequal ),
689     ADD_TEST( isordered ),
690     ADD_TEST( isunordered ),
691     ADD_TEST( ldexp ),
692     ADD_TEST( lgamma ),
693     ADD_TEST( lgamma_r ),
694     ADD_TEST( log ),
695     ADD_TEST( log2 ),
696     ADD_TEST( log10 ),
697     ADD_TEST( log1p ),
698     ADD_TEST( logb ),
699     ADD_TEST( mad ),
700     ADD_TEST( maxmag ),
701     ADD_TEST( minmag ),
702     ADD_TEST( modf ),
703     ADD_TEST( nan ),
704     ADD_TEST( nextafter ),
705     ADD_TEST( pow ),
706     ADD_TEST( pown ),
707     ADD_TEST( powr ),
708     ADD_TEST( remainder ),
709     ADD_TEST( remquo ),
710     ADD_TEST( rint ),
711     ADD_TEST( rootn ),
712     ADD_TEST( round ),
713     ADD_TEST( rsqrt ),
714     ADD_TEST( signbit ),
715     ADD_TEST( sin ),
716     ADD_TEST( sincos ),
717     ADD_TEST( sinh ),
718     ADD_TEST( sinpi ),
719     ADD_TEST( sqrt ),
720     ADD_TEST( sqrt_cr ),
721     ADD_TEST( tan ),
722     ADD_TEST( tanh ),
723     ADD_TEST( tanpi ),
724     ADD_TEST( trunc ),
725     ADD_TEST( half_cos ),
726     ADD_TEST( half_divide ),
727     ADD_TEST( half_exp ),
728     ADD_TEST( half_exp2 ),
729     ADD_TEST( half_exp10 ),
730     ADD_TEST( half_log ),
731     ADD_TEST( half_log2 ),
732     ADD_TEST( half_log10 ),
733     ADD_TEST( half_powr ),
734     ADD_TEST( half_recip ),
735     ADD_TEST( half_rsqrt ),
736     ADD_TEST( half_sin ),
737     ADD_TEST( half_sqrt ),
738     ADD_TEST( half_tan ),
739     ADD_TEST( add ),
740     ADD_TEST( subtract ),
741     ADD_TEST( divide ),
742     ADD_TEST( divide_cr ),
743     ADD_TEST( multiply ),
744     ADD_TEST( assignment ),
745     ADD_TEST( not ),
746 };
747 
748 const int test_num = ARRAY_SIZE( test_list );
749 
750 #pragma mark -
751 
main(int argc,const char * argv[])752 int main (int argc, const char * argv[])
753 {
754     int error;
755 
756     argc = parseCustomParam(argc, argv);
757     if (argc == -1)
758     {
759         return -1;
760     }
761 
762 #if defined( __APPLE__ )
763     struct timeval startTime;
764     gettimeofday( &startTime, NULL );
765 #endif
766 
767     error = ParseArgs( argc, argv );
768     if( error )
769         return error;
770 
771     // This takes a while, so prevent the machine from going to sleep.
772     PreventSleep();
773     atexit( ResumeSleep );
774 
775     if( gSkipCorrectnessTesting )
776         vlog( "*** Skipping correctness testing! ***\n\n" );
777     else if( gStopOnError )
778         vlog( "Stopping at first error.\n" );
779 
780     if( gMeasureTimes )
781     {
782         vlog( "%s times are reported at right (cycles per element):\n", method[gReportAverageTimes] );
783         vlog( "\n" );
784         if( gSkipCorrectnessTesting )
785             vlog( "   \t               ");
786         else
787             vlog( "   \t                                        ");
788         if( gWimpyMode )
789             vlog( "   " );
790         for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
791             vlog( "\t  float%s", sizeNames[i] );
792     }
793     else
794     {
795         vlog( "   \t                                        ");
796         if( gWimpyMode )
797             vlog( "   " );
798     }
799     if( ! gSkipCorrectnessTesting )
800         vlog( "\t  max_ulps" );
801 
802     vlog( "\n-----------------------------------------------------------------------------------------------------------\n" );
803 
804     gMTdata = init_genrand( gRandomSeed );
805     if( gEndTestNumber == 0 )
806     {
807         gEndTestNumber = functionListCount;
808     }
809 
810     FPU_mode_type oldMode;
811     DisableFTZ( &oldMode );
812 
813     int ret = runTestHarnessWithCheck( gTestNameCount, gTestNames, test_num, test_list, true, 0, InitCL );
814 
815     RestoreFPState( &oldMode );
816 
817     free_mtdata(gMTdata);
818     free(gTestNames);
819 
820     int error_code = clFinish(gQueue);
821     if (error_code)
822         vlog_error("clFinish failed:%d\n", error_code);
823 
824     ReleaseCL();
825 
826 #if defined( __APPLE__ )
827     struct timeval endTime;
828     gettimeofday( &endTime, NULL );
829     double time = (double) endTime.tv_sec - (double) startTime.tv_sec;
830     time += 1e-6 * ((double) endTime.tv_usec - (double) startTime.tv_usec);
831     vlog( "time: %f s\n", time );
832 #endif
833 
834     return ret;
835 }
836 
ParseArgs(int argc,const char ** argv)837 static int ParseArgs( int argc, const char **argv )
838 {
839     int i;
840     gTestNames = (const char**) calloc( argc - 1, sizeof( char*) );
841     if( NULL == gTestNames )
842     {
843         vlog( "Failed to allocate memory for gTestNames array.\n" );
844         return 1;
845     }
846     gTestNames[0] = argv[0];
847     gTestNameCount = 1;
848     int singleThreaded = 0;
849 
850     { // Extract the app name
851         strncpy( appName, argv[0], MAXPATHLEN );
852 
853 #if defined( __APPLE__ )
854         char baseName[MAXPATHLEN];
855         char *base = NULL;
856         strncpy( baseName, argv[0], MAXPATHLEN );
857         base = basename( baseName );
858         if( NULL != base )
859         {
860             strncpy( appName, base, sizeof( appName )  );
861             appName[ sizeof( appName ) -1 ] = '\0';
862         }
863 #endif
864     }
865 
866     vlog( "\n%s\t", appName );
867     for( i = 1; i < argc; i++ )
868     {
869         const char *arg = argv[i];
870         if( NULL == arg )
871             break;
872 
873         vlog( "\t%s", arg );
874         int optionFound = 0;
875         if( arg[0] == '-' )
876         {
877             while( arg[1] != '\0' )
878             {
879                 arg++;
880                 optionFound = 1;
881                 switch( *arg )
882                 {
883                     case 'a':
884                         gReportAverageTimes ^= 1;
885                         break;
886 
887                     case 'c':
888                         gToggleCorrectlyRoundedDivideSqrt ^= 1;
889                         break;
890 
891                     case 'd':
892                         gHasDouble ^= 1;
893                         break;
894 
895                     case 'e':
896                         gFastRelaxedDerived ^= 1;
897                         break;
898 
899                     case 'f':
900                         gTestFloat ^= 1;
901                         break;
902 
903                     case 'h':
904                         PrintUsage();
905                         return -1;
906 
907                     case 'p':
908                       PrintFunctions();
909                       return -1;
910 
911                     case 'l':
912                         gSkipCorrectnessTesting ^= 1;
913                         break;
914 
915                     case 'm':
916                         singleThreaded ^= 1;
917                         break;
918 
919                     case 'r':
920                         gTestFastRelaxed ^= 1;
921                         break;
922 
923                     case 's':
924                         gStopOnError ^= 1;
925                         break;
926 
927                     case 't':
928                         gMeasureTimes ^= 1;
929                         break;
930 
931                     case 'v':
932                         gVerboseBruteForce ^= 1;
933                         break;
934 
935                     case 'w':   // wimpy mode
936                         gWimpyMode ^= 1;
937                         break;
938 
939                     case '[':
940                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
941                         break;
942 
943                     case 'z':
944                         gForceFTZ ^= 1;
945                         break;
946 
947                     case '1':
948                         if( arg[1] == '6' )
949                         {
950                             gMinVectorSizeIndex = 5;
951                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
952                             arg++;
953                         }
954                         else
955                         {
956                             gMinVectorSizeIndex = 0;
957                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
958                         }
959                         break;
960                     case '2':
961                             gMinVectorSizeIndex = 1;
962                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
963                             break;
964                     case '3':
965                             gMinVectorSizeIndex = 2;
966                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
967                             break;
968                     case '4':
969                             gMinVectorSizeIndex = 3;
970                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
971                             break;
972                     case '8':
973                             gMinVectorSizeIndex = 4;
974                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
975                             break;
976                         break;
977 
978                     default:
979                         vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
980                         PrintUsage();
981                         return -1;
982                 }
983             }
984         }
985 
986         if( ! optionFound )
987         {
988             char *t = NULL;
989             long number = strtol( arg, &t, 0 );
990             if( t != arg )
991             {
992                 if( 0 == gStartTestNumber )
993                     gStartTestNumber = (int32_t) number;
994                 else
995                     gEndTestNumber = gStartTestNumber + (int32_t) number;
996             }
997             else
998             {
999                 // Make sure this is a valid name
1000                 unsigned int k;
1001                 for (k=0; k<functionListCount; k++)
1002                 {
1003                     const Func *f = functionList+k;
1004                     if (strcmp(arg, f->name) == 0)
1005                     {
1006                         gTestNames[ gTestNameCount ] = arg;
1007                         gTestNameCount++;
1008                         break;
1009                     }
1010                 }
1011                 // If we didn't find it in the list of test names
1012                 if (k >= functionListCount)
1013                 {
1014                     gTestNames[gTestNameCount] = arg;
1015                     gTestNameCount++;
1016                 }
1017             }
1018         }
1019     }
1020 
1021     // Check for the wimpy mode environment variable
1022     if (getenv("CL_WIMPY_MODE")) {
1023       vlog( "\n" );
1024       vlog( "*** Detected CL_WIMPY_MODE env                          ***\n" );
1025       gWimpyMode = 1;
1026     }
1027 
1028     vlog( "\nTest binary built %s %s\n", __DATE__, __TIME__ );
1029 
1030     PrintArch();
1031 
1032     if( gWimpyMode )
1033     {
1034         vlog( "\n" );
1035         vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
1036         vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
1037         vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor );
1038     }
1039 
1040     if( singleThreaded )
1041         SetThreadCount(1);
1042 
1043     return 0;
1044 }
1045 
1046 
PrintFunctions(void)1047 static void PrintFunctions ( void )
1048 {
1049   vlog( "\nMath function names:\n" );
1050   for( int i = 0; i < functionListCount; i++ )
1051   {
1052     vlog( "\t%s\n", functionList[ i ].name );
1053   }
1054 }
1055 
PrintUsage(void)1056 static void PrintUsage( void )
1057 {
1058     vlog( "%s [-acglstz]: <optional: math function names>\n", appName );
1059     vlog( "\toptions:\n" );
1060     vlog( "\t\t-a\tReport average times instead of best times\n" );
1061     vlog( "\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: off)\n");
1062     vlog( "\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 on)\n" );
1063     vlog( "\t\t-f\tToggle float precision testing. (Default: on)\n" );
1064     vlog( "\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n" );
1065     vlog( "\t\t-e\tToggle test as derived implementations for fast relaxed math precision. (Default: on)\n" );
1066     vlog( "\t\t-h\tPrint this message and quit\n" );
1067     vlog( "\t\t-p\tPrint all math function names and quit\n" );
1068     vlog( "\t\t-l\tlink check only (make sure functions are present, skip accuracy checks.)\n" );
1069     vlog( "\t\t-m\tToggle run multi-threaded. (Default: on) )\n" );
1070     vlog( "\t\t-s\tStop on error\n" );
1071     vlog( "\t\t-t\tToggle timing  (on by default)\n" );
1072     vlog( "\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
1073     vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-10, default factor(%u)\n",gWimpyReductionFactor );
1074     vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" );
1075     vlog( "\t\t-v\tToggle Verbosity (Default: off)\n ");
1076     vlog( "\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, \"-16\" tests 16-wide vectors only.\n" );
1077     vlog( "\n\tYou may also pass a number instead of a function name.\n" );
1078     vlog( "\tThis causes the first N tests to be skipped. The tests are numbered.\n" );
1079     vlog( "\tIf you pass a second number, that is the number tests to run after the first one.\n" );
1080     vlog( "\tA name list may be used in conjunction with a number range. In that case,\n" );
1081     vlog( "\tonly the named cases in the number range will run.\n" );
1082     vlog( "\tYou may also choose to pass no arguments, in which case all tests will be run.\n" );
1083     vlog( "\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the device.\n" );
1084     vlog( "\n" );
1085 }
1086 
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)1087 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
1088 {
1089     vlog( "%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data );
1090 }
1091 
InitCL(cl_device_id device)1092 test_status InitCL( cl_device_id device )
1093 {
1094     int error;
1095     uint32_t i;
1096     size_t configSize = sizeof( gComputeDevices );
1097     cl_device_type device_type;
1098 
1099     error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL );
1100     if( error )
1101     {
1102         print_error( error, "Unable to get device type" );
1103         return TEST_FAIL;
1104     }
1105 
1106     gDevice = device;
1107 
1108 
1109     if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
1110         gComputeDevices = 1;
1111 
1112     // Check extensions
1113     if(is_extension_available(gDevice, "cl_khr_fp64"))
1114     {
1115         gHasDouble ^= 1;
1116 #if defined( CL_DEVICE_DOUBLE_FP_CONFIG )
1117         if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL)))
1118         {
1119             vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", error );
1120             return TEST_FAIL;
1121         }
1122 
1123         if( DOUBLE_REQUIRED_FEATURES != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES) )
1124         {
1125             std::string list;
1126             if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
1127             if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) )
1128                 list += "CL_FP_ROUND_TO_NEAREST, ";
1129             if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) )
1130                 list += "CL_FP_ROUND_TO_ZERO, ";
1131             if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) )
1132                 list += "CL_FP_ROUND_TO_INF, ";
1133             if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) )
1134                 list += "CL_FP_INF_NAN, ";
1135             if( 0 == (gDoubleCapabilities & CL_FP_DENORM) )
1136                 list += "CL_FP_DENORM, ";
1137             vlog_error("ERROR: required double features are missing: %s\n",
1138                        list.c_str());
1139 
1140             return TEST_FAIL;
1141         }
1142 #else
1143         vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" );
1144         return TEST_FAIL;
1145 #endif
1146     }
1147 
1148     configSize = sizeof( gDeviceFrequency );
1149     if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
1150         gDeviceFrequency = 0;
1151 
1152     if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL)))
1153     {
1154         vlog_error( "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", error );
1155         return TEST_FAIL;
1156     }
1157 
1158     gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error );
1159     if( NULL == gContext || error )
1160     {
1161         vlog_error( "clCreateContext failed. (%d) \n", error );
1162         return TEST_FAIL;
1163     }
1164 
1165     gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
1166     if( NULL == gQueue || error )
1167     {
1168         vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
1169         return TEST_FAIL;
1170     }
1171 
1172 #if defined( __APPLE__ )
1173     // FIXME: use clProtectedArray
1174 #endif
1175     //Allocate buffers
1176     cl_uint min_alignment = 0;
1177     error = clGetDeviceInfo (gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void*)&min_alignment, NULL);
1178     if (CL_SUCCESS != error)
1179     {
1180         vlog_error( "clGetDeviceInfo failed. (%d)\n", error );
1181         return TEST_FAIL;
1182     }
1183     min_alignment >>= 3;    // convert bits to bytes
1184 
1185     gIn   = align_malloc( BUFFER_SIZE, min_alignment );
1186     if( NULL == gIn )
1187         return TEST_FAIL;
1188     gIn2   = align_malloc( BUFFER_SIZE, min_alignment );
1189     if( NULL == gIn2 )
1190         return TEST_FAIL;
1191     gIn3   = align_malloc( BUFFER_SIZE, min_alignment );
1192     if( NULL == gIn3 )
1193         return TEST_FAIL;
1194     gOut_Ref   = align_malloc( BUFFER_SIZE, min_alignment );
1195     if( NULL == gOut_Ref )
1196         return TEST_FAIL;
1197     gOut_Ref2   = align_malloc( BUFFER_SIZE, min_alignment );
1198     if( NULL == gOut_Ref2 )
1199         return TEST_FAIL;
1200 
1201     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1202     {
1203         gOut[i] = align_malloc( BUFFER_SIZE, min_alignment );
1204         if( NULL == gOut[i] )
1205             return TEST_FAIL;
1206         gOut2[i] = align_malloc( BUFFER_SIZE, min_alignment );
1207         if( NULL == gOut2[i] )
1208             return TEST_FAIL;
1209     }
1210 
1211     cl_mem_flags device_flags = CL_MEM_READ_ONLY;
1212     // save a copy on the host device to make this go faster
1213     if( CL_DEVICE_TYPE_CPU == device_type )
1214         device_flags |= CL_MEM_USE_HOST_PTR;
1215       else
1216           device_flags |= CL_MEM_COPY_HOST_PTR;
1217 
1218     // setup input buffers
1219     gInBuffer = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
1220     if( gInBuffer == NULL || error )
1221     {
1222         vlog_error( "clCreateBuffer1 failed for input (%d)\n", error );
1223         return TEST_FAIL;
1224     }
1225 
1226     gInBuffer2 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn2, &error );
1227     if( gInBuffer2 == NULL || error )
1228     {
1229         vlog_error( "clCreateArray2 failed for input (%d)\n" , error );
1230         return TEST_FAIL;
1231     }
1232 
1233     gInBuffer3 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn3, &error );
1234     if( gInBuffer3 == NULL  || error)
1235     {
1236         vlog_error( "clCreateArray3 failed for input (%d)\n", error );
1237         return TEST_FAIL;
1238     }
1239 
1240 
1241     // setup output buffers
1242     device_flags = CL_MEM_READ_WRITE;
1243     // save a copy on the host device to make this go faster
1244     if( CL_DEVICE_TYPE_CPU == device_type )
1245         device_flags |= CL_MEM_USE_HOST_PTR;
1246       else
1247           device_flags |= CL_MEM_COPY_HOST_PTR;
1248     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1249     {
1250         gOutBuffer[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut[i], &error );
1251         if( gOutBuffer[i] == NULL || error )
1252         {
1253             vlog_error( "clCreateArray failed for output (%d)\n", error  );
1254             return TEST_FAIL;
1255         }
1256         gOutBuffer2[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut2[i], &error );
1257         if( gOutBuffer2[i] == NULL || error)
1258         {
1259             vlog_error( "clCreateArray2 failed for output (%d)\n", error );
1260             return TEST_FAIL;
1261         }
1262     }
1263 
1264     // we are embedded, check current rounding mode
1265     if( gIsEmbedded )
1266     {
1267         gIsInRTZMode = IsInRTZMode();
1268     }
1269 
1270     //Check tininess detection
1271     IsTininessDetectedBeforeRounding();
1272 
1273     cl_platform_id platform;
1274     int err = clGetPlatformIDs(1, &platform, NULL);
1275     if( err )
1276     {
1277         print_error(err, "clGetPlatformIDs failed");
1278         return TEST_FAIL;
1279     }
1280 
1281     char c[1024];
1282     static const char *no_yes[] = { "NO", "YES" };
1283     vlog( "\nCompute Device info:\n" );
1284     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
1285     vlog( "\tPlatform Version: %s\n", c );
1286     clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
1287     vlog( "\tDevice Name: %s\n", c );
1288     clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
1289     vlog( "\tVendor: %s\n", c );
1290     clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
1291     vlog( "\tDevice Version: %s\n", c );
1292     clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
1293     vlog( "\tCL C Version: %s\n", c );
1294     clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
1295     vlog( "\tDriver Version: %s\n", c );
1296     vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
1297     vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)] );
1298     vlog( "\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
1299     if( gToggleCorrectlyRoundedDivideSqrt )
1300     {
1301         gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
1302     }
1303     vlog( "\tTesting with correctly rounded float divide and sqrt? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
1304     vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)] );
1305     vlog( "\tTesting single precision? %s\n", no_yes[0 != gTestFloat] );
1306     vlog( "\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed] );
1307     if(gTestFastRelaxed)
1308     {
1309       vlog( "\tFast relaxed math has derived implementations? %s\n", no_yes[0 != gFastRelaxedDerived] );
1310     }
1311     vlog( "\tTesting double precision? %s\n", no_yes[0 != gHasDouble] );
1312     if( sizeof( long double) == sizeof( double ) && gHasDouble )
1313     {
1314         vlog( "\n\t\tWARNING: Host system long double does not have better precision than double!\n" );
1315         vlog( "\t\t         All double results that do not match the reference result have their reported\n" );
1316         vlog( "\t\t         error inflated by 0.5 ulps to account for the fact that this system\n" );
1317         vlog( "\t\t         can not accurately represent the right result to an accuracy closer\n" );
1318         vlog( "\t\t         than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" );
1319     }
1320 
1321     vlog( "\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded] );
1322     if( gIsEmbedded )
1323         vlog( "\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode] );
1324     vlog( "\tTininess is detected before rounding? %s\n", no_yes[0 != gCheckTininessBeforeRounding] );
1325     vlog( "\tWorker threads: %d\n", GetThreadCount() );
1326     vlog( "\tTesting vector sizes:" );
1327     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1328         vlog( "\t%d", sizeValues[i] );
1329 
1330     vlog("\n");
1331     vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
1332     vlog( "\n\n" );
1333 
1334     // Check to see if we are using single threaded mode on other than a 1.0 device
1335     if (getenv( "CL_TEST_SINGLE_THREADED" )) {
1336 
1337       char device_version[1024] = { 0 };
1338       clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
1339 
1340       if (strcmp("OpenCL 1.0 ",device_version)) {
1341         vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
1342       }
1343     }
1344 
1345     return TEST_PASS;
1346 }
1347 
ReleaseCL(void)1348 static void ReleaseCL( void )
1349 {
1350     uint32_t i;
1351     clReleaseMemObject(gInBuffer);
1352     clReleaseMemObject(gInBuffer2);
1353     clReleaseMemObject(gInBuffer3);
1354     for ( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) {
1355         clReleaseMemObject(gOutBuffer[i]);
1356         clReleaseMemObject(gOutBuffer2[i]);
1357     }
1358     clReleaseCommandQueue(gQueue);
1359     clReleaseContext(gContext);
1360 
1361     align_free(gIn);
1362     align_free(gIn2);
1363     align_free(gIn3);
1364     align_free(gOut_Ref);
1365     align_free(gOut_Ref2);
1366 
1367     for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1368     {
1369         align_free(gOut[i]);
1370         align_free(gOut2[i]);
1371     }
1372 }
1373 
_LogBuildError(cl_program p,int line,const char * file)1374 void _LogBuildError( cl_program p, int line, const char *file )
1375 {
1376     char the_log[2048] = "";
1377 
1378     vlog_error( "%s:%d: Build Log:\n", file, line );
1379     if( 0 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(the_log), the_log, NULL) )
1380         vlog_error( "%s", the_log );
1381     else
1382         vlog_error( "*** Error getting build log for program %p\n", p );
1383 }
1384 
InitILogbConstants(void)1385 int InitILogbConstants( void )
1386 {
1387     int error;
1388     const char *kernel =
1389     "__kernel void GetILogBConstants( __global int *out )\n"
1390     "{\n"
1391     "   out[0] = FP_ILOGB0;\n"
1392     "   out[1] = FP_ILOGBNAN;\n"
1393     "}\n";
1394 
1395     cl_program query;
1396     error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
1397     if (NULL == query || error)
1398     {
1399         vlog_error( "Error: Unable to create program to get FP_ILOGB0 and FP_ILOGBNAN for the device. (%d)", error );
1400         return error;
1401     }
1402 
1403     cl_kernel k = clCreateKernel( query, "GetILogBConstants", &error );
1404     if( NULL == k || error)
1405     {
1406       vlog_error( "Error: Unable to create kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
1407         return error;
1408     }
1409 
1410     if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
1411     {
1412         vlog_error( "Error: Unable to set kernel arg to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
1413         return error;
1414     }
1415 
1416     size_t dim = 1;
1417     if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
1418     {
1419         vlog_error( "Error: Unable to execute kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
1420         return error;
1421     }
1422 
1423     struct{ cl_int ilogb0, ilogbnan; }data;
1424     if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
1425     {
1426         vlog_error( "Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the device. Err = %d", error );
1427         return error;
1428     }
1429 
1430     gDeviceILogb0 = data.ilogb0;
1431     gDeviceILogbNaN = data.ilogbnan;
1432 
1433     clReleaseKernel(k);
1434     clReleaseProgram(query);
1435 
1436     return 0;
1437 }
1438 
IsTininessDetectedBeforeRounding(void)1439 int IsTininessDetectedBeforeRounding( void )
1440 {
1441     int error;
1442     const char *kernel =
1443     "__kernel void IsTininessDetectedBeforeRounding( __global float *out )\n"
1444     "{\n"
1445     "   volatile float a = 0x1.000002p-126f;\n"
1446     "   volatile float b = 0x1.fffffcp-1f;\n"       // product is 0x1.fffffffffff8p-127
1447     "   out[0] = a * b;\n"
1448     "}\n";
1449 
1450     cl_program query;
1451     error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
1452     if (error != CL_SUCCESS) {
1453         vlog_error( "Error: Unable to create program to detect how tininess is detected for the device. (%d)", error );
1454         return error;
1455     }
1456 
1457     cl_kernel k = clCreateKernel( query, "IsTininessDetectedBeforeRounding", &error );
1458     if( NULL == k || error)
1459     {
1460       vlog_error( "Error: Unable to create kernel to detect how tininess is detected  for the device. Err = %d", error );
1461         return error;
1462     }
1463 
1464     if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
1465     {
1466         vlog_error( "Error: Unable to set kernel arg to detect how tininess is detected  for the device. Err = %d", error );
1467         return error;
1468     }
1469 
1470     size_t dim = 1;
1471     if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
1472     {
1473         vlog_error( "Error: Unable to execute kernel to detect how tininess is detected  for the device. Err = %d", error );
1474         return error;
1475     }
1476 
1477     struct{ cl_uint f; }data;
1478     if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
1479     {
1480         vlog_error( "Error: unable to read result from tininess test from the device. Err = %d", error );
1481         return error;
1482     }
1483 
1484     gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1485 
1486     clReleaseKernel(k);
1487     clReleaseProgram(query);
1488 
1489     return 0;
1490 }
1491 
1492 
MakeKernel(const char ** c,cl_uint count,const char * name,cl_kernel * k,cl_program * p)1493 int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p )
1494 {
1495     int error = 0;
1496     char options[200] = "";
1497 
1498     if( gForceFTZ )
1499     {
1500       strcat(options," -cl-denorms-are-zero");
1501     }
1502 
1503     if( gTestFastRelaxed )
1504     {
1505       strcat(options, " -cl-fast-relaxed-math");
1506     }
1507 
1508     error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1509     if (error != CL_SUCCESS)
1510     {
1511         vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
1512         return error;
1513     }
1514 
1515     *k = clCreateKernel( *p, name, &error );
1516     if( NULL == *k || error )
1517     {
1518         char    buffer[2048] = "";
1519 
1520         vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1521         clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
1522         vlog_error("Log: %s\n", buffer);
1523         clReleaseProgram( *p );
1524         return error;
1525     }
1526 
1527     return error;
1528 }
1529 
MakeKernels(const char ** c,cl_uint count,const char * name,cl_uint kernel_count,cl_kernel * k,cl_program * p)1530 int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p )
1531 {
1532     int error = 0;
1533     cl_uint i;
1534     char options[200] = "";
1535 
1536     if (gForceFTZ)
1537     {
1538       strcat(options," -cl-denorms-are-zero ");
1539     }
1540 
1541     if( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT )
1542     {
1543       strcat(options," -cl-fp32-correctly-rounded-divide-sqrt ");
1544     }
1545 
1546     if( gTestFastRelaxed )
1547     {
1548       strcat(options, " -cl-fast-relaxed-math");
1549     }
1550 
1551     error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1552     if ( error != CL_SUCCESS )
1553     {
1554         vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
1555         return error;
1556     }
1557 
1558 
1559     memset( k, 0, kernel_count * sizeof( *k) );
1560     for( i = 0; i< kernel_count; i++ )
1561     {
1562         k[i] = clCreateKernel( *p, name, &error );
1563         if( NULL == k[i]|| error )
1564         {
1565             char    buffer[2048] = "";
1566 
1567             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1568             clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
1569             vlog_error("Log: %s\n", buffer);
1570             clReleaseProgram( *p );
1571             return error;
1572         }
1573     }
1574 
1575     return error;
1576 }
1577 
1578 
IsInRTZMode(void)1579 static int IsInRTZMode( void )
1580 {
1581     int error;
1582     const char *kernel =
1583     "__kernel void GetRoundingMode( __global int *out )\n"
1584     "{\n"
1585     "   volatile float a = 0x1.0p23f;\n"
1586     "   volatile float b = -0x1.0p23f;\n"
1587     "   out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);\n"
1588     "}\n";
1589 
1590     cl_program query;
1591     error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
1592     if (error != CL_SUCCESS) {
1593         vlog_error( "Error: Unable to create program to detect RTZ mode for the device. (%d)", error );
1594         return error;
1595     }
1596 
1597     cl_kernel k = clCreateKernel( query, "GetRoundingMode", &error );
1598     if( NULL == k || error)
1599     {
1600         vlog_error( "Error: Unable to create kernel to gdetect RTZ mode for the device. Err = %d", error );
1601         return error;
1602     }
1603 
1604     if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
1605     {
1606         vlog_error( "Error: Unable to set kernel arg to detect RTZ mode for the device. Err = %d", error );
1607         return error;
1608     }
1609 
1610     size_t dim = 1;
1611     if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
1612     {
1613         vlog_error( "Error: Unable to execute kernel to detect RTZ mode for the device. Err = %d", error );
1614         return error;
1615     }
1616 
1617     struct{ cl_int isRTZ; }data;
1618     if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
1619     {
1620         vlog_error( "Error: unable to read RTZ mode data from the device. Err = %d", error );
1621         return error;
1622     }
1623 
1624     clReleaseKernel(k);
1625     clReleaseProgram(query);
1626 
1627     return data.isRTZ;
1628 }
1629 
1630 #pragma mark -
1631 
1632 const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1633 const int  sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1634 
1635 // TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1636 float Bruteforce_Ulp_Error_Double( double test, long double reference )
1637 {
1638 //Check for Non-power-of-two and NaN
1639 
1640   // Note: This function presumes that someone has already tested whether the result is correctly,
1641   // rounded before calling this function.  That test:
1642   //
1643   //    if( (float) reference == test )
1644   //        return 0.0f;
1645   //
1646   // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
1647   // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
1648   // results.
1649 
1650   // Deal with long double = double
1651   // On most systems long double is a higher precision type than double. They provide either
1652   // a 80-bit or greater floating point type, or they provide a head-tail double double format.
1653   // That is sufficient to represent the accuracy of a floating point result to many more bits
1654   // than double and we can calculate sub-ulp errors. This is the standard system for which this
1655   // test suite is designed.
1656   //
1657   // On some systems double and long double are the same thing. Then we run into a problem,
1658   // because our representation of the infinitely precise result (passed in as reference above)
1659   // can be off by as much as a half double precision ulp itself.  In this case, we inflate the
1660   // reported error by half an ulp to take this into account.  A more correct and permanent fix
1661   // would be to undertake refactoring the reference code to return results in this format:
1662   //
1663   //    typedef struct DoubleReference
1664   //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
1665   //        double  correctlyRoundedResult;     // as best we can
1666   //        double  ulps;                       // plus a fractional amount to account for the difference
1667   //    }DoubleReference;                       //     between infinitely precise result and correctlyRoundedResult, in units of ulps.
1668   //
1669   // This would provide a useful higher-than-double precision format for everyone that we can use,
1670   // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
1671   // that use a head to tail double double for long double.
1672 
1673     int x;
1674     long double testVal = test;
1675 
1676     // First, handle special reference values
1677     if (isinf(reference))
1678     {
1679     if (reference == testVal)
1680         return 0.0f;
1681 
1682     return INFINITY;
1683     }
1684 
1685     if (isnan(reference))
1686     {
1687     if (isnan(testVal))
1688         return 0.0f;
1689 
1690     return INFINITY;
1691     }
1692 
1693     if ( 0.0L != reference && 0.5L != frexpl(reference, &x) )
1694     { // Non-zero and Non-power of two
1695 
1696        // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
1697        // There is something of a performance optimization here.
1698         if( testVal == reference )
1699             return 0.0f;
1700 
1701         // The unbiased exponent of the ulp unit place
1702         int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
1703 
1704         // Scale the exponent of the error
1705         float result = (float) scalbnl( testVal - reference, ulp_exp );
1706 
1707         // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
1708         if( sizeof(long double) == sizeof( double ) )
1709             result += copysignf( 0.5f, result);
1710 
1711         return result;
1712     }
1713 
1714     // reference is a normal power of two or a zero
1715     // The unbiased exponent of the ulp unit place
1716     int ulp_exp =  DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
1717 
1718    // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
1719    // There is something of a performance optimization here too.
1720     if( testVal == reference )
1721         return 0.0f;
1722 
1723     // Scale the exponent of the error
1724     float result = (float) scalbnl( testVal - reference, ulp_exp );
1725 
1726     // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
1727     if( sizeof(long double) == sizeof( double ) )
1728         result += copysignf( 0.5f, result);
1729 
1730     return result;
1731 }
1732 
Abs_Error(float test,double reference)1733 float Abs_Error( float test, double reference )
1734 {
1735   if( isnan(test) && isnan(reference) )
1736     return 0.0f;
1737   return fabs((float)(reference-(double)test));
1738 }
1739 
1740 /*
1741 #define HALF_MIN_EXP    -13
1742 #define HALF_MANT_DIG    11
1743 float Ulp_Error_Half( float test, double reference )
1744 {
1745     union{ double d; uint64_t u; }u;     u.d = reference;
1746 
1747   // Note: This function presumes that someone has already tested whether the result is correctly,
1748   // rounded before calling this function.  That test:
1749   //
1750   //    if( (float) reference == test )
1751   //        return 0.0f;
1752   //
1753   // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
1754   // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
1755   // results.
1756 
1757     double testVal = test;
1758     if( u.u & 0x000fffffffffffffULL )
1759     { // Non-power of two and NaN
1760         if( isnan( reference ) && isnan( test ) )
1761             return 0.0f;    // if we are expecting a NaN, any NaN is fine
1762 
1763         // The unbiased exponent of the ulp unit place
1764         int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
1765 
1766         // Scale the exponent of the error
1767         return (float) scalbn( testVal - reference, ulp_exp );
1768     }
1769 
1770     if( isinf( reference ) )
1771     {
1772         if( (double) test == reference )
1773             return 0.0f;
1774 
1775         return (float) (testVal - reference );
1776     }
1777 
1778     // reference is a normal power of two or a zero
1779     int ulp_exp =  HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
1780 
1781     // Scale the exponent of the error
1782     return (float) scalbn( testVal - reference, ulp_exp );
1783 }
1784 */
1785 
1786 
1787 #if defined( __APPLE__ )
1788     #include <mach/mach_time.h>
1789 #endif
1790 
GetTime(void)1791 uint64_t GetTime( void )
1792 {
1793 #if defined( __APPLE__ )
1794     return mach_absolute_time();
1795 #elif defined(_WIN32) && defined(_MSC_VER)
1796     return  ReadTime();
1797 #else
1798     //mach_absolute_time is a high precision timer with precision < 1 microsecond.
1799     #warning need accurate clock here.  Times are invalid.
1800     return 0;
1801 #endif
1802 }
1803 
1804 
1805 #if defined(_WIN32) && defined (_MSC_VER)
1806 /* function is defined in "compat.h" */
1807 #else
SubtractTime(uint64_t endTime,uint64_t startTime)1808 double SubtractTime( uint64_t endTime, uint64_t startTime )
1809 {
1810     uint64_t diff = endTime - startTime;
1811     static double conversion = 0.0;
1812 
1813     if( 0.0 == conversion )
1814     {
1815 #if defined( __APPLE__ )
1816         mach_timebase_info_data_t info = {0,0};
1817         kern_return_t   err = mach_timebase_info( &info );
1818         if( 0 == err )
1819             conversion = 1e-9 * (double) info.numer / (double) info.denom;
1820 #else
1821     // This function consumes output from GetTime() above, and converts the time to secionds.
1822     #warning need accurate ticks to seconds conversion factor here. Times are invalid.
1823 #endif
1824     }
1825 
1826     // strictly speaking we should also be subtracting out timer latency here
1827     return conversion * (double) diff;
1828 }
1829 #endif
1830 
RoundUpToNextPowerOfTwo(cl_uint x)1831 cl_uint RoundUpToNextPowerOfTwo( cl_uint x )
1832 {
1833     if( 0 == (x & (x-1)))
1834         return x;
1835 
1836     while( x & (x-1) )
1837         x &= x-1;
1838 
1839     return x+x;
1840 }
1841 
1842