1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "Utility.h"
17
18 #include <cstdio>
19 #include <cstdlib>
20 #include <string>
21 #include <time.h>
22 #include "FunctionList.h"
23 #include "Sleep.h"
24
25 #include "harness/errorHelpers.h"
26 #include "harness/kernelHelpers.h"
27 #include "harness/parseParameters.h"
28
29 #if defined( __APPLE__ )
30 #include <sys/sysctl.h>
31 #include <sys/mman.h>
32 #include <libgen.h>
33 #include <sys/time.h>
34 #elif defined( __linux__ )
35 #include <unistd.h>
36 #include <sys/syscall.h>
37 #include <linux/sysctl.h>
38 #include <sys/param.h>
39 #endif
40
41 #if defined (__linux__) || (defined WIN32 && defined __MINGW32__)
42 #include <sys/param.h>
43 #endif
44
45 #include "harness/testHarness.h"
46
47 #define kPageSize 4096
48 #define DOUBLE_REQUIRED_FEATURES ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM )
49
50 const char **gTestNames = NULL;
51 unsigned int gTestNameCount = 0;
52 char appName[ MAXPATHLEN ] = "";
53 cl_device_id gDevice = NULL;
54 cl_context gContext = NULL;
55 cl_command_queue gQueue = NULL;
56 static int32_t gStartTestNumber;
57 static int32_t gEndTestNumber;
58 int gSkipCorrectnessTesting = 0;
59 int gStopOnError = 0;
60 static bool gSkipRestOfTests;
61 #if defined( __APPLE__ )
62 int gMeasureTimes = 1;
63 #else
64 int gMeasureTimes = 0;
65 #endif
66 int gReportAverageTimes = 0;
67 int gForceFTZ = 0;
68 int gWimpyMode = 0;
69 int gHasDouble = 0;
70 int gTestFloat = 1;
71 //This flag should be 'ON' by default and it can be changed through the command line arguments.
72 volatile int gTestFastRelaxed = 1;
73 /*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions.
74 The spec does not specify ULP for derived function. The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled,
75 Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec.
76 Since there is no way of quering the device whether it is a derived or non-derived implementation according to OpenCL 2.0 spec then it has to be changed through a command line argument.
77 */
78 int gFastRelaxedDerived = 1;
79 int gToggleCorrectlyRoundedDivideSqrt = 0;
80 int gDeviceILogb0 = 1;
81 int gDeviceILogbNaN = 1;
82 int gCheckTininessBeforeRounding = 1;
83 int gIsInRTZMode = 0;
84 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
85 uint32_t gMinVectorSizeIndex = 0;
86 const char *method[] = { "Best", "Average" };
87 void *gIn = NULL;
88 void *gIn2 = NULL;
89 void *gIn3 = NULL;
90 void *gOut_Ref = NULL;
91 void *gOut[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
92 void *gOut_Ref2 = NULL;
93 void *gOut2[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL };
94 cl_mem gInBuffer = NULL;
95 cl_mem gInBuffer2 = NULL;
96 cl_mem gInBuffer3 = NULL;
97 cl_mem gOutBuffer[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
98 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL };
99 uint32_t gComputeDevices = 0;
100 uint32_t gSimdSize = 1;
101 uint32_t gDeviceFrequency = 0;
102 static MTdata gMTdata;
103 cl_device_fp_config gFloatCapabilities = 0;
104 cl_device_fp_config gDoubleCapabilities = 0;
105 int gWimpyReductionFactor = 32;
106 int gWimpyBufferSize = BUFFER_SIZE;
107 int gVerboseBruteForce = 0;
108
109 static int ParseArgs( int argc, const char **argv );
110 static void PrintUsage( void );
111 static void PrintFunctions( void );
112 test_status InitCL( cl_device_id device );
113 static void ReleaseCL( void );
114 static int InitILogbConstants( void );
115 static int IsTininessDetectedBeforeRounding( void );
116 static int IsInRTZMode( void ); //expensive. Please check gIsInRTZMode global instead.
117
118
doTest(const char * name)119 int doTest( const char* name )
120 {
121 if( gSkipRestOfTests )
122 {
123 vlog( "Skipping function because of an earlier error.\n" );
124 return 1;
125 }
126
127 int error = 0;
128 const Func* func_data = NULL;
129
130 for( size_t i = 0; i < functionListCount; i++ )
131 {
132 const Func* const temp_func = functionList + i;
133 if( strcmp( temp_func->name, name ) == 0 )
134 {
135 if( i < gStartTestNumber || i > gEndTestNumber )
136 {
137 vlog( "Skipping function #%d\n", i );
138 return 0;
139 }
140
141 func_data = temp_func;
142 break;
143 }
144 }
145
146 if( func_data == NULL )
147 {
148 vlog( "Function '%s' doesn't exist!\n", name );
149 exit( EXIT_FAILURE );
150 }
151
152 if( func_data->func.p == NULL )
153 {
154 vlog( "'%s' is missing implementation, skipping function.\n", func_data->name );
155 return 0;
156 }
157
158 // if correctly rounded divide & sqrt are supported by the implementation
159 // then test it; otherwise skip the test
160 if( strcmp( func_data->name, "sqrt_cr" ) == 0 || strcmp( func_data->name, "divide_cr" ) == 0 )
161 {
162 if( ( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 )
163 {
164 vlog( "Correctly rounded divide and sqrt are not supported, skipping function.\n" );
165 return 0;
166 }
167 }
168
169 {
170 extern int my_ilogb(double);
171 if( 0 == strcmp( "ilogb", func_data->name ) )
172 {
173 InitILogbConstants();
174 }
175
176 if ( gTestFastRelaxed )
177 {
178 if( func_data->relaxed )
179 {
180 gTestCount++;
181 vlog( "%3d: ", gTestCount );
182 if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata ) )
183 {
184 gFailCount++;
185 error++;
186 if( gStopOnError )
187 {
188 gSkipRestOfTests = true;
189 return error;
190 }
191 }
192 }
193 }
194
195 if( gTestFloat )
196 {
197 int testFastRelaxedTmp = gTestFastRelaxed;
198 gTestFastRelaxed = 0;
199
200 gTestCount++;
201 vlog( "%3d: ", gTestCount );
202 if( func_data->vtbl_ptr->TestFunc( func_data, gMTdata ) )
203 {
204 gFailCount++;
205 error++;
206 if( gStopOnError )
207 {
208 gTestFastRelaxed = testFastRelaxedTmp;
209 gSkipRestOfTests = true;
210 return error;
211 }
212 }
213 gTestFastRelaxed = testFastRelaxedTmp;
214 }
215
216 if( gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc && NULL != func_data->dfunc.p )
217 {
218 //Disable fast-relaxed-math for double precision floating-point
219 int testFastRelaxedTmp = gTestFastRelaxed;
220 gTestFastRelaxed = 0;
221
222 gTestCount++;
223 vlog( "%3d: ", gTestCount );
224 if( func_data->vtbl_ptr->DoubleTestFunc( func_data, gMTdata ) )
225 {
226 gFailCount++;
227 error++;
228 if( gStopOnError )
229 {
230 gTestFastRelaxed = testFastRelaxedTmp;
231 gSkipRestOfTests = true;
232 return error;
233 }
234 }
235
236 //Re-enable testing fast-relaxed-math mode
237 gTestFastRelaxed = testFastRelaxedTmp;
238 }
239 }
240
241 return error;
242 }
243
test_acos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)244 int test_acos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
245 {
246 return doTest( "acos" );
247 }
test_acosh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)248 int test_acosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
249 {
250 return doTest( "acosh" );
251 }
test_acospi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)252 int test_acospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
253 {
254 return doTest( "acospi" );
255 }
test_asin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)256 int test_asin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
257 {
258 return doTest( "asin" );
259 }
test_asinh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)260 int test_asinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
261 {
262 return doTest( "asinh" );
263 }
test_asinpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)264 int test_asinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
265 {
266 return doTest( "asinpi" );
267 }
test_atan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)268 int test_atan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
269 {
270 return doTest( "atan" );
271 }
test_atanh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)272 int test_atanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
273 {
274 return doTest( "atanh" );
275 }
test_atanpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)276 int test_atanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
277 {
278 return doTest( "atanpi" );
279 }
test_atan2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)280 int test_atan2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
281 {
282 return doTest( "atan2" );
283 }
test_atan2pi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)284 int test_atan2pi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
285 {
286 return doTest( "atan2pi" );
287 }
test_cbrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)288 int test_cbrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
289 {
290 return doTest( "cbrt" );
291 }
test_ceil(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)292 int test_ceil( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
293 {
294 return doTest( "ceil" );
295 }
test_copysign(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)296 int test_copysign( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
297 {
298 return doTest( "copysign" );
299 }
test_cos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)300 int test_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
301 {
302 return doTest( "cos" );
303 }
test_cosh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)304 int test_cosh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
305 {
306 return doTest( "cosh" );
307 }
test_cospi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)308 int test_cospi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
309 {
310 return doTest( "cospi" );
311 }
test_exp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)312 int test_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
313 {
314 return doTest( "exp" );
315 }
test_exp2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)316 int test_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
317 {
318 return doTest( "exp2" );
319 }
test_exp10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)320 int test_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
321 {
322 return doTest( "exp10" );
323 }
test_expm1(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)324 int test_expm1( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
325 {
326 return doTest( "expm1" );
327 }
test_fabs(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)328 int test_fabs( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
329 {
330 return doTest( "fabs" );
331 }
test_fdim(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)332 int test_fdim( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
333 {
334 return doTest( "fdim" );
335 }
test_floor(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)336 int test_floor( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
337 {
338 return doTest( "floor" );
339 }
test_fma(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)340 int test_fma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
341 {
342 return doTest( "fma" );
343 }
test_fmax(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)344 int test_fmax( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
345 {
346 return doTest( "fmax" );
347 }
test_fmin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)348 int test_fmin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
349 {
350 return doTest( "fmin" );
351 }
test_fmod(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)352 int test_fmod( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
353 {
354 return doTest( "fmod" );
355 }
test_fract(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)356 int test_fract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
357 {
358 return doTest( "fract" );
359 }
test_frexp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)360 int test_frexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
361 {
362 return doTest( "frexp" );
363 }
test_hypot(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)364 int test_hypot( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
365 {
366 return doTest( "hypot" );
367 }
test_ilogb(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)368 int test_ilogb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
369 {
370 return doTest( "ilogb" );
371 }
test_isequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)372 int test_isequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
373 {
374 return doTest( "isequal" );
375 }
test_isfinite(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)376 int test_isfinite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
377 {
378 return doTest( "isfinite" );
379 }
test_isgreater(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)380 int test_isgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
381 {
382 return doTest( "isgreater" );
383 }
test_isgreaterequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)384 int test_isgreaterequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
385 {
386 return doTest( "isgreaterequal" );
387 }
test_isinf(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)388 int test_isinf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
389 {
390 return doTest( "isinf" );
391 }
test_isless(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)392 int test_isless( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
393 {
394 return doTest( "isless" );
395 }
test_islessequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)396 int test_islessequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
397 {
398 return doTest( "islessequal" );
399 }
test_islessgreater(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)400 int test_islessgreater( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
401 {
402 return doTest( "islessgreater" );
403 }
test_isnan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)404 int test_isnan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
405 {
406 return doTest( "isnan" );
407 }
test_isnormal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)408 int test_isnormal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
409 {
410 return doTest( "isnormal" );
411 }
test_isnotequal(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)412 int test_isnotequal( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
413 {
414 return doTest( "isnotequal" );
415 }
test_isordered(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)416 int test_isordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
417 {
418 return doTest( "isordered" );
419 }
test_isunordered(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)420 int test_isunordered( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
421 {
422 return doTest( "isunordered" );
423 }
test_ldexp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)424 int test_ldexp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
425 {
426 return doTest( "ldexp" );
427 }
test_lgamma(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)428 int test_lgamma( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
429 {
430 return doTest( "lgamma" );
431 }
test_lgamma_r(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)432 int test_lgamma_r( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
433 {
434 return doTest( "lgamma_r" );
435 }
test_log(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)436 int test_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
437 {
438 return doTest( "log" );
439 }
test_log2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)440 int test_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
441 {
442 return doTest( "log2" );
443 }
test_log10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)444 int test_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
445 {
446 return doTest( "log10" );
447 }
test_log1p(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)448 int test_log1p( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
449 {
450 return doTest( "log1p" );
451 }
test_logb(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)452 int test_logb( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
453 {
454 return doTest( "logb" );
455 }
test_mad(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)456 int test_mad( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
457 {
458 return doTest( "mad" );
459 }
test_maxmag(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)460 int test_maxmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
461 {
462 return doTest( "maxmag" );
463 }
test_minmag(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)464 int test_minmag( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
465 {
466 return doTest( "minmag" );
467 }
test_modf(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)468 int test_modf( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
469 {
470 return doTest( "modf" );
471 }
test_nan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)472 int test_nan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
473 {
474 return doTest( "nan" );
475 }
test_nextafter(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)476 int test_nextafter( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
477 {
478 return doTest( "nextafter" );
479 }
test_pow(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)480 int test_pow( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
481 {
482 return doTest( "pow" );
483 }
test_pown(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)484 int test_pown( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
485 {
486 return doTest( "pown" );
487 }
test_powr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)488 int test_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
489 {
490 return doTest( "powr" );
491 }
test_remainder(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)492 int test_remainder( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
493 {
494 return doTest( "remainder" );
495 }
test_remquo(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)496 int test_remquo( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
497 {
498 return doTest( "remquo" );
499 }
test_rint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)500 int test_rint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
501 {
502 return doTest( "rint" );
503 }
test_rootn(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)504 int test_rootn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
505 {
506 return doTest( "rootn" );
507 }
test_round(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)508 int test_round( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
509 {
510 return doTest( "round" );
511 }
test_rsqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)512 int test_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
513 {
514 return doTest( "rsqrt" );
515 }
test_signbit(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)516 int test_signbit( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
517 {
518 return doTest( "signbit" );
519 }
test_sin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)520 int test_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
521 {
522 return doTest( "sin" );
523 }
test_sincos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)524 int test_sincos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
525 {
526 return doTest( "sincos" );
527 }
test_sinh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)528 int test_sinh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
529 {
530 return doTest( "sinh" );
531 }
test_sinpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)532 int test_sinpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
533 {
534 return doTest( "sinpi" );
535 }
test_sqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)536 int test_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
537 {
538 return doTest( "sqrt" );
539 }
test_sqrt_cr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)540 int test_sqrt_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
541 {
542 return doTest( "sqrt_cr" );
543 }
test_tan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)544 int test_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
545 {
546 return doTest( "tan" );
547 }
test_tanh(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)548 int test_tanh( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
549 {
550 return doTest( "tanh" );
551 }
test_tanpi(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)552 int test_tanpi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
553 {
554 return doTest( "tanpi" );
555 }
test_trunc(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)556 int test_trunc( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
557 {
558 return doTest( "trunc" );
559 }
test_half_cos(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)560 int test_half_cos( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
561 {
562 return doTest( "half_cos" );
563 }
test_half_divide(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)564 int test_half_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
565 {
566 return doTest( "half_divide" );
567 }
test_half_exp(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)568 int test_half_exp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
569 {
570 return doTest( "half_exp" );
571 }
test_half_exp2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)572 int test_half_exp2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
573 {
574 return doTest( "half_exp2" );
575 }
test_half_exp10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)576 int test_half_exp10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
577 {
578 return doTest( "half_exp10" );
579 }
test_half_log(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)580 int test_half_log( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
581 {
582 return doTest( "half_log" );
583 }
test_half_log2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)584 int test_half_log2( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
585 {
586 return doTest( "half_log2" );
587 }
test_half_log10(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)588 int test_half_log10( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
589 {
590 return doTest( "half_log10" );
591 }
test_half_powr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)592 int test_half_powr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
593 {
594 return doTest( "half_powr" );
595 }
test_half_recip(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)596 int test_half_recip( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
597 {
598 return doTest( "half_recip" );
599 }
test_half_rsqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)600 int test_half_rsqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
601 {
602 return doTest( "half_rsqrt" );
603 }
test_half_sin(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)604 int test_half_sin( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
605 {
606 return doTest( "half_sin" );
607 }
test_half_sqrt(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)608 int test_half_sqrt( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
609 {
610 return doTest( "half_sqrt" );
611 }
test_half_tan(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)612 int test_half_tan( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
613 {
614 return doTest( "half_tan" );
615 }
test_add(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)616 int test_add( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
617 {
618 return doTest( "add" );
619 }
test_subtract(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)620 int test_subtract( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
621 {
622 return doTest( "subtract" );
623 }
test_divide(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)624 int test_divide( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
625 {
626 return doTest( "divide" );
627 }
test_divide_cr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)628 int test_divide_cr( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
629 {
630 return doTest( "divide_cr" );
631 }
test_multiply(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)632 int test_multiply( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
633 {
634 return doTest( "multiply" );
635 }
test_assignment(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)636 int test_assignment( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
637 {
638 return doTest( "assignment" );
639 }
test_not(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)640 int test_not( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
641 {
642 return doTest( "not" );
643 }
644
645 test_definition test_list[] = {
646 ADD_TEST( acos ),
647 ADD_TEST( acosh ),
648 ADD_TEST( acospi ),
649 ADD_TEST( asin ),
650 ADD_TEST( asinh ),
651 ADD_TEST( asinpi ),
652 ADD_TEST( atan ),
653 ADD_TEST( atanh ),
654 ADD_TEST( atanpi ),
655 ADD_TEST( atan2 ),
656 ADD_TEST( atan2pi ),
657 ADD_TEST( cbrt ),
658 ADD_TEST( ceil ),
659 ADD_TEST( copysign ),
660 ADD_TEST( cos ),
661 ADD_TEST( cosh ),
662 ADD_TEST( cospi ),
663 ADD_TEST( exp ),
664 ADD_TEST( exp2 ),
665 ADD_TEST( exp10 ),
666 ADD_TEST( expm1 ),
667 ADD_TEST( fabs ),
668 ADD_TEST( fdim ),
669 ADD_TEST( floor ),
670 ADD_TEST( fma ),
671 ADD_TEST( fmax ),
672 ADD_TEST( fmin ),
673 ADD_TEST( fmod ),
674 ADD_TEST( fract ),
675 ADD_TEST( frexp ),
676 ADD_TEST( hypot ),
677 ADD_TEST( ilogb ),
678 ADD_TEST( isequal ),
679 ADD_TEST( isfinite ),
680 ADD_TEST( isgreater ),
681 ADD_TEST( isgreaterequal ),
682 ADD_TEST( isinf ),
683 ADD_TEST( isless ),
684 ADD_TEST( islessequal ),
685 ADD_TEST( islessgreater ),
686 ADD_TEST( isnan ),
687 ADD_TEST( isnormal ),
688 ADD_TEST( isnotequal ),
689 ADD_TEST( isordered ),
690 ADD_TEST( isunordered ),
691 ADD_TEST( ldexp ),
692 ADD_TEST( lgamma ),
693 ADD_TEST( lgamma_r ),
694 ADD_TEST( log ),
695 ADD_TEST( log2 ),
696 ADD_TEST( log10 ),
697 ADD_TEST( log1p ),
698 ADD_TEST( logb ),
699 ADD_TEST( mad ),
700 ADD_TEST( maxmag ),
701 ADD_TEST( minmag ),
702 ADD_TEST( modf ),
703 ADD_TEST( nan ),
704 ADD_TEST( nextafter ),
705 ADD_TEST( pow ),
706 ADD_TEST( pown ),
707 ADD_TEST( powr ),
708 ADD_TEST( remainder ),
709 ADD_TEST( remquo ),
710 ADD_TEST( rint ),
711 ADD_TEST( rootn ),
712 ADD_TEST( round ),
713 ADD_TEST( rsqrt ),
714 ADD_TEST( signbit ),
715 ADD_TEST( sin ),
716 ADD_TEST( sincos ),
717 ADD_TEST( sinh ),
718 ADD_TEST( sinpi ),
719 ADD_TEST( sqrt ),
720 ADD_TEST( sqrt_cr ),
721 ADD_TEST( tan ),
722 ADD_TEST( tanh ),
723 ADD_TEST( tanpi ),
724 ADD_TEST( trunc ),
725 ADD_TEST( half_cos ),
726 ADD_TEST( half_divide ),
727 ADD_TEST( half_exp ),
728 ADD_TEST( half_exp2 ),
729 ADD_TEST( half_exp10 ),
730 ADD_TEST( half_log ),
731 ADD_TEST( half_log2 ),
732 ADD_TEST( half_log10 ),
733 ADD_TEST( half_powr ),
734 ADD_TEST( half_recip ),
735 ADD_TEST( half_rsqrt ),
736 ADD_TEST( half_sin ),
737 ADD_TEST( half_sqrt ),
738 ADD_TEST( half_tan ),
739 ADD_TEST( add ),
740 ADD_TEST( subtract ),
741 ADD_TEST( divide ),
742 ADD_TEST( divide_cr ),
743 ADD_TEST( multiply ),
744 ADD_TEST( assignment ),
745 ADD_TEST( not ),
746 };
747
748 const int test_num = ARRAY_SIZE( test_list );
749
750 #pragma mark -
751
main(int argc,const char * argv[])752 int main (int argc, const char * argv[])
753 {
754 int error;
755
756 argc = parseCustomParam(argc, argv);
757 if (argc == -1)
758 {
759 return -1;
760 }
761
762 #if defined( __APPLE__ )
763 struct timeval startTime;
764 gettimeofday( &startTime, NULL );
765 #endif
766
767 error = ParseArgs( argc, argv );
768 if( error )
769 return error;
770
771 // This takes a while, so prevent the machine from going to sleep.
772 PreventSleep();
773 atexit( ResumeSleep );
774
775 if( gSkipCorrectnessTesting )
776 vlog( "*** Skipping correctness testing! ***\n\n" );
777 else if( gStopOnError )
778 vlog( "Stopping at first error.\n" );
779
780 if( gMeasureTimes )
781 {
782 vlog( "%s times are reported at right (cycles per element):\n", method[gReportAverageTimes] );
783 vlog( "\n" );
784 if( gSkipCorrectnessTesting )
785 vlog( " \t ");
786 else
787 vlog( " \t ");
788 if( gWimpyMode )
789 vlog( " " );
790 for( int i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
791 vlog( "\t float%s", sizeNames[i] );
792 }
793 else
794 {
795 vlog( " \t ");
796 if( gWimpyMode )
797 vlog( " " );
798 }
799 if( ! gSkipCorrectnessTesting )
800 vlog( "\t max_ulps" );
801
802 vlog( "\n-----------------------------------------------------------------------------------------------------------\n" );
803
804 gMTdata = init_genrand( gRandomSeed );
805 if( gEndTestNumber == 0 )
806 {
807 gEndTestNumber = functionListCount;
808 }
809
810 FPU_mode_type oldMode;
811 DisableFTZ( &oldMode );
812
813 int ret = runTestHarnessWithCheck( gTestNameCount, gTestNames, test_num, test_list, true, 0, InitCL );
814
815 RestoreFPState( &oldMode );
816
817 free_mtdata(gMTdata);
818 free(gTestNames);
819
820 int error_code = clFinish(gQueue);
821 if (error_code)
822 vlog_error("clFinish failed:%d\n", error_code);
823
824 ReleaseCL();
825
826 #if defined( __APPLE__ )
827 struct timeval endTime;
828 gettimeofday( &endTime, NULL );
829 double time = (double) endTime.tv_sec - (double) startTime.tv_sec;
830 time += 1e-6 * ((double) endTime.tv_usec - (double) startTime.tv_usec);
831 vlog( "time: %f s\n", time );
832 #endif
833
834 return ret;
835 }
836
ParseArgs(int argc,const char ** argv)837 static int ParseArgs( int argc, const char **argv )
838 {
839 int i;
840 gTestNames = (const char**) calloc( argc - 1, sizeof( char*) );
841 if( NULL == gTestNames )
842 {
843 vlog( "Failed to allocate memory for gTestNames array.\n" );
844 return 1;
845 }
846 gTestNames[0] = argv[0];
847 gTestNameCount = 1;
848 int singleThreaded = 0;
849
850 { // Extract the app name
851 strncpy( appName, argv[0], MAXPATHLEN );
852
853 #if defined( __APPLE__ )
854 char baseName[MAXPATHLEN];
855 char *base = NULL;
856 strncpy( baseName, argv[0], MAXPATHLEN );
857 base = basename( baseName );
858 if( NULL != base )
859 {
860 strncpy( appName, base, sizeof( appName ) );
861 appName[ sizeof( appName ) -1 ] = '\0';
862 }
863 #endif
864 }
865
866 vlog( "\n%s\t", appName );
867 for( i = 1; i < argc; i++ )
868 {
869 const char *arg = argv[i];
870 if( NULL == arg )
871 break;
872
873 vlog( "\t%s", arg );
874 int optionFound = 0;
875 if( arg[0] == '-' )
876 {
877 while( arg[1] != '\0' )
878 {
879 arg++;
880 optionFound = 1;
881 switch( *arg )
882 {
883 case 'a':
884 gReportAverageTimes ^= 1;
885 break;
886
887 case 'c':
888 gToggleCorrectlyRoundedDivideSqrt ^= 1;
889 break;
890
891 case 'd':
892 gHasDouble ^= 1;
893 break;
894
895 case 'e':
896 gFastRelaxedDerived ^= 1;
897 break;
898
899 case 'f':
900 gTestFloat ^= 1;
901 break;
902
903 case 'h':
904 PrintUsage();
905 return -1;
906
907 case 'p':
908 PrintFunctions();
909 return -1;
910
911 case 'l':
912 gSkipCorrectnessTesting ^= 1;
913 break;
914
915 case 'm':
916 singleThreaded ^= 1;
917 break;
918
919 case 'r':
920 gTestFastRelaxed ^= 1;
921 break;
922
923 case 's':
924 gStopOnError ^= 1;
925 break;
926
927 case 't':
928 gMeasureTimes ^= 1;
929 break;
930
931 case 'v':
932 gVerboseBruteForce ^= 1;
933 break;
934
935 case 'w': // wimpy mode
936 gWimpyMode ^= 1;
937 break;
938
939 case '[':
940 parseWimpyReductionFactor(arg, gWimpyReductionFactor);
941 break;
942
943 case 'z':
944 gForceFTZ ^= 1;
945 break;
946
947 case '1':
948 if( arg[1] == '6' )
949 {
950 gMinVectorSizeIndex = 5;
951 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
952 arg++;
953 }
954 else
955 {
956 gMinVectorSizeIndex = 0;
957 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
958 }
959 break;
960 case '2':
961 gMinVectorSizeIndex = 1;
962 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
963 break;
964 case '3':
965 gMinVectorSizeIndex = 2;
966 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
967 break;
968 case '4':
969 gMinVectorSizeIndex = 3;
970 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
971 break;
972 case '8':
973 gMinVectorSizeIndex = 4;
974 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
975 break;
976 break;
977
978 default:
979 vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
980 PrintUsage();
981 return -1;
982 }
983 }
984 }
985
986 if( ! optionFound )
987 {
988 char *t = NULL;
989 long number = strtol( arg, &t, 0 );
990 if( t != arg )
991 {
992 if( 0 == gStartTestNumber )
993 gStartTestNumber = (int32_t) number;
994 else
995 gEndTestNumber = gStartTestNumber + (int32_t) number;
996 }
997 else
998 {
999 // Make sure this is a valid name
1000 unsigned int k;
1001 for (k=0; k<functionListCount; k++)
1002 {
1003 const Func *f = functionList+k;
1004 if (strcmp(arg, f->name) == 0)
1005 {
1006 gTestNames[ gTestNameCount ] = arg;
1007 gTestNameCount++;
1008 break;
1009 }
1010 }
1011 // If we didn't find it in the list of test names
1012 if (k >= functionListCount)
1013 {
1014 gTestNames[gTestNameCount] = arg;
1015 gTestNameCount++;
1016 }
1017 }
1018 }
1019 }
1020
1021 // Check for the wimpy mode environment variable
1022 if (getenv("CL_WIMPY_MODE")) {
1023 vlog( "\n" );
1024 vlog( "*** Detected CL_WIMPY_MODE env ***\n" );
1025 gWimpyMode = 1;
1026 }
1027
1028 vlog( "\nTest binary built %s %s\n", __DATE__, __TIME__ );
1029
1030 PrintArch();
1031
1032 if( gWimpyMode )
1033 {
1034 vlog( "\n" );
1035 vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
1036 vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
1037 vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor );
1038 }
1039
1040 if( singleThreaded )
1041 SetThreadCount(1);
1042
1043 return 0;
1044 }
1045
1046
PrintFunctions(void)1047 static void PrintFunctions ( void )
1048 {
1049 vlog( "\nMath function names:\n" );
1050 for( int i = 0; i < functionListCount; i++ )
1051 {
1052 vlog( "\t%s\n", functionList[ i ].name );
1053 }
1054 }
1055
PrintUsage(void)1056 static void PrintUsage( void )
1057 {
1058 vlog( "%s [-acglstz]: <optional: math function names>\n", appName );
1059 vlog( "\toptions:\n" );
1060 vlog( "\t\t-a\tReport average times instead of best times\n" );
1061 vlog( "\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: off)\n");
1062 vlog( "\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 on)\n" );
1063 vlog( "\t\t-f\tToggle float precision testing. (Default: on)\n" );
1064 vlog( "\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n" );
1065 vlog( "\t\t-e\tToggle test as derived implementations for fast relaxed math precision. (Default: on)\n" );
1066 vlog( "\t\t-h\tPrint this message and quit\n" );
1067 vlog( "\t\t-p\tPrint all math function names and quit\n" );
1068 vlog( "\t\t-l\tlink check only (make sure functions are present, skip accuracy checks.)\n" );
1069 vlog( "\t\t-m\tToggle run multi-threaded. (Default: on) )\n" );
1070 vlog( "\t\t-s\tStop on error\n" );
1071 vlog( "\t\t-t\tToggle timing (on by default)\n" );
1072 vlog( "\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
1073 vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-10, default factor(%u)\n",gWimpyReductionFactor );
1074 vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" );
1075 vlog( "\t\t-v\tToggle Verbosity (Default: off)\n ");
1076 vlog( "\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, \"-16\" tests 16-wide vectors only.\n" );
1077 vlog( "\n\tYou may also pass a number instead of a function name.\n" );
1078 vlog( "\tThis causes the first N tests to be skipped. The tests are numbered.\n" );
1079 vlog( "\tIf you pass a second number, that is the number tests to run after the first one.\n" );
1080 vlog( "\tA name list may be used in conjunction with a number range. In that case,\n" );
1081 vlog( "\tonly the named cases in the number range will run.\n" );
1082 vlog( "\tYou may also choose to pass no arguments, in which case all tests will be run.\n" );
1083 vlog( "\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the device.\n" );
1084 vlog( "\n" );
1085 }
1086
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)1087 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
1088 {
1089 vlog( "%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data );
1090 }
1091
InitCL(cl_device_id device)1092 test_status InitCL( cl_device_id device )
1093 {
1094 int error;
1095 uint32_t i;
1096 size_t configSize = sizeof( gComputeDevices );
1097 cl_device_type device_type;
1098
1099 error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL );
1100 if( error )
1101 {
1102 print_error( error, "Unable to get device type" );
1103 return TEST_FAIL;
1104 }
1105
1106 gDevice = device;
1107
1108
1109 if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
1110 gComputeDevices = 1;
1111
1112 // Check extensions
1113 if(is_extension_available(gDevice, "cl_khr_fp64"))
1114 {
1115 gHasDouble ^= 1;
1116 #if defined( CL_DEVICE_DOUBLE_FP_CONFIG )
1117 if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL)))
1118 {
1119 vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n", error );
1120 return TEST_FAIL;
1121 }
1122
1123 if( DOUBLE_REQUIRED_FEATURES != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES) )
1124 {
1125 std::string list;
1126 if (0 == (gDoubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
1127 if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) )
1128 list += "CL_FP_ROUND_TO_NEAREST, ";
1129 if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) )
1130 list += "CL_FP_ROUND_TO_ZERO, ";
1131 if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) )
1132 list += "CL_FP_ROUND_TO_INF, ";
1133 if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) )
1134 list += "CL_FP_INF_NAN, ";
1135 if( 0 == (gDoubleCapabilities & CL_FP_DENORM) )
1136 list += "CL_FP_DENORM, ";
1137 vlog_error("ERROR: required double features are missing: %s\n",
1138 list.c_str());
1139
1140 return TEST_FAIL;
1141 }
1142 #else
1143 vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" );
1144 return TEST_FAIL;
1145 #endif
1146 }
1147
1148 configSize = sizeof( gDeviceFrequency );
1149 if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
1150 gDeviceFrequency = 0;
1151
1152 if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL)))
1153 {
1154 vlog_error( "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n", error );
1155 return TEST_FAIL;
1156 }
1157
1158 gContext = clCreateContext( NULL, 1, &gDevice, bruteforce_notify_callback, NULL, &error );
1159 if( NULL == gContext || error )
1160 {
1161 vlog_error( "clCreateContext failed. (%d) \n", error );
1162 return TEST_FAIL;
1163 }
1164
1165 gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
1166 if( NULL == gQueue || error )
1167 {
1168 vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
1169 return TEST_FAIL;
1170 }
1171
1172 #if defined( __APPLE__ )
1173 // FIXME: use clProtectedArray
1174 #endif
1175 //Allocate buffers
1176 cl_uint min_alignment = 0;
1177 error = clGetDeviceInfo (gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void*)&min_alignment, NULL);
1178 if (CL_SUCCESS != error)
1179 {
1180 vlog_error( "clGetDeviceInfo failed. (%d)\n", error );
1181 return TEST_FAIL;
1182 }
1183 min_alignment >>= 3; // convert bits to bytes
1184
1185 gIn = align_malloc( BUFFER_SIZE, min_alignment );
1186 if( NULL == gIn )
1187 return TEST_FAIL;
1188 gIn2 = align_malloc( BUFFER_SIZE, min_alignment );
1189 if( NULL == gIn2 )
1190 return TEST_FAIL;
1191 gIn3 = align_malloc( BUFFER_SIZE, min_alignment );
1192 if( NULL == gIn3 )
1193 return TEST_FAIL;
1194 gOut_Ref = align_malloc( BUFFER_SIZE, min_alignment );
1195 if( NULL == gOut_Ref )
1196 return TEST_FAIL;
1197 gOut_Ref2 = align_malloc( BUFFER_SIZE, min_alignment );
1198 if( NULL == gOut_Ref2 )
1199 return TEST_FAIL;
1200
1201 for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1202 {
1203 gOut[i] = align_malloc( BUFFER_SIZE, min_alignment );
1204 if( NULL == gOut[i] )
1205 return TEST_FAIL;
1206 gOut2[i] = align_malloc( BUFFER_SIZE, min_alignment );
1207 if( NULL == gOut2[i] )
1208 return TEST_FAIL;
1209 }
1210
1211 cl_mem_flags device_flags = CL_MEM_READ_ONLY;
1212 // save a copy on the host device to make this go faster
1213 if( CL_DEVICE_TYPE_CPU == device_type )
1214 device_flags |= CL_MEM_USE_HOST_PTR;
1215 else
1216 device_flags |= CL_MEM_COPY_HOST_PTR;
1217
1218 // setup input buffers
1219 gInBuffer = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
1220 if( gInBuffer == NULL || error )
1221 {
1222 vlog_error( "clCreateBuffer1 failed for input (%d)\n", error );
1223 return TEST_FAIL;
1224 }
1225
1226 gInBuffer2 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn2, &error );
1227 if( gInBuffer2 == NULL || error )
1228 {
1229 vlog_error( "clCreateArray2 failed for input (%d)\n" , error );
1230 return TEST_FAIL;
1231 }
1232
1233 gInBuffer3 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn3, &error );
1234 if( gInBuffer3 == NULL || error)
1235 {
1236 vlog_error( "clCreateArray3 failed for input (%d)\n", error );
1237 return TEST_FAIL;
1238 }
1239
1240
1241 // setup output buffers
1242 device_flags = CL_MEM_READ_WRITE;
1243 // save a copy on the host device to make this go faster
1244 if( CL_DEVICE_TYPE_CPU == device_type )
1245 device_flags |= CL_MEM_USE_HOST_PTR;
1246 else
1247 device_flags |= CL_MEM_COPY_HOST_PTR;
1248 for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1249 {
1250 gOutBuffer[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut[i], &error );
1251 if( gOutBuffer[i] == NULL || error )
1252 {
1253 vlog_error( "clCreateArray failed for output (%d)\n", error );
1254 return TEST_FAIL;
1255 }
1256 gOutBuffer2[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut2[i], &error );
1257 if( gOutBuffer2[i] == NULL || error)
1258 {
1259 vlog_error( "clCreateArray2 failed for output (%d)\n", error );
1260 return TEST_FAIL;
1261 }
1262 }
1263
1264 // we are embedded, check current rounding mode
1265 if( gIsEmbedded )
1266 {
1267 gIsInRTZMode = IsInRTZMode();
1268 }
1269
1270 //Check tininess detection
1271 IsTininessDetectedBeforeRounding();
1272
1273 cl_platform_id platform;
1274 int err = clGetPlatformIDs(1, &platform, NULL);
1275 if( err )
1276 {
1277 print_error(err, "clGetPlatformIDs failed");
1278 return TEST_FAIL;
1279 }
1280
1281 char c[1024];
1282 static const char *no_yes[] = { "NO", "YES" };
1283 vlog( "\nCompute Device info:\n" );
1284 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
1285 vlog( "\tPlatform Version: %s\n", c );
1286 clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
1287 vlog( "\tDevice Name: %s\n", c );
1288 clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
1289 vlog( "\tVendor: %s\n", c );
1290 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
1291 vlog( "\tDevice Version: %s\n", c );
1292 clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
1293 vlog( "\tCL C Version: %s\n", c );
1294 clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
1295 vlog( "\tDriver Version: %s\n", c );
1296 vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
1297 vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)] );
1298 vlog( "\tCorrectly rounded divide and sqrt supported for floats? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
1299 if( gToggleCorrectlyRoundedDivideSqrt )
1300 {
1301 gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
1302 }
1303 vlog( "\tTesting with correctly rounded float divide and sqrt? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] );
1304 vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)] );
1305 vlog( "\tTesting single precision? %s\n", no_yes[0 != gTestFloat] );
1306 vlog( "\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed] );
1307 if(gTestFastRelaxed)
1308 {
1309 vlog( "\tFast relaxed math has derived implementations? %s\n", no_yes[0 != gFastRelaxedDerived] );
1310 }
1311 vlog( "\tTesting double precision? %s\n", no_yes[0 != gHasDouble] );
1312 if( sizeof( long double) == sizeof( double ) && gHasDouble )
1313 {
1314 vlog( "\n\t\tWARNING: Host system long double does not have better precision than double!\n" );
1315 vlog( "\t\t All double results that do not match the reference result have their reported\n" );
1316 vlog( "\t\t error inflated by 0.5 ulps to account for the fact that this system\n" );
1317 vlog( "\t\t can not accurately represent the right result to an accuracy closer\n" );
1318 vlog( "\t\t than half an ulp. See comments in Bruteforce_Ulp_Error_Double() for more details.\n\n" );
1319 }
1320
1321 vlog( "\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded] );
1322 if( gIsEmbedded )
1323 vlog( "\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode] );
1324 vlog( "\tTininess is detected before rounding? %s\n", no_yes[0 != gCheckTininessBeforeRounding] );
1325 vlog( "\tWorker threads: %d\n", GetThreadCount() );
1326 vlog( "\tTesting vector sizes:" );
1327 for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1328 vlog( "\t%d", sizeValues[i] );
1329
1330 vlog("\n");
1331 vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
1332 vlog( "\n\n" );
1333
1334 // Check to see if we are using single threaded mode on other than a 1.0 device
1335 if (getenv( "CL_TEST_SINGLE_THREADED" )) {
1336
1337 char device_version[1024] = { 0 };
1338 clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
1339
1340 if (strcmp("OpenCL 1.0 ",device_version)) {
1341 vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
1342 }
1343 }
1344
1345 return TEST_PASS;
1346 }
1347
ReleaseCL(void)1348 static void ReleaseCL( void )
1349 {
1350 uint32_t i;
1351 clReleaseMemObject(gInBuffer);
1352 clReleaseMemObject(gInBuffer2);
1353 clReleaseMemObject(gInBuffer3);
1354 for ( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) {
1355 clReleaseMemObject(gOutBuffer[i]);
1356 clReleaseMemObject(gOutBuffer2[i]);
1357 }
1358 clReleaseCommandQueue(gQueue);
1359 clReleaseContext(gContext);
1360
1361 align_free(gIn);
1362 align_free(gIn2);
1363 align_free(gIn3);
1364 align_free(gOut_Ref);
1365 align_free(gOut_Ref2);
1366
1367 for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
1368 {
1369 align_free(gOut[i]);
1370 align_free(gOut2[i]);
1371 }
1372 }
1373
_LogBuildError(cl_program p,int line,const char * file)1374 void _LogBuildError( cl_program p, int line, const char *file )
1375 {
1376 char the_log[2048] = "";
1377
1378 vlog_error( "%s:%d: Build Log:\n", file, line );
1379 if( 0 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(the_log), the_log, NULL) )
1380 vlog_error( "%s", the_log );
1381 else
1382 vlog_error( "*** Error getting build log for program %p\n", p );
1383 }
1384
InitILogbConstants(void)1385 int InitILogbConstants( void )
1386 {
1387 int error;
1388 const char *kernel =
1389 "__kernel void GetILogBConstants( __global int *out )\n"
1390 "{\n"
1391 " out[0] = FP_ILOGB0;\n"
1392 " out[1] = FP_ILOGBNAN;\n"
1393 "}\n";
1394
1395 cl_program query;
1396 error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
1397 if (NULL == query || error)
1398 {
1399 vlog_error( "Error: Unable to create program to get FP_ILOGB0 and FP_ILOGBNAN for the device. (%d)", error );
1400 return error;
1401 }
1402
1403 cl_kernel k = clCreateKernel( query, "GetILogBConstants", &error );
1404 if( NULL == k || error)
1405 {
1406 vlog_error( "Error: Unable to create kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
1407 return error;
1408 }
1409
1410 if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
1411 {
1412 vlog_error( "Error: Unable to set kernel arg to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
1413 return error;
1414 }
1415
1416 size_t dim = 1;
1417 if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
1418 {
1419 vlog_error( "Error: Unable to execute kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error );
1420 return error;
1421 }
1422
1423 struct{ cl_int ilogb0, ilogbnan; }data;
1424 if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
1425 {
1426 vlog_error( "Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the device. Err = %d", error );
1427 return error;
1428 }
1429
1430 gDeviceILogb0 = data.ilogb0;
1431 gDeviceILogbNaN = data.ilogbnan;
1432
1433 clReleaseKernel(k);
1434 clReleaseProgram(query);
1435
1436 return 0;
1437 }
1438
IsTininessDetectedBeforeRounding(void)1439 int IsTininessDetectedBeforeRounding( void )
1440 {
1441 int error;
1442 const char *kernel =
1443 "__kernel void IsTininessDetectedBeforeRounding( __global float *out )\n"
1444 "{\n"
1445 " volatile float a = 0x1.000002p-126f;\n"
1446 " volatile float b = 0x1.fffffcp-1f;\n" // product is 0x1.fffffffffff8p-127
1447 " out[0] = a * b;\n"
1448 "}\n";
1449
1450 cl_program query;
1451 error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
1452 if (error != CL_SUCCESS) {
1453 vlog_error( "Error: Unable to create program to detect how tininess is detected for the device. (%d)", error );
1454 return error;
1455 }
1456
1457 cl_kernel k = clCreateKernel( query, "IsTininessDetectedBeforeRounding", &error );
1458 if( NULL == k || error)
1459 {
1460 vlog_error( "Error: Unable to create kernel to detect how tininess is detected for the device. Err = %d", error );
1461 return error;
1462 }
1463
1464 if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
1465 {
1466 vlog_error( "Error: Unable to set kernel arg to detect how tininess is detected for the device. Err = %d", error );
1467 return error;
1468 }
1469
1470 size_t dim = 1;
1471 if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
1472 {
1473 vlog_error( "Error: Unable to execute kernel to detect how tininess is detected for the device. Err = %d", error );
1474 return error;
1475 }
1476
1477 struct{ cl_uint f; }data;
1478 if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
1479 {
1480 vlog_error( "Error: unable to read result from tininess test from the device. Err = %d", error );
1481 return error;
1482 }
1483
1484 gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1485
1486 clReleaseKernel(k);
1487 clReleaseProgram(query);
1488
1489 return 0;
1490 }
1491
1492
MakeKernel(const char ** c,cl_uint count,const char * name,cl_kernel * k,cl_program * p)1493 int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p )
1494 {
1495 int error = 0;
1496 char options[200] = "";
1497
1498 if( gForceFTZ )
1499 {
1500 strcat(options," -cl-denorms-are-zero");
1501 }
1502
1503 if( gTestFastRelaxed )
1504 {
1505 strcat(options, " -cl-fast-relaxed-math");
1506 }
1507
1508 error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1509 if (error != CL_SUCCESS)
1510 {
1511 vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
1512 return error;
1513 }
1514
1515 *k = clCreateKernel( *p, name, &error );
1516 if( NULL == *k || error )
1517 {
1518 char buffer[2048] = "";
1519
1520 vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1521 clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
1522 vlog_error("Log: %s\n", buffer);
1523 clReleaseProgram( *p );
1524 return error;
1525 }
1526
1527 return error;
1528 }
1529
MakeKernels(const char ** c,cl_uint count,const char * name,cl_uint kernel_count,cl_kernel * k,cl_program * p)1530 int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p )
1531 {
1532 int error = 0;
1533 cl_uint i;
1534 char options[200] = "";
1535
1536 if (gForceFTZ)
1537 {
1538 strcat(options," -cl-denorms-are-zero ");
1539 }
1540
1541 if( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT )
1542 {
1543 strcat(options," -cl-fp32-correctly-rounded-divide-sqrt ");
1544 }
1545
1546 if( gTestFastRelaxed )
1547 {
1548 strcat(options, " -cl-fast-relaxed-math");
1549 }
1550
1551 error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1552 if ( error != CL_SUCCESS )
1553 {
1554 vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
1555 return error;
1556 }
1557
1558
1559 memset( k, 0, kernel_count * sizeof( *k) );
1560 for( i = 0; i< kernel_count; i++ )
1561 {
1562 k[i] = clCreateKernel( *p, name, &error );
1563 if( NULL == k[i]|| error )
1564 {
1565 char buffer[2048] = "";
1566
1567 vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1568 clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL);
1569 vlog_error("Log: %s\n", buffer);
1570 clReleaseProgram( *p );
1571 return error;
1572 }
1573 }
1574
1575 return error;
1576 }
1577
1578
IsInRTZMode(void)1579 static int IsInRTZMode( void )
1580 {
1581 int error;
1582 const char *kernel =
1583 "__kernel void GetRoundingMode( __global int *out )\n"
1584 "{\n"
1585 " volatile float a = 0x1.0p23f;\n"
1586 " volatile float b = -0x1.0p23f;\n"
1587 " out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);\n"
1588 "}\n";
1589
1590 cl_program query;
1591 error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL);
1592 if (error != CL_SUCCESS) {
1593 vlog_error( "Error: Unable to create program to detect RTZ mode for the device. (%d)", error );
1594 return error;
1595 }
1596
1597 cl_kernel k = clCreateKernel( query, "GetRoundingMode", &error );
1598 if( NULL == k || error)
1599 {
1600 vlog_error( "Error: Unable to create kernel to gdetect RTZ mode for the device. Err = %d", error );
1601 return error;
1602 }
1603
1604 if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex])))
1605 {
1606 vlog_error( "Error: Unable to set kernel arg to detect RTZ mode for the device. Err = %d", error );
1607 return error;
1608 }
1609
1610 size_t dim = 1;
1611 if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) ))
1612 {
1613 vlog_error( "Error: Unable to execute kernel to detect RTZ mode for the device. Err = %d", error );
1614 return error;
1615 }
1616
1617 struct{ cl_int isRTZ; }data;
1618 if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL)))
1619 {
1620 vlog_error( "Error: unable to read RTZ mode data from the device. Err = %d", error );
1621 return error;
1622 }
1623
1624 clReleaseKernel(k);
1625 clReleaseProgram(query);
1626
1627 return data.isRTZ;
1628 }
1629
1630 #pragma mark -
1631
1632 const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1633 const int sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1634
1635 // TODO: There is another version of Ulp_Error_Double defined in test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1636 float Bruteforce_Ulp_Error_Double( double test, long double reference )
1637 {
1638 //Check for Non-power-of-two and NaN
1639
1640 // Note: This function presumes that someone has already tested whether the result is correctly,
1641 // rounded before calling this function. That test:
1642 //
1643 // if( (float) reference == test )
1644 // return 0.0f;
1645 //
1646 // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
1647 // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
1648 // results.
1649
1650 // Deal with long double = double
1651 // On most systems long double is a higher precision type than double. They provide either
1652 // a 80-bit or greater floating point type, or they provide a head-tail double double format.
1653 // That is sufficient to represent the accuracy of a floating point result to many more bits
1654 // than double and we can calculate sub-ulp errors. This is the standard system for which this
1655 // test suite is designed.
1656 //
1657 // On some systems double and long double are the same thing. Then we run into a problem,
1658 // because our representation of the infinitely precise result (passed in as reference above)
1659 // can be off by as much as a half double precision ulp itself. In this case, we inflate the
1660 // reported error by half an ulp to take this into account. A more correct and permanent fix
1661 // would be to undertake refactoring the reference code to return results in this format:
1662 //
1663 // typedef struct DoubleReference
1664 // { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise)
1665 // double correctlyRoundedResult; // as best we can
1666 // double ulps; // plus a fractional amount to account for the difference
1667 // }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps.
1668 //
1669 // This would provide a useful higher-than-double precision format for everyone that we can use,
1670 // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
1671 // that use a head to tail double double for long double.
1672
1673 int x;
1674 long double testVal = test;
1675
1676 // First, handle special reference values
1677 if (isinf(reference))
1678 {
1679 if (reference == testVal)
1680 return 0.0f;
1681
1682 return INFINITY;
1683 }
1684
1685 if (isnan(reference))
1686 {
1687 if (isnan(testVal))
1688 return 0.0f;
1689
1690 return INFINITY;
1691 }
1692
1693 if ( 0.0L != reference && 0.5L != frexpl(reference, &x) )
1694 { // Non-zero and Non-power of two
1695
1696 // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
1697 // There is something of a performance optimization here.
1698 if( testVal == reference )
1699 return 0.0f;
1700
1701 // The unbiased exponent of the ulp unit place
1702 int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
1703
1704 // Scale the exponent of the error
1705 float result = (float) scalbnl( testVal - reference, ulp_exp );
1706
1707 // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
1708 if( sizeof(long double) == sizeof( double ) )
1709 result += copysignf( 0.5f, result);
1710
1711 return result;
1712 }
1713
1714 // reference is a normal power of two or a zero
1715 // The unbiased exponent of the ulp unit place
1716 int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
1717
1718 // allow correctly rounded results to pass through unmolested. (We might add error to it below.)
1719 // There is something of a performance optimization here too.
1720 if( testVal == reference )
1721 return 0.0f;
1722
1723 // Scale the exponent of the error
1724 float result = (float) scalbnl( testVal - reference, ulp_exp );
1725
1726 // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
1727 if( sizeof(long double) == sizeof( double ) )
1728 result += copysignf( 0.5f, result);
1729
1730 return result;
1731 }
1732
Abs_Error(float test,double reference)1733 float Abs_Error( float test, double reference )
1734 {
1735 if( isnan(test) && isnan(reference) )
1736 return 0.0f;
1737 return fabs((float)(reference-(double)test));
1738 }
1739
1740 /*
1741 #define HALF_MIN_EXP -13
1742 #define HALF_MANT_DIG 11
1743 float Ulp_Error_Half( float test, double reference )
1744 {
1745 union{ double d; uint64_t u; }u; u.d = reference;
1746
1747 // Note: This function presumes that someone has already tested whether the result is correctly,
1748 // rounded before calling this function. That test:
1749 //
1750 // if( (float) reference == test )
1751 // return 0.0f;
1752 //
1753 // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
1754 // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
1755 // results.
1756
1757 double testVal = test;
1758 if( u.u & 0x000fffffffffffffULL )
1759 { // Non-power of two and NaN
1760 if( isnan( reference ) && isnan( test ) )
1761 return 0.0f; // if we are expecting a NaN, any NaN is fine
1762
1763 // The unbiased exponent of the ulp unit place
1764 int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
1765
1766 // Scale the exponent of the error
1767 return (float) scalbn( testVal - reference, ulp_exp );
1768 }
1769
1770 if( isinf( reference ) )
1771 {
1772 if( (double) test == reference )
1773 return 0.0f;
1774
1775 return (float) (testVal - reference );
1776 }
1777
1778 // reference is a normal power of two or a zero
1779 int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
1780
1781 // Scale the exponent of the error
1782 return (float) scalbn( testVal - reference, ulp_exp );
1783 }
1784 */
1785
1786
1787 #if defined( __APPLE__ )
1788 #include <mach/mach_time.h>
1789 #endif
1790
GetTime(void)1791 uint64_t GetTime( void )
1792 {
1793 #if defined( __APPLE__ )
1794 return mach_absolute_time();
1795 #elif defined(_WIN32) && defined(_MSC_VER)
1796 return ReadTime();
1797 #else
1798 //mach_absolute_time is a high precision timer with precision < 1 microsecond.
1799 #warning need accurate clock here. Times are invalid.
1800 return 0;
1801 #endif
1802 }
1803
1804
1805 #if defined(_WIN32) && defined (_MSC_VER)
1806 /* function is defined in "compat.h" */
1807 #else
SubtractTime(uint64_t endTime,uint64_t startTime)1808 double SubtractTime( uint64_t endTime, uint64_t startTime )
1809 {
1810 uint64_t diff = endTime - startTime;
1811 static double conversion = 0.0;
1812
1813 if( 0.0 == conversion )
1814 {
1815 #if defined( __APPLE__ )
1816 mach_timebase_info_data_t info = {0,0};
1817 kern_return_t err = mach_timebase_info( &info );
1818 if( 0 == err )
1819 conversion = 1e-9 * (double) info.numer / (double) info.denom;
1820 #else
1821 // This function consumes output from GetTime() above, and converts the time to secionds.
1822 #warning need accurate ticks to seconds conversion factor here. Times are invalid.
1823 #endif
1824 }
1825
1826 // strictly speaking we should also be subtracting out timer latency here
1827 return conversion * (double) diff;
1828 }
1829 #endif
1830
RoundUpToNextPowerOfTwo(cl_uint x)1831 cl_uint RoundUpToNextPowerOfTwo( cl_uint x )
1832 {
1833 if( 0 == (x & (x-1)))
1834 return x;
1835
1836 while( x & (x-1) )
1837 x &= x-1;
1838
1839 return x+x;
1840 }
1841
1842