• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "function_list.h"
18 #include "sleep.h"
19 #include "utility.h"
20 
21 #include <algorithm>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ctime>
25 #include <string>
26 #include <vector>
27 
28 #include "harness/errorHelpers.h"
29 #include "harness/kernelHelpers.h"
30 #include "harness/parseParameters.h"
31 #include "harness/typeWrappers.h"
32 
33 #if defined(__APPLE__)
34 #include <sys/sysctl.h>
35 #include <sys/mman.h>
36 #include <libgen.h>
37 #include <sys/time.h>
38 #elif defined(__linux__)
39 #include <unistd.h>
40 #include <sys/syscall.h>
41 #include <linux/sysctl.h>
42 #include <sys/param.h>
43 #endif
44 
45 #if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
46 #include <sys/param.h>
47 #endif
48 
49 #include "harness/testHarness.h"
50 
51 #define kPageSize 4096
52 #define DOUBLE_REQUIRED_FEATURES                                               \
53     (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO                  \
54      | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
55 
56 static std::vector<const char *> gTestNames;
57 static char appName[MAXPATHLEN] = "";
58 cl_device_id gDevice = NULL;
59 cl_context gContext = NULL;
60 cl_command_queue gQueue = NULL;
61 static size_t gStartTestNumber = ~0u;
62 static size_t gEndTestNumber = ~0u;
63 int gSkipCorrectnessTesting = 0;
64 static int gStopOnError = 0;
65 static bool gSkipRestOfTests;
66 int gForceFTZ = 0;
67 int gWimpyMode = 0;
68 static int gHasDouble = 0;
69 static int gTestFloat = 1;
70 // This flag should be 'ON' by default and it can be changed through the command
71 // line arguments.
72 static int gTestFastRelaxed = 1;
73 /*This flag corresponds to defining if the implementation has Derived Fast
74   Relaxed functions. The spec does not specify ULP for derived function.  The
75   derived functions are composed of base functions which are tested for ULP,
76   thus when this flag is enabled, Derived functions will not be tested for ULP,
77   as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
78   device whether it is a derived or non-derived implementation according to
79   OpenCL 2.0 spec then it has to be changed through a command line argument.
80 */
81 int gFastRelaxedDerived = 1;
82 static int gToggleCorrectlyRoundedDivideSqrt = 0;
83 int gDeviceILogb0 = 1;
84 int gDeviceILogbNaN = 1;
85 int gCheckTininessBeforeRounding = 1;
86 int gIsInRTZMode = 0;
87 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
88 uint32_t gMinVectorSizeIndex = 0;
89 void *gIn = NULL;
90 void *gIn2 = NULL;
91 void *gIn3 = NULL;
92 void *gOut_Ref = NULL;
93 void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
94 void *gOut_Ref2 = NULL;
95 void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
96 cl_mem gInBuffer = NULL;
97 cl_mem gInBuffer2 = NULL;
98 cl_mem gInBuffer3 = NULL;
99 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
100 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
101 static MTdataHolder gMTdata;
102 cl_device_fp_config gFloatCapabilities = 0;
103 int gWimpyReductionFactor = 32;
104 int gVerboseBruteForce = 0;
105 
106 static int ParseArgs(int argc, const char **argv);
107 static void PrintUsage(void);
108 static void PrintFunctions(void);
109 static test_status InitCL(cl_device_id device);
110 static void ReleaseCL(void);
111 static int InitILogbConstants(void);
112 static int IsTininessDetectedBeforeRounding(void);
113 static int
114 IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
115 
doTest(const char * name)116 static int doTest(const char *name)
117 {
118     if (gSkipRestOfTests)
119     {
120         vlog("Skipping function because of an earlier error.\n");
121         return 1;
122     }
123 
124     int error = 0;
125     const Func *func_data = NULL;
126 
127     for (size_t i = 0; i < functionListCount; i++)
128     {
129         const Func *const temp_func = functionList + i;
130         if (strcmp(temp_func->name, name) == 0)
131         {
132             if ((gStartTestNumber != ~0u && i < gStartTestNumber)
133                 || i > gEndTestNumber)
134             {
135                 vlog("Skipping function #%zu\n", i);
136                 return 0;
137             }
138 
139             func_data = temp_func;
140             break;
141         }
142     }
143 
144     if (func_data == NULL)
145     {
146         vlog("Function '%s' doesn't exist!\n", name);
147         exit(EXIT_FAILURE);
148     }
149 
150     if (func_data->func.p == NULL)
151     {
152         vlog("'%s' is missing implementation, skipping function.\n",
153              func_data->name);
154         return 0;
155     }
156 
157     // if correctly rounded divide & sqrt are supported by the implementation
158     // then test it; otherwise skip the test
159     if (strcmp(func_data->name, "sqrt_cr") == 0
160         || strcmp(func_data->name, "divide_cr") == 0)
161     {
162         if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
163         {
164             vlog("Correctly rounded divide and sqrt are not supported, "
165                  "skipping function.\n");
166             return 0;
167         }
168     }
169 
170     {
171         if (0 == strcmp("ilogb", func_data->name))
172         {
173             InitILogbConstants();
174         }
175 
176         if (gTestFastRelaxed && func_data->relaxed)
177         {
178             if (get_device_cl_version(gDevice) > Version(1, 2))
179             {
180                 gTestCount++;
181                 vlog("%3d: ", gTestCount);
182                 // Test with relaxed requirements here.
183                 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
184                                                   true /* relaxed mode */))
185                 {
186                     gFailCount++;
187                     error++;
188                     if (gStopOnError)
189                     {
190                         gSkipRestOfTests = true;
191                         return error;
192                     }
193                 }
194             }
195             else
196             {
197                 vlog("Skipping reduced precision testing for device with "
198                      "version 1.2 or less\n");
199             }
200         }
201 
202         if (gTestFloat)
203         {
204             gTestCount++;
205             vlog("%3d: ", gTestCount);
206             // Don't test with relaxed requirements.
207             if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
208                                               false /* relaxed mode */))
209             {
210                 gFailCount++;
211                 error++;
212                 if (gStopOnError)
213                 {
214                     gSkipRestOfTests = true;
215                     return error;
216                 }
217             }
218         }
219 
220         if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
221             && NULL != func_data->dfunc.p)
222         {
223             gTestCount++;
224             vlog("%3d: ", gTestCount);
225             // Don't test with relaxed requirements.
226             if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
227                                                     false /* relaxed mode*/))
228             {
229                 gFailCount++;
230                 error++;
231                 if (gStopOnError)
232                 {
233                     gSkipRestOfTests = true;
234                     return error;
235                 }
236             }
237         }
238     }
239 
240     return error;
241 }
242 
243 
244 #define TEST_LAMBDA(name)                                                      \
245     [](cl_device_id, cl_context, cl_command_queue, int) {                      \
246         return doTest(#name);                                                  \
247     }
248 
249 // Redefine ADD_TEST to use TEST_LAMBDA.
250 #undef ADD_TEST
251 #define ADD_TEST(name)                                                         \
252     {                                                                          \
253         TEST_LAMBDA(name), #name, Version(1, 0)                                \
254     }
255 
256 static test_definition test_list[] = {
257     ADD_TEST(acos),          ADD_TEST(acosh),      ADD_TEST(acospi),
258     ADD_TEST(asin),          ADD_TEST(asinh),      ADD_TEST(asinpi),
259     ADD_TEST(atan),          ADD_TEST(atanh),      ADD_TEST(atanpi),
260     ADD_TEST(atan2),         ADD_TEST(atan2pi),    ADD_TEST(cbrt),
261     ADD_TEST(ceil),          ADD_TEST(copysign),   ADD_TEST(cos),
262     ADD_TEST(cosh),          ADD_TEST(cospi),      ADD_TEST(exp),
263     ADD_TEST(exp2),          ADD_TEST(exp10),      ADD_TEST(expm1),
264     ADD_TEST(fabs),          ADD_TEST(fdim),       ADD_TEST(floor),
265     ADD_TEST(fma),           ADD_TEST(fmax),       ADD_TEST(fmin),
266     ADD_TEST(fmod),          ADD_TEST(fract),      ADD_TEST(frexp),
267     ADD_TEST(hypot),         ADD_TEST(ilogb),      ADD_TEST(isequal),
268     ADD_TEST(isfinite),      ADD_TEST(isgreater),  ADD_TEST(isgreaterequal),
269     ADD_TEST(isinf),         ADD_TEST(isless),     ADD_TEST(islessequal),
270     ADD_TEST(islessgreater), ADD_TEST(isnan),      ADD_TEST(isnormal),
271     ADD_TEST(isnotequal),    ADD_TEST(isordered),  ADD_TEST(isunordered),
272     ADD_TEST(ldexp),         ADD_TEST(lgamma),     ADD_TEST(lgamma_r),
273     ADD_TEST(log),           ADD_TEST(log2),       ADD_TEST(log10),
274     ADD_TEST(log1p),         ADD_TEST(logb),       ADD_TEST(mad),
275     ADD_TEST(maxmag),        ADD_TEST(minmag),     ADD_TEST(modf),
276     ADD_TEST(nan),           ADD_TEST(nextafter),  ADD_TEST(pow),
277     ADD_TEST(pown),          ADD_TEST(powr),       ADD_TEST(remainder),
278     ADD_TEST(remquo),        ADD_TEST(rint),       ADD_TEST(rootn),
279     ADD_TEST(round),         ADD_TEST(rsqrt),      ADD_TEST(signbit),
280     ADD_TEST(sin),           ADD_TEST(sincos),     ADD_TEST(sinh),
281     ADD_TEST(sinpi),         ADD_TEST(sqrt),       ADD_TEST(sqrt_cr),
282     ADD_TEST(tan),           ADD_TEST(tanh),       ADD_TEST(tanpi),
283     ADD_TEST(trunc),         ADD_TEST(half_cos),   ADD_TEST(half_divide),
284     ADD_TEST(half_exp),      ADD_TEST(half_exp2),  ADD_TEST(half_exp10),
285     ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
286     ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
287     ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
288     ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
289     ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
290     ADD_TEST(not),
291 };
292 
293 #undef ADD_TEST
294 #undef TEST_LAMBDA
295 
296 static const int test_num = ARRAY_SIZE(test_list);
297 
298 #pragma mark -
299 
main(int argc,const char * argv[])300 int main(int argc, const char *argv[])
301 {
302     int error;
303 
304     argc = parseCustomParam(argc, argv);
305     if (argc == -1)
306     {
307         return -1;
308     }
309 
310     error = ParseArgs(argc, argv);
311     if (error) return error;
312 
313     // This takes a while, so prevent the machine from going to sleep.
314     PreventSleep();
315     atexit(ResumeSleep);
316 
317     if (gSkipCorrectnessTesting)
318         vlog("*** Skipping correctness testing! ***\n\n");
319     else if (gStopOnError)
320         vlog("Stopping at first error.\n");
321 
322     vlog("   \t                                        ");
323     if (gWimpyMode) vlog("   ");
324     if (!gSkipCorrectnessTesting) vlog("\t  max_ulps");
325 
326     vlog("\n-------------------------------------------------------------------"
327          "----------------------------------------\n");
328 
329     gMTdata = MTdataHolder(gRandomSeed);
330 
331     FPU_mode_type oldMode;
332     DisableFTZ(&oldMode);
333 
334     int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
335                                       test_num, test_list, true, 0, InitCL);
336 
337     RestoreFPState(&oldMode);
338 
339     if (gQueue)
340     {
341         int error_code = clFinish(gQueue);
342         if (error_code) vlog_error("clFinish failed:%d\n", error_code);
343     }
344 
345     ReleaseCL();
346 
347     return ret;
348 }
349 
ParseArgs(int argc,const char ** argv)350 static int ParseArgs(int argc, const char **argv)
351 {
352     // We only pass test names to runTestHarnessWithCheck, hence global command
353     // line options defined by the harness cannot be used by the user.
354     // To respect the implementation details of runTestHarnessWithCheck,
355     // gTestNames[0] has to exist although its value is not important.
356     gTestNames.push_back("");
357 
358     int singleThreaded = 0;
359 
360     { // Extract the app name
361         strncpy(appName, argv[0], MAXPATHLEN - 1);
362         appName[MAXPATHLEN - 1] = '\0';
363 
364 #if defined(__APPLE__)
365         char baseName[MAXPATHLEN];
366         char *base = NULL;
367         strncpy(baseName, argv[0], MAXPATHLEN - 1);
368         baseName[MAXPATHLEN - 1] = '\0';
369         base = basename(baseName);
370         if (NULL != base)
371         {
372             strncpy(appName, base, sizeof(appName) - 1);
373             appName[sizeof(appName) - 1] = '\0';
374         }
375 #endif
376     }
377 
378     vlog("\n%s\t", appName);
379     for (int i = 1; i < argc; i++)
380     {
381         const char *arg = argv[i];
382         if (NULL == arg) break;
383 
384         vlog("\t%s", arg);
385         int optionFound = 0;
386         if (arg[0] == '-')
387         {
388             while (arg[1] != '\0')
389             {
390                 arg++;
391                 optionFound = 1;
392                 switch (*arg)
393                 {
394                     case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
395 
396                     case 'd': gHasDouble ^= 1; break;
397 
398                     case 'e': gFastRelaxedDerived ^= 1; break;
399 
400                     case 'f': gTestFloat ^= 1; break;
401 
402                     case 'h': PrintUsage(); return -1;
403 
404                     case 'p': PrintFunctions(); return -1;
405 
406                     case 'l': gSkipCorrectnessTesting ^= 1; break;
407 
408                     case 'm': singleThreaded ^= 1; break;
409 
410                     case 'r': gTestFastRelaxed ^= 1; break;
411 
412                     case 's': gStopOnError ^= 1; break;
413 
414                     case 'v': gVerboseBruteForce ^= 1; break;
415 
416                     case 'w': // wimpy mode
417                         gWimpyMode ^= 1;
418                         break;
419 
420                     case '[':
421                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
422                         break;
423 
424                     case 'z': gForceFTZ ^= 1; break;
425 
426                     case '1':
427                         if (arg[1] == '6')
428                         {
429                             gMinVectorSizeIndex = 5;
430                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
431                             arg++;
432                         }
433                         else
434                         {
435                             gMinVectorSizeIndex = 0;
436                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
437                         }
438                         break;
439                     case '2':
440                         gMinVectorSizeIndex = 1;
441                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
442                         break;
443                     case '3':
444                         gMinVectorSizeIndex = 2;
445                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
446                         break;
447                     case '4':
448                         gMinVectorSizeIndex = 3;
449                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
450                         break;
451                     case '8':
452                         gMinVectorSizeIndex = 4;
453                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
454                         break;
455 
456                     default:
457                         vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
458                         PrintUsage();
459                         return -1;
460                 }
461             }
462         }
463 
464         if (!optionFound)
465         {
466             char *t = NULL;
467             long number = strtol(arg, &t, 0);
468             if (t != arg)
469             {
470                 if (~0u == gStartTestNumber)
471                     gStartTestNumber = (int32_t)number;
472                 else
473                     gEndTestNumber = gStartTestNumber + (int32_t)number;
474             }
475             else
476             {
477                 // Make sure this is a valid name
478                 unsigned int k;
479                 for (k = 0; k < functionListCount; k++)
480                 {
481                     const Func *f = functionList + k;
482                     if (strcmp(arg, f->name) == 0)
483                     {
484                         gTestNames.push_back(arg);
485                         break;
486                     }
487                 }
488                 // If we didn't find it in the list of test names
489                 if (k >= functionListCount)
490                 {
491                     gTestNames.push_back(arg);
492                 }
493             }
494         }
495     }
496 
497     // Check for the wimpy mode environment variable
498     if (getenv("CL_WIMPY_MODE"))
499     {
500         vlog("\n");
501         vlog("*** Detected CL_WIMPY_MODE env                          ***\n");
502         gWimpyMode = 1;
503     }
504 
505     PrintArch();
506 
507     if (gWimpyMode)
508     {
509         vlog("\n");
510         vlog("*** WARNING: Testing in Wimpy mode!                     ***\n");
511         vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
512         vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
513              gWimpyReductionFactor);
514     }
515 
516     if (singleThreaded) SetThreadCount(1);
517 
518     return 0;
519 }
520 
521 
PrintFunctions(void)522 static void PrintFunctions(void)
523 {
524     vlog("\nMath function names:\n");
525     for (size_t i = 0; i < functionListCount; i++)
526     {
527         vlog("\t%s\n", functionList[i].name);
528     }
529 }
530 
PrintUsage(void)531 static void PrintUsage(void)
532 {
533     vlog("%s [-cglsz]: <optional: math function names>\n", appName);
534     vlog("\toptions:\n");
535     vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
536          "off)\n");
537     vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
538          "on)\n");
539     vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
540     vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
541     vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
542          "precision. (Default: on)\n");
543     vlog("\t\t-h\tPrint this message and quit\n");
544     vlog("\t\t-p\tPrint all math function names and quit\n");
545     vlog("\t\t-l\tlink check only (make sure functions are present, skip "
546          "accuracy checks.)\n");
547     vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
548     vlog("\t\t-s\tStop on error\n");
549     vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
550     vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
551          "1-10, default factor(%u)\n",
552          gWimpyReductionFactor);
553     vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
554          "device capabilities by default.)\n");
555     vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
556     vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
557          "\"-16\" tests 16-wide vectors only.\n");
558     vlog("\n\tYou may also pass a number instead of a function name.\n");
559     vlog("\tThis causes the first N tests to be skipped. The tests are "
560          "numbered.\n");
561     vlog("\tIf you pass a second number, that is the number tests to run after "
562          "the first one.\n");
563     vlog("\tA name list may be used in conjunction with a number range. In "
564          "that case,\n");
565     vlog("\tonly the named cases in the number range will run.\n");
566     vlog("\tYou may also choose to pass no arguments, in which case all tests "
567          "will be run.\n");
568     vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
569          "device.\n");
570     vlog("\n");
571 }
572 
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)573 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
574                                                    const void *private_info,
575                                                    size_t cb, void *user_data)
576 {
577     vlog("%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
578 }
579 
InitCL(cl_device_id device)580 test_status InitCL(cl_device_id device)
581 {
582     int error;
583     uint32_t i;
584     cl_device_type device_type;
585 
586     error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
587                             &device_type, NULL);
588     if (error)
589     {
590         print_error(error, "Unable to get device type");
591         return TEST_FAIL;
592     }
593 
594     gDevice = device;
595 
596     // Check extensions
597     if (is_extension_available(gDevice, "cl_khr_fp64"))
598     {
599         gHasDouble ^= 1;
600 #if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
601         cl_device_fp_config doubleCapabilities = 0;
602         if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
603                                      sizeof(doubleCapabilities),
604                                      &doubleCapabilities, NULL)))
605         {
606             vlog_error("ERROR: Unable to get device "
607                        "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
608                        error);
609             return TEST_FAIL;
610         }
611 
612         if (DOUBLE_REQUIRED_FEATURES
613             != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
614         {
615             std::string list;
616             if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
617             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
618                 list += "CL_FP_ROUND_TO_NEAREST, ";
619             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
620                 list += "CL_FP_ROUND_TO_ZERO, ";
621             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
622                 list += "CL_FP_ROUND_TO_INF, ";
623             if (0 == (doubleCapabilities & CL_FP_INF_NAN))
624                 list += "CL_FP_INF_NAN, ";
625             if (0 == (doubleCapabilities & CL_FP_DENORM))
626                 list += "CL_FP_DENORM, ";
627             vlog_error("ERROR: required double features are missing: %s\n",
628                        list.c_str());
629 
630             return TEST_FAIL;
631         }
632 #else
633         vlog_error("FAIL: device says it supports cl_khr_fp64 but "
634                    "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
635         return TEST_FAIL;
636 #endif
637     }
638 
639     uint32_t deviceFrequency = 0;
640     size_t configSize = sizeof(deviceFrequency);
641     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
642                                  configSize, &deviceFrequency, NULL)))
643         deviceFrequency = 0;
644 
645     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
646                                  sizeof(gFloatCapabilities),
647                                  &gFloatCapabilities, NULL)))
648     {
649         vlog_error(
650             "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
651             error);
652         return TEST_FAIL;
653     }
654 
655     gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
656                                NULL, &error);
657     if (NULL == gContext || error)
658     {
659         vlog_error("clCreateContext failed. (%d) \n", error);
660         return TEST_FAIL;
661     }
662 
663     gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
664     if (NULL == gQueue || error)
665     {
666         vlog_error("clCreateCommandQueue failed. (%d)\n", error);
667         return TEST_FAIL;
668     }
669 
670     // Allocate buffers
671     cl_uint min_alignment = 0;
672     error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
673                             sizeof(cl_uint), (void *)&min_alignment, NULL);
674     if (CL_SUCCESS != error)
675     {
676         vlog_error("clGetDeviceInfo failed. (%d)\n", error);
677         return TEST_FAIL;
678     }
679     min_alignment >>= 3; // convert bits to bytes
680 
681     gIn = align_malloc(BUFFER_SIZE, min_alignment);
682     if (NULL == gIn) return TEST_FAIL;
683     gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
684     if (NULL == gIn2) return TEST_FAIL;
685     gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
686     if (NULL == gIn3) return TEST_FAIL;
687     gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
688     if (NULL == gOut_Ref) return TEST_FAIL;
689     gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
690     if (NULL == gOut_Ref2) return TEST_FAIL;
691 
692     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
693     {
694         gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
695         if (NULL == gOut[i]) return TEST_FAIL;
696         gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
697         if (NULL == gOut2[i]) return TEST_FAIL;
698     }
699 
700     cl_mem_flags device_flags = CL_MEM_READ_ONLY;
701     // save a copy on the host device to make this go faster
702     if (CL_DEVICE_TYPE_CPU == device_type)
703         device_flags |= CL_MEM_USE_HOST_PTR;
704     else
705         device_flags |= CL_MEM_COPY_HOST_PTR;
706 
707     // setup input buffers
708     gInBuffer =
709         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
710     if (gInBuffer == NULL || error)
711     {
712         vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
713         return TEST_FAIL;
714     }
715 
716     gInBuffer2 =
717         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
718     if (gInBuffer2 == NULL || error)
719     {
720         vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
721         return TEST_FAIL;
722     }
723 
724     gInBuffer3 =
725         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
726     if (gInBuffer3 == NULL || error)
727     {
728         vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
729         return TEST_FAIL;
730     }
731 
732 
733     // setup output buffers
734     device_flags = CL_MEM_READ_WRITE;
735     // save a copy on the host device to make this go faster
736     if (CL_DEVICE_TYPE_CPU == device_type)
737         device_flags |= CL_MEM_USE_HOST_PTR;
738     else
739         device_flags |= CL_MEM_COPY_HOST_PTR;
740     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
741     {
742         gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
743                                        gOut[i], &error);
744         if (gOutBuffer[i] == NULL || error)
745         {
746             vlog_error("clCreateBuffer failed for output (%d)\n", error);
747             return TEST_FAIL;
748         }
749         gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
750                                         gOut2[i], &error);
751         if (gOutBuffer2[i] == NULL || error)
752         {
753             vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
754             return TEST_FAIL;
755         }
756     }
757 
758     // we are embedded, check current rounding mode
759     if (gIsEmbedded)
760     {
761         gIsInRTZMode = IsInRTZMode();
762     }
763 
764     // Check tininess detection
765     IsTininessDetectedBeforeRounding();
766 
767     cl_platform_id platform;
768     int err = clGetPlatformIDs(1, &platform, NULL);
769     if (err)
770     {
771         print_error(err, "clGetPlatformIDs failed");
772         return TEST_FAIL;
773     }
774 
775     char c[1024];
776     static const char *no_yes[] = { "NO", "YES" };
777     vlog("\nCompute Device info:\n");
778     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
779     vlog("\tPlatform Version: %s\n", c);
780     clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
781     vlog("\tDevice Name: %s\n", c);
782     clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
783     vlog("\tVendor: %s\n", c);
784     clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
785     vlog("\tDevice Version: %s\n", c);
786     clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
787     vlog("\tCL C Version: %s\n", c);
788     clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
789     vlog("\tDriver Version: %s\n", c);
790     vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
791     vlog("\tSubnormal values supported for floats? %s\n",
792          no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
793     vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
794          no_yes[0
795                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
796     if (gToggleCorrectlyRoundedDivideSqrt)
797     {
798         gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
799     }
800     vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
801          no_yes[0
802                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
803     vlog("\tTesting with FTZ mode ON for floats? %s\n",
804          no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
805     vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
806     vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
807     if (gTestFastRelaxed)
808     {
809         vlog("\tFast relaxed math has derived implementations? %s\n",
810              no_yes[0 != gFastRelaxedDerived]);
811     }
812     vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
813     if (sizeof(long double) == sizeof(double) && gHasDouble)
814     {
815         vlog("\n\t\tWARNING: Host system long double does not have better "
816              "precision than double!\n");
817         vlog("\t\t         All double results that do not match the reference "
818              "result have their reported\n");
819         vlog("\t\t         error inflated by 0.5 ulps to account for the fact "
820              "that this system\n");
821         vlog("\t\t         can not accurately represent the right result to an "
822              "accuracy closer\n");
823         vlog("\t\t         than half an ulp. See comments in "
824              "Bruteforce_Ulp_Error_Double() for more details.\n\n");
825     }
826 
827     vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
828     if (gIsEmbedded)
829         vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
830     vlog("\tTininess is detected before rounding? %s\n",
831          no_yes[0 != gCheckTininessBeforeRounding]);
832     vlog("\tWorker threads: %d\n", GetThreadCount());
833     vlog("\tTesting vector sizes:");
834     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
835         vlog("\t%d", sizeValues[i]);
836 
837     vlog("\n");
838     vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
839     vlog("\n\n");
840 
841     // Check to see if we are using single threaded mode on other than a 1.0
842     // device
843     if (getenv("CL_TEST_SINGLE_THREADED"))
844     {
845 
846         char device_version[1024] = { 0 };
847         clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
848                         device_version, NULL);
849 
850         if (strcmp("OpenCL 1.0 ", device_version))
851         {
852             vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
853                  "Running single threaded.\n");
854         }
855     }
856 
857     return TEST_PASS;
858 }
859 
ReleaseCL(void)860 static void ReleaseCL(void)
861 {
862     uint32_t i;
863     clReleaseMemObject(gInBuffer);
864     clReleaseMemObject(gInBuffer2);
865     clReleaseMemObject(gInBuffer3);
866     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
867     {
868         clReleaseMemObject(gOutBuffer[i]);
869         clReleaseMemObject(gOutBuffer2[i]);
870     }
871     clReleaseCommandQueue(gQueue);
872     clReleaseContext(gContext);
873 
874     align_free(gIn);
875     align_free(gIn2);
876     align_free(gIn3);
877     align_free(gOut_Ref);
878     align_free(gOut_Ref2);
879 
880     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
881     {
882         align_free(gOut[i]);
883         align_free(gOut2[i]);
884     }
885 }
886 
_LogBuildError(cl_program p,int line,const char * file)887 void _LogBuildError(cl_program p, int line, const char *file)
888 {
889     char the_log[2048] = "";
890 
891     vlog_error("%s:%d: Build Log:\n", file, line);
892     if (0
893         == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
894                                  sizeof(the_log), the_log, NULL))
895         vlog_error("%s", the_log);
896     else
897         vlog_error("*** Error getting build log for program %p\n", p);
898 }
899 
InitILogbConstants(void)900 int InitILogbConstants(void)
901 {
902     int error;
903     const char *kernelSource =
904         R"(__kernel void GetILogBConstants( __global int *out )
905         {
906             out[0] = FP_ILOGB0;
907             out[1] = FP_ILOGBNAN;
908         })";
909 
910     clProgramWrapper query;
911     clKernelWrapper kernel;
912     error = create_single_kernel_helper(gContext, &query, &kernel, 1,
913                                         &kernelSource, "GetILogBConstants");
914     if (error != CL_SUCCESS)
915     {
916         vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
917                    "FP_ILOGBNAN for the device. (%d)",
918                    error);
919         return error;
920     }
921 
922     if ((error =
923              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
924                             &gOutBuffer[gMinVectorSizeIndex])))
925     {
926         vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
927                    "FP_ILOGBNAN for the device. Err = %d",
928                    error);
929         return error;
930     }
931 
932     size_t dim = 1;
933     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
934                                         NULL, NULL)))
935     {
936         vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
937                    "FP_ILOGBNAN for the device. Err = %d",
938                    error);
939         return error;
940     }
941 
942     struct
943     {
944         cl_int ilogb0, ilogbnan;
945     } data;
946     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
947                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
948                                      NULL)))
949     {
950         vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
951                    "device. Err = %d",
952                    error);
953         return error;
954     }
955 
956     gDeviceILogb0 = data.ilogb0;
957     gDeviceILogbNaN = data.ilogbnan;
958 
959     return 0;
960 }
961 
IsTininessDetectedBeforeRounding(void)962 int IsTininessDetectedBeforeRounding(void)
963 {
964     int error;
965     const char *kernelSource =
966         R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
967         {
968            volatile float a = 0x1.000002p-126f;
969            volatile float b = 0x1.fffffcp-1f;
970            out[0] = a * b; // product is 0x1.fffffffffff8p-127
971         })";
972 
973     clProgramWrapper query;
974     clKernelWrapper kernel;
975     error =
976         create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
977                                     "IsTininessDetectedBeforeRounding");
978     if (error != CL_SUCCESS)
979     {
980         vlog_error("Error: Unable to create kernel to detect how tininess is "
981                    "detected for the device. (%d)",
982                    error);
983         return error;
984     }
985 
986     if ((error =
987              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
988                             &gOutBuffer[gMinVectorSizeIndex])))
989     {
990         vlog_error("Error: Unable to set kernel arg to detect how tininess is "
991                    "detected  for the device. Err = %d",
992                    error);
993         return error;
994     }
995 
996     size_t dim = 1;
997     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
998                                         NULL, NULL)))
999     {
1000         vlog_error("Error: Unable to execute kernel to detect how tininess is "
1001                    "detected  for the device. Err = %d",
1002                    error);
1003         return error;
1004     }
1005 
1006     struct
1007     {
1008         cl_uint f;
1009     } data;
1010     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1011                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1012                                      NULL)))
1013     {
1014         vlog_error("Error: unable to read result from tininess test from the "
1015                    "device. Err = %d",
1016                    error);
1017         return error;
1018     }
1019 
1020     gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1021 
1022     return 0;
1023 }
1024 
1025 
MakeKernel(const char ** c,cl_uint count,const char * name,cl_kernel * k,cl_program * p,bool relaxedMode)1026 int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
1027                cl_program *p, bool relaxedMode)
1028 {
1029     int error = 0;
1030     char options[200] = "";
1031 
1032     if (gForceFTZ)
1033     {
1034         strcat(options, " -cl-denorms-are-zero");
1035     }
1036 
1037     if (relaxedMode)
1038     {
1039         strcat(options, " -cl-fast-relaxed-math");
1040     }
1041 
1042     error =
1043         create_single_kernel_helper(gContext, p, k, count, c, name, options);
1044     if (error != CL_SUCCESS)
1045     {
1046         vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
1047         return error;
1048     }
1049 
1050     return error;
1051 }
1052 
MakeKernels(const char ** c,cl_uint count,const char * name,cl_uint kernel_count,cl_kernel * k,cl_program * p,bool relaxedMode)1053 int MakeKernels(const char **c, cl_uint count, const char *name,
1054                 cl_uint kernel_count, cl_kernel *k, cl_program *p,
1055                 bool relaxedMode)
1056 {
1057     char options[200] = "";
1058 
1059     if (gForceFTZ)
1060     {
1061         strcat(options, " -cl-denorms-are-zero ");
1062     }
1063 
1064     if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
1065     {
1066         strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
1067     }
1068 
1069     if (relaxedMode)
1070     {
1071         strcat(options, " -cl-fast-relaxed-math");
1072     }
1073 
1074     int error =
1075         create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1076     if (error != CL_SUCCESS)
1077     {
1078         vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
1079         return error;
1080     }
1081 
1082     for (cl_uint i = 0; i < kernel_count; i++)
1083     {
1084         k[i] = clCreateKernel(*p, name, &error);
1085         if (NULL == k[i] || error)
1086         {
1087             char buffer[2048] = "";
1088 
1089             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1090             clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
1091                                   sizeof(buffer), buffer, NULL);
1092             vlog_error("Log: %s\n", buffer);
1093             return error;
1094         }
1095     }
1096 
1097     return error;
1098 }
1099 
1100 
IsInRTZMode(void)1101 static int IsInRTZMode(void)
1102 {
1103     int error;
1104     const char *kernelSource =
1105         R"(__kernel void GetRoundingMode( __global int *out )
1106         {
1107             volatile float a = 0x1.0p23f;
1108             volatile float b = -0x1.0p23f;
1109             out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
1110         })";
1111 
1112     clProgramWrapper query;
1113     clKernelWrapper kernel;
1114     error = create_single_kernel_helper(gContext, &query, &kernel, 1,
1115                                         &kernelSource, "GetRoundingMode");
1116     if (error != CL_SUCCESS)
1117     {
1118         vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
1119                    "device. (%d)",
1120                    error);
1121         return error;
1122     }
1123 
1124     if ((error =
1125              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
1126                             &gOutBuffer[gMinVectorSizeIndex])))
1127     {
1128         vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
1129                    "device. Err = %d",
1130                    error);
1131         return error;
1132     }
1133 
1134     size_t dim = 1;
1135     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1136                                         NULL, NULL)))
1137     {
1138         vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
1139                    "device. Err = %d",
1140                    error);
1141         return error;
1142     }
1143 
1144     struct
1145     {
1146         cl_int isRTZ;
1147     } data;
1148     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1149                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1150                                      NULL)))
1151     {
1152         vlog_error(
1153             "Error: unable to read RTZ mode data from the device. Err = %d",
1154             error);
1155         return error;
1156     }
1157 
1158     return data.isRTZ;
1159 }
1160 
1161 #pragma mark -
1162 
1163 const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1164 const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1165 
1166 // TODO: There is another version of Ulp_Error_Double defined in
1167 // test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1168 float Bruteforce_Ulp_Error_Double(double test, long double reference)
1169 {
1170     // Check for Non-power-of-two and NaN
1171 
1172     // Note: This function presumes that someone has already tested whether the
1173     // result is correctly, rounded before calling this function.  That test:
1174     //
1175     //    if( (float) reference == test )
1176     //        return 0.0f;
1177     //
1178     // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
1179     // before we get here. Otherwise, we'll return inf ulp error here, for what
1180     // are otherwise correctly rounded results.
1181 
1182     // Deal with long double = double
1183     // On most systems long double is a higher precision type than double. They
1184     // provide either a 80-bit or greater floating point type, or they provide a
1185     // head-tail double double format. That is sufficient to represent the
1186     // accuracy of a floating point result to many more bits than double and we
1187     // can calculate sub-ulp errors. This is the standard system for which this
1188     // test suite is designed.
1189     //
1190     // On some systems double and long double are the same thing. Then we run
1191     // into a problem, because our representation of the infinitely precise
1192     // result (passed in as reference above) can be off by as much as a half
1193     // double precision ulp itself.  In this case, we inflate the reported error
1194     // by half an ulp to take this into account.  A more correct and permanent
1195     // fix would be to undertake refactoring the reference code to return
1196     // results in this format:
1197     //
1198     //    typedef struct DoubleReference
1199     //    { // true value = correctlyRoundedResult + ulps *
1200     //    ulp(correctlyRoundedResult)        (infinitely precise)
1201     //        double  correctlyRoundedResult;     // as best we can
1202     //        double  ulps;                       // plus a fractional amount to
1203     //        account for the difference
1204     //    }DoubleReference;                       //     between infinitely
1205     //    precise result and correctlyRoundedResult, in units of ulps.
1206     //
1207     // This would provide a useful higher-than-double precision format for
1208     // everyone that we can use, and would solve a few problems with
1209     // representing absolute errors below DBL_MIN and over DBL_MAX for systems
1210     // that use a head to tail double double for long double.
1211 
1212     int x;
1213     long double testVal = test;
1214 
1215     // First, handle special reference values
1216     if (isinf(reference))
1217     {
1218         if (reference == testVal) return 0.0f;
1219 
1220         return INFINITY;
1221     }
1222 
1223     if (isnan(reference))
1224     {
1225         if (isnan(testVal)) return 0.0f;
1226 
1227         return INFINITY;
1228     }
1229 
1230     if (0.0L != reference && 0.5L != frexpl(reference, &x))
1231     { // Non-zero and Non-power of two
1232 
1233         // allow correctly rounded results to pass through unmolested. (We might
1234         // add error to it below.) There is something of a performance
1235         // optimization here.
1236         if (testVal == reference) return 0.0f;
1237 
1238         // The unbiased exponent of the ulp unit place
1239         int ulp_exp =
1240             DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1);
1241 
1242         // Scale the exponent of the error
1243         float result = (float)scalbnl(testVal - reference, ulp_exp);
1244 
1245         // account for rounding error in reference result on systems that do not
1246         // have a higher precision floating point type (see above)
1247         if (sizeof(long double) == sizeof(double))
1248             result += copysignf(0.5f, result);
1249 
1250         return result;
1251     }
1252 
1253     // reference is a normal power of two or a zero
1254     // The unbiased exponent of the ulp unit place
1255     int ulp_exp =
1256         DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
1257 
1258     // allow correctly rounded results to pass through unmolested. (We might add
1259     // error to it below.) There is something of a performance optimization here
1260     // too.
1261     if (testVal == reference) return 0.0f;
1262 
1263     // Scale the exponent of the error
1264     float result = (float)scalbnl(testVal - reference, ulp_exp);
1265 
1266     // account for rounding error in reference result on systems that do not
1267     // have a higher precision floating point type (see above)
1268     if (sizeof(long double) == sizeof(double))
1269         result += copysignf(0.5f, result);
1270 
1271     return result;
1272 }
1273 
Abs_Error(float test,double reference)1274 float Abs_Error(float test, double reference)
1275 {
1276     if (isnan(test) && isnan(reference)) return 0.0f;
1277     return fabs((float)(reference - (double)test));
1278 }
1279 
RoundUpToNextPowerOfTwo(cl_uint x)1280 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
1281 {
1282     if (0 == (x & (x - 1))) return x;
1283 
1284     while (x & (x - 1)) x &= x - 1;
1285 
1286     return x + x;
1287 }
1288