1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "function_list.h"
18 #include "sleep.h"
19 #include "utility.h"
20
21 #include <algorithm>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ctime>
25 #include <string>
26 #include <vector>
27
28 #include "harness/errorHelpers.h"
29 #include "harness/kernelHelpers.h"
30 #include "harness/parseParameters.h"
31 #include "harness/typeWrappers.h"
32
33 #if defined(__APPLE__)
34 #include <sys/sysctl.h>
35 #include <sys/mman.h>
36 #include <libgen.h>
37 #include <sys/time.h>
38 #elif defined(__linux__)
39 #include <unistd.h>
40 #include <sys/syscall.h>
41 #include <linux/sysctl.h>
42 #include <sys/param.h>
43 #endif
44
45 #if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
46 #include <sys/param.h>
47 #endif
48
49 #include "harness/testHarness.h"
50
51 #define kPageSize 4096
52 #define DOUBLE_REQUIRED_FEATURES \
53 (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
54 | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
55
56 static std::vector<const char *> gTestNames;
57 static char appName[MAXPATHLEN] = "";
58 cl_device_id gDevice = NULL;
59 cl_context gContext = NULL;
60 cl_command_queue gQueue = NULL;
61 static size_t gStartTestNumber = ~0u;
62 static size_t gEndTestNumber = ~0u;
63 int gSkipCorrectnessTesting = 0;
64 static int gStopOnError = 0;
65 static bool gSkipRestOfTests;
66 int gForceFTZ = 0;
67 int gWimpyMode = 0;
68 static int gHasDouble = 0;
69 static int gTestFloat = 1;
70 // This flag should be 'ON' by default and it can be changed through the command
71 // line arguments.
72 static int gTestFastRelaxed = 1;
73 /*This flag corresponds to defining if the implementation has Derived Fast
74 Relaxed functions. The spec does not specify ULP for derived function. The
75 derived functions are composed of base functions which are tested for ULP,
76 thus when this flag is enabled, Derived functions will not be tested for ULP,
77 as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
78 device whether it is a derived or non-derived implementation according to
79 OpenCL 2.0 spec then it has to be changed through a command line argument.
80 */
81 int gFastRelaxedDerived = 1;
82 static int gToggleCorrectlyRoundedDivideSqrt = 0;
83 int gDeviceILogb0 = 1;
84 int gDeviceILogbNaN = 1;
85 int gCheckTininessBeforeRounding = 1;
86 int gIsInRTZMode = 0;
87 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
88 uint32_t gMinVectorSizeIndex = 0;
89 void *gIn = NULL;
90 void *gIn2 = NULL;
91 void *gIn3 = NULL;
92 void *gOut_Ref = NULL;
93 void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
94 void *gOut_Ref2 = NULL;
95 void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
96 cl_mem gInBuffer = NULL;
97 cl_mem gInBuffer2 = NULL;
98 cl_mem gInBuffer3 = NULL;
99 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
100 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
101 static MTdataHolder gMTdata;
102 cl_device_fp_config gFloatCapabilities = 0;
103 int gWimpyReductionFactor = 32;
104 int gVerboseBruteForce = 0;
105
106 static int ParseArgs(int argc, const char **argv);
107 static void PrintUsage(void);
108 static void PrintFunctions(void);
109 static test_status InitCL(cl_device_id device);
110 static void ReleaseCL(void);
111 static int InitILogbConstants(void);
112 static int IsTininessDetectedBeforeRounding(void);
113 static int
114 IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
115
doTest(const char * name)116 static int doTest(const char *name)
117 {
118 if (gSkipRestOfTests)
119 {
120 vlog("Skipping function because of an earlier error.\n");
121 return 1;
122 }
123
124 int error = 0;
125 const Func *func_data = NULL;
126
127 for (size_t i = 0; i < functionListCount; i++)
128 {
129 const Func *const temp_func = functionList + i;
130 if (strcmp(temp_func->name, name) == 0)
131 {
132 if ((gStartTestNumber != ~0u && i < gStartTestNumber)
133 || i > gEndTestNumber)
134 {
135 vlog("Skipping function #%zu\n", i);
136 return 0;
137 }
138
139 func_data = temp_func;
140 break;
141 }
142 }
143
144 if (func_data == NULL)
145 {
146 vlog("Function '%s' doesn't exist!\n", name);
147 exit(EXIT_FAILURE);
148 }
149
150 if (func_data->func.p == NULL)
151 {
152 vlog("'%s' is missing implementation, skipping function.\n",
153 func_data->name);
154 return 0;
155 }
156
157 // if correctly rounded divide & sqrt are supported by the implementation
158 // then test it; otherwise skip the test
159 if (strcmp(func_data->name, "sqrt_cr") == 0
160 || strcmp(func_data->name, "divide_cr") == 0)
161 {
162 if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
163 {
164 vlog("Correctly rounded divide and sqrt are not supported, "
165 "skipping function.\n");
166 return 0;
167 }
168 }
169
170 {
171 if (0 == strcmp("ilogb", func_data->name))
172 {
173 InitILogbConstants();
174 }
175
176 if (gTestFastRelaxed && func_data->relaxed)
177 {
178 if (get_device_cl_version(gDevice) > Version(1, 2))
179 {
180 gTestCount++;
181 vlog("%3d: ", gTestCount);
182 // Test with relaxed requirements here.
183 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
184 true /* relaxed mode */))
185 {
186 gFailCount++;
187 error++;
188 if (gStopOnError)
189 {
190 gSkipRestOfTests = true;
191 return error;
192 }
193 }
194 }
195 else
196 {
197 vlog("Skipping reduced precision testing for device with "
198 "version 1.2 or less\n");
199 }
200 }
201
202 if (gTestFloat)
203 {
204 gTestCount++;
205 vlog("%3d: ", gTestCount);
206 // Don't test with relaxed requirements.
207 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
208 false /* relaxed mode */))
209 {
210 gFailCount++;
211 error++;
212 if (gStopOnError)
213 {
214 gSkipRestOfTests = true;
215 return error;
216 }
217 }
218 }
219
220 if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
221 && NULL != func_data->dfunc.p)
222 {
223 gTestCount++;
224 vlog("%3d: ", gTestCount);
225 // Don't test with relaxed requirements.
226 if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
227 false /* relaxed mode*/))
228 {
229 gFailCount++;
230 error++;
231 if (gStopOnError)
232 {
233 gSkipRestOfTests = true;
234 return error;
235 }
236 }
237 }
238 }
239
240 return error;
241 }
242
243
244 #define TEST_LAMBDA(name) \
245 [](cl_device_id, cl_context, cl_command_queue, int) { \
246 return doTest(#name); \
247 }
248
249 // Redefine ADD_TEST to use TEST_LAMBDA.
250 #undef ADD_TEST
251 #define ADD_TEST(name) \
252 { \
253 TEST_LAMBDA(name), #name, Version(1, 0) \
254 }
255
256 static test_definition test_list[] = {
257 ADD_TEST(acos), ADD_TEST(acosh), ADD_TEST(acospi),
258 ADD_TEST(asin), ADD_TEST(asinh), ADD_TEST(asinpi),
259 ADD_TEST(atan), ADD_TEST(atanh), ADD_TEST(atanpi),
260 ADD_TEST(atan2), ADD_TEST(atan2pi), ADD_TEST(cbrt),
261 ADD_TEST(ceil), ADD_TEST(copysign), ADD_TEST(cos),
262 ADD_TEST(cosh), ADD_TEST(cospi), ADD_TEST(exp),
263 ADD_TEST(exp2), ADD_TEST(exp10), ADD_TEST(expm1),
264 ADD_TEST(fabs), ADD_TEST(fdim), ADD_TEST(floor),
265 ADD_TEST(fma), ADD_TEST(fmax), ADD_TEST(fmin),
266 ADD_TEST(fmod), ADD_TEST(fract), ADD_TEST(frexp),
267 ADD_TEST(hypot), ADD_TEST(ilogb), ADD_TEST(isequal),
268 ADD_TEST(isfinite), ADD_TEST(isgreater), ADD_TEST(isgreaterequal),
269 ADD_TEST(isinf), ADD_TEST(isless), ADD_TEST(islessequal),
270 ADD_TEST(islessgreater), ADD_TEST(isnan), ADD_TEST(isnormal),
271 ADD_TEST(isnotequal), ADD_TEST(isordered), ADD_TEST(isunordered),
272 ADD_TEST(ldexp), ADD_TEST(lgamma), ADD_TEST(lgamma_r),
273 ADD_TEST(log), ADD_TEST(log2), ADD_TEST(log10),
274 ADD_TEST(log1p), ADD_TEST(logb), ADD_TEST(mad),
275 ADD_TEST(maxmag), ADD_TEST(minmag), ADD_TEST(modf),
276 ADD_TEST(nan), ADD_TEST(nextafter), ADD_TEST(pow),
277 ADD_TEST(pown), ADD_TEST(powr), ADD_TEST(remainder),
278 ADD_TEST(remquo), ADD_TEST(rint), ADD_TEST(rootn),
279 ADD_TEST(round), ADD_TEST(rsqrt), ADD_TEST(signbit),
280 ADD_TEST(sin), ADD_TEST(sincos), ADD_TEST(sinh),
281 ADD_TEST(sinpi), ADD_TEST(sqrt), ADD_TEST(sqrt_cr),
282 ADD_TEST(tan), ADD_TEST(tanh), ADD_TEST(tanpi),
283 ADD_TEST(trunc), ADD_TEST(half_cos), ADD_TEST(half_divide),
284 ADD_TEST(half_exp), ADD_TEST(half_exp2), ADD_TEST(half_exp10),
285 ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10),
286 ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
287 ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan),
288 ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide),
289 ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment),
290 ADD_TEST(not),
291 };
292
293 #undef ADD_TEST
294 #undef TEST_LAMBDA
295
296 static const int test_num = ARRAY_SIZE(test_list);
297
298 #pragma mark -
299
main(int argc,const char * argv[])300 int main(int argc, const char *argv[])
301 {
302 int error;
303
304 argc = parseCustomParam(argc, argv);
305 if (argc == -1)
306 {
307 return -1;
308 }
309
310 error = ParseArgs(argc, argv);
311 if (error) return error;
312
313 // This takes a while, so prevent the machine from going to sleep.
314 PreventSleep();
315 atexit(ResumeSleep);
316
317 if (gSkipCorrectnessTesting)
318 vlog("*** Skipping correctness testing! ***\n\n");
319 else if (gStopOnError)
320 vlog("Stopping at first error.\n");
321
322 vlog(" \t ");
323 if (gWimpyMode) vlog(" ");
324 if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
325
326 vlog("\n-------------------------------------------------------------------"
327 "----------------------------------------\n");
328
329 gMTdata = MTdataHolder(gRandomSeed);
330
331 FPU_mode_type oldMode;
332 DisableFTZ(&oldMode);
333
334 int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
335 test_num, test_list, true, 0, InitCL);
336
337 RestoreFPState(&oldMode);
338
339 if (gQueue)
340 {
341 int error_code = clFinish(gQueue);
342 if (error_code) vlog_error("clFinish failed:%d\n", error_code);
343 }
344
345 ReleaseCL();
346
347 return ret;
348 }
349
ParseArgs(int argc,const char ** argv)350 static int ParseArgs(int argc, const char **argv)
351 {
352 // We only pass test names to runTestHarnessWithCheck, hence global command
353 // line options defined by the harness cannot be used by the user.
354 // To respect the implementation details of runTestHarnessWithCheck,
355 // gTestNames[0] has to exist although its value is not important.
356 gTestNames.push_back("");
357
358 int singleThreaded = 0;
359
360 { // Extract the app name
361 strncpy(appName, argv[0], MAXPATHLEN - 1);
362 appName[MAXPATHLEN - 1] = '\0';
363
364 #if defined(__APPLE__)
365 char baseName[MAXPATHLEN];
366 char *base = NULL;
367 strncpy(baseName, argv[0], MAXPATHLEN - 1);
368 baseName[MAXPATHLEN - 1] = '\0';
369 base = basename(baseName);
370 if (NULL != base)
371 {
372 strncpy(appName, base, sizeof(appName) - 1);
373 appName[sizeof(appName) - 1] = '\0';
374 }
375 #endif
376 }
377
378 vlog("\n%s\t", appName);
379 for (int i = 1; i < argc; i++)
380 {
381 const char *arg = argv[i];
382 if (NULL == arg) break;
383
384 vlog("\t%s", arg);
385 int optionFound = 0;
386 if (arg[0] == '-')
387 {
388 while (arg[1] != '\0')
389 {
390 arg++;
391 optionFound = 1;
392 switch (*arg)
393 {
394 case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
395
396 case 'd': gHasDouble ^= 1; break;
397
398 case 'e': gFastRelaxedDerived ^= 1; break;
399
400 case 'f': gTestFloat ^= 1; break;
401
402 case 'h': PrintUsage(); return -1;
403
404 case 'p': PrintFunctions(); return -1;
405
406 case 'l': gSkipCorrectnessTesting ^= 1; break;
407
408 case 'm': singleThreaded ^= 1; break;
409
410 case 'r': gTestFastRelaxed ^= 1; break;
411
412 case 's': gStopOnError ^= 1; break;
413
414 case 'v': gVerboseBruteForce ^= 1; break;
415
416 case 'w': // wimpy mode
417 gWimpyMode ^= 1;
418 break;
419
420 case '[':
421 parseWimpyReductionFactor(arg, gWimpyReductionFactor);
422 break;
423
424 case 'z': gForceFTZ ^= 1; break;
425
426 case '1':
427 if (arg[1] == '6')
428 {
429 gMinVectorSizeIndex = 5;
430 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
431 arg++;
432 }
433 else
434 {
435 gMinVectorSizeIndex = 0;
436 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
437 }
438 break;
439 case '2':
440 gMinVectorSizeIndex = 1;
441 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
442 break;
443 case '3':
444 gMinVectorSizeIndex = 2;
445 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
446 break;
447 case '4':
448 gMinVectorSizeIndex = 3;
449 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
450 break;
451 case '8':
452 gMinVectorSizeIndex = 4;
453 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
454 break;
455
456 default:
457 vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
458 PrintUsage();
459 return -1;
460 }
461 }
462 }
463
464 if (!optionFound)
465 {
466 char *t = NULL;
467 long number = strtol(arg, &t, 0);
468 if (t != arg)
469 {
470 if (~0u == gStartTestNumber)
471 gStartTestNumber = (int32_t)number;
472 else
473 gEndTestNumber = gStartTestNumber + (int32_t)number;
474 }
475 else
476 {
477 // Make sure this is a valid name
478 unsigned int k;
479 for (k = 0; k < functionListCount; k++)
480 {
481 const Func *f = functionList + k;
482 if (strcmp(arg, f->name) == 0)
483 {
484 gTestNames.push_back(arg);
485 break;
486 }
487 }
488 // If we didn't find it in the list of test names
489 if (k >= functionListCount)
490 {
491 gTestNames.push_back(arg);
492 }
493 }
494 }
495 }
496
497 // Check for the wimpy mode environment variable
498 if (getenv("CL_WIMPY_MODE"))
499 {
500 vlog("\n");
501 vlog("*** Detected CL_WIMPY_MODE env ***\n");
502 gWimpyMode = 1;
503 }
504
505 PrintArch();
506
507 if (gWimpyMode)
508 {
509 vlog("\n");
510 vlog("*** WARNING: Testing in Wimpy mode! ***\n");
511 vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
512 vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
513 gWimpyReductionFactor);
514 }
515
516 if (singleThreaded) SetThreadCount(1);
517
518 return 0;
519 }
520
521
PrintFunctions(void)522 static void PrintFunctions(void)
523 {
524 vlog("\nMath function names:\n");
525 for (size_t i = 0; i < functionListCount; i++)
526 {
527 vlog("\t%s\n", functionList[i].name);
528 }
529 }
530
PrintUsage(void)531 static void PrintUsage(void)
532 {
533 vlog("%s [-cglsz]: <optional: math function names>\n", appName);
534 vlog("\toptions:\n");
535 vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
536 "off)\n");
537 vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
538 "on)\n");
539 vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
540 vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
541 vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
542 "precision. (Default: on)\n");
543 vlog("\t\t-h\tPrint this message and quit\n");
544 vlog("\t\t-p\tPrint all math function names and quit\n");
545 vlog("\t\t-l\tlink check only (make sure functions are present, skip "
546 "accuracy checks.)\n");
547 vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
548 vlog("\t\t-s\tStop on error\n");
549 vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
550 vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
551 "1-10, default factor(%u)\n",
552 gWimpyReductionFactor);
553 vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
554 "device capabilities by default.)\n");
555 vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
556 vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
557 "\"-16\" tests 16-wide vectors only.\n");
558 vlog("\n\tYou may also pass a number instead of a function name.\n");
559 vlog("\tThis causes the first N tests to be skipped. The tests are "
560 "numbered.\n");
561 vlog("\tIf you pass a second number, that is the number tests to run after "
562 "the first one.\n");
563 vlog("\tA name list may be used in conjunction with a number range. In "
564 "that case,\n");
565 vlog("\tonly the named cases in the number range will run.\n");
566 vlog("\tYou may also choose to pass no arguments, in which case all tests "
567 "will be run.\n");
568 vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
569 "device.\n");
570 vlog("\n");
571 }
572
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)573 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
574 const void *private_info,
575 size_t cb, void *user_data)
576 {
577 vlog("%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
578 }
579
InitCL(cl_device_id device)580 test_status InitCL(cl_device_id device)
581 {
582 int error;
583 uint32_t i;
584 cl_device_type device_type;
585
586 error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
587 &device_type, NULL);
588 if (error)
589 {
590 print_error(error, "Unable to get device type");
591 return TEST_FAIL;
592 }
593
594 gDevice = device;
595
596 // Check extensions
597 if (is_extension_available(gDevice, "cl_khr_fp64"))
598 {
599 gHasDouble ^= 1;
600 #if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
601 cl_device_fp_config doubleCapabilities = 0;
602 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
603 sizeof(doubleCapabilities),
604 &doubleCapabilities, NULL)))
605 {
606 vlog_error("ERROR: Unable to get device "
607 "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
608 error);
609 return TEST_FAIL;
610 }
611
612 if (DOUBLE_REQUIRED_FEATURES
613 != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
614 {
615 std::string list;
616 if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
617 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
618 list += "CL_FP_ROUND_TO_NEAREST, ";
619 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
620 list += "CL_FP_ROUND_TO_ZERO, ";
621 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
622 list += "CL_FP_ROUND_TO_INF, ";
623 if (0 == (doubleCapabilities & CL_FP_INF_NAN))
624 list += "CL_FP_INF_NAN, ";
625 if (0 == (doubleCapabilities & CL_FP_DENORM))
626 list += "CL_FP_DENORM, ";
627 vlog_error("ERROR: required double features are missing: %s\n",
628 list.c_str());
629
630 return TEST_FAIL;
631 }
632 #else
633 vlog_error("FAIL: device says it supports cl_khr_fp64 but "
634 "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
635 return TEST_FAIL;
636 #endif
637 }
638
639 uint32_t deviceFrequency = 0;
640 size_t configSize = sizeof(deviceFrequency);
641 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
642 configSize, &deviceFrequency, NULL)))
643 deviceFrequency = 0;
644
645 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
646 sizeof(gFloatCapabilities),
647 &gFloatCapabilities, NULL)))
648 {
649 vlog_error(
650 "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
651 error);
652 return TEST_FAIL;
653 }
654
655 gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
656 NULL, &error);
657 if (NULL == gContext || error)
658 {
659 vlog_error("clCreateContext failed. (%d) \n", error);
660 return TEST_FAIL;
661 }
662
663 gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
664 if (NULL == gQueue || error)
665 {
666 vlog_error("clCreateCommandQueue failed. (%d)\n", error);
667 return TEST_FAIL;
668 }
669
670 // Allocate buffers
671 cl_uint min_alignment = 0;
672 error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
673 sizeof(cl_uint), (void *)&min_alignment, NULL);
674 if (CL_SUCCESS != error)
675 {
676 vlog_error("clGetDeviceInfo failed. (%d)\n", error);
677 return TEST_FAIL;
678 }
679 min_alignment >>= 3; // convert bits to bytes
680
681 gIn = align_malloc(BUFFER_SIZE, min_alignment);
682 if (NULL == gIn) return TEST_FAIL;
683 gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
684 if (NULL == gIn2) return TEST_FAIL;
685 gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
686 if (NULL == gIn3) return TEST_FAIL;
687 gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
688 if (NULL == gOut_Ref) return TEST_FAIL;
689 gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
690 if (NULL == gOut_Ref2) return TEST_FAIL;
691
692 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
693 {
694 gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
695 if (NULL == gOut[i]) return TEST_FAIL;
696 gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
697 if (NULL == gOut2[i]) return TEST_FAIL;
698 }
699
700 cl_mem_flags device_flags = CL_MEM_READ_ONLY;
701 // save a copy on the host device to make this go faster
702 if (CL_DEVICE_TYPE_CPU == device_type)
703 device_flags |= CL_MEM_USE_HOST_PTR;
704 else
705 device_flags |= CL_MEM_COPY_HOST_PTR;
706
707 // setup input buffers
708 gInBuffer =
709 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
710 if (gInBuffer == NULL || error)
711 {
712 vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
713 return TEST_FAIL;
714 }
715
716 gInBuffer2 =
717 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
718 if (gInBuffer2 == NULL || error)
719 {
720 vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
721 return TEST_FAIL;
722 }
723
724 gInBuffer3 =
725 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
726 if (gInBuffer3 == NULL || error)
727 {
728 vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
729 return TEST_FAIL;
730 }
731
732
733 // setup output buffers
734 device_flags = CL_MEM_READ_WRITE;
735 // save a copy on the host device to make this go faster
736 if (CL_DEVICE_TYPE_CPU == device_type)
737 device_flags |= CL_MEM_USE_HOST_PTR;
738 else
739 device_flags |= CL_MEM_COPY_HOST_PTR;
740 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
741 {
742 gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
743 gOut[i], &error);
744 if (gOutBuffer[i] == NULL || error)
745 {
746 vlog_error("clCreateBuffer failed for output (%d)\n", error);
747 return TEST_FAIL;
748 }
749 gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
750 gOut2[i], &error);
751 if (gOutBuffer2[i] == NULL || error)
752 {
753 vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
754 return TEST_FAIL;
755 }
756 }
757
758 // we are embedded, check current rounding mode
759 if (gIsEmbedded)
760 {
761 gIsInRTZMode = IsInRTZMode();
762 }
763
764 // Check tininess detection
765 IsTininessDetectedBeforeRounding();
766
767 cl_platform_id platform;
768 int err = clGetPlatformIDs(1, &platform, NULL);
769 if (err)
770 {
771 print_error(err, "clGetPlatformIDs failed");
772 return TEST_FAIL;
773 }
774
775 char c[1024];
776 static const char *no_yes[] = { "NO", "YES" };
777 vlog("\nCompute Device info:\n");
778 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
779 vlog("\tPlatform Version: %s\n", c);
780 clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
781 vlog("\tDevice Name: %s\n", c);
782 clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
783 vlog("\tVendor: %s\n", c);
784 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
785 vlog("\tDevice Version: %s\n", c);
786 clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
787 vlog("\tCL C Version: %s\n", c);
788 clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
789 vlog("\tDriver Version: %s\n", c);
790 vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
791 vlog("\tSubnormal values supported for floats? %s\n",
792 no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
793 vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
794 no_yes[0
795 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
796 if (gToggleCorrectlyRoundedDivideSqrt)
797 {
798 gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
799 }
800 vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
801 no_yes[0
802 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
803 vlog("\tTesting with FTZ mode ON for floats? %s\n",
804 no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
805 vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
806 vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
807 if (gTestFastRelaxed)
808 {
809 vlog("\tFast relaxed math has derived implementations? %s\n",
810 no_yes[0 != gFastRelaxedDerived]);
811 }
812 vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
813 if (sizeof(long double) == sizeof(double) && gHasDouble)
814 {
815 vlog("\n\t\tWARNING: Host system long double does not have better "
816 "precision than double!\n");
817 vlog("\t\t All double results that do not match the reference "
818 "result have their reported\n");
819 vlog("\t\t error inflated by 0.5 ulps to account for the fact "
820 "that this system\n");
821 vlog("\t\t can not accurately represent the right result to an "
822 "accuracy closer\n");
823 vlog("\t\t than half an ulp. See comments in "
824 "Bruteforce_Ulp_Error_Double() for more details.\n\n");
825 }
826
827 vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
828 if (gIsEmbedded)
829 vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
830 vlog("\tTininess is detected before rounding? %s\n",
831 no_yes[0 != gCheckTininessBeforeRounding]);
832 vlog("\tWorker threads: %d\n", GetThreadCount());
833 vlog("\tTesting vector sizes:");
834 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
835 vlog("\t%d", sizeValues[i]);
836
837 vlog("\n");
838 vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
839 vlog("\n\n");
840
841 // Check to see if we are using single threaded mode on other than a 1.0
842 // device
843 if (getenv("CL_TEST_SINGLE_THREADED"))
844 {
845
846 char device_version[1024] = { 0 };
847 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
848 device_version, NULL);
849
850 if (strcmp("OpenCL 1.0 ", device_version))
851 {
852 vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
853 "Running single threaded.\n");
854 }
855 }
856
857 return TEST_PASS;
858 }
859
ReleaseCL(void)860 static void ReleaseCL(void)
861 {
862 uint32_t i;
863 clReleaseMemObject(gInBuffer);
864 clReleaseMemObject(gInBuffer2);
865 clReleaseMemObject(gInBuffer3);
866 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
867 {
868 clReleaseMemObject(gOutBuffer[i]);
869 clReleaseMemObject(gOutBuffer2[i]);
870 }
871 clReleaseCommandQueue(gQueue);
872 clReleaseContext(gContext);
873
874 align_free(gIn);
875 align_free(gIn2);
876 align_free(gIn3);
877 align_free(gOut_Ref);
878 align_free(gOut_Ref2);
879
880 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
881 {
882 align_free(gOut[i]);
883 align_free(gOut2[i]);
884 }
885 }
886
_LogBuildError(cl_program p,int line,const char * file)887 void _LogBuildError(cl_program p, int line, const char *file)
888 {
889 char the_log[2048] = "";
890
891 vlog_error("%s:%d: Build Log:\n", file, line);
892 if (0
893 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
894 sizeof(the_log), the_log, NULL))
895 vlog_error("%s", the_log);
896 else
897 vlog_error("*** Error getting build log for program %p\n", p);
898 }
899
InitILogbConstants(void)900 int InitILogbConstants(void)
901 {
902 int error;
903 const char *kernelSource =
904 R"(__kernel void GetILogBConstants( __global int *out )
905 {
906 out[0] = FP_ILOGB0;
907 out[1] = FP_ILOGBNAN;
908 })";
909
910 clProgramWrapper query;
911 clKernelWrapper kernel;
912 error = create_single_kernel_helper(gContext, &query, &kernel, 1,
913 &kernelSource, "GetILogBConstants");
914 if (error != CL_SUCCESS)
915 {
916 vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
917 "FP_ILOGBNAN for the device. (%d)",
918 error);
919 return error;
920 }
921
922 if ((error =
923 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
924 &gOutBuffer[gMinVectorSizeIndex])))
925 {
926 vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
927 "FP_ILOGBNAN for the device. Err = %d",
928 error);
929 return error;
930 }
931
932 size_t dim = 1;
933 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
934 NULL, NULL)))
935 {
936 vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
937 "FP_ILOGBNAN for the device. Err = %d",
938 error);
939 return error;
940 }
941
942 struct
943 {
944 cl_int ilogb0, ilogbnan;
945 } data;
946 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
947 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
948 NULL)))
949 {
950 vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
951 "device. Err = %d",
952 error);
953 return error;
954 }
955
956 gDeviceILogb0 = data.ilogb0;
957 gDeviceILogbNaN = data.ilogbnan;
958
959 return 0;
960 }
961
IsTininessDetectedBeforeRounding(void)962 int IsTininessDetectedBeforeRounding(void)
963 {
964 int error;
965 const char *kernelSource =
966 R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
967 {
968 volatile float a = 0x1.000002p-126f;
969 volatile float b = 0x1.fffffcp-1f;
970 out[0] = a * b; // product is 0x1.fffffffffff8p-127
971 })";
972
973 clProgramWrapper query;
974 clKernelWrapper kernel;
975 error =
976 create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
977 "IsTininessDetectedBeforeRounding");
978 if (error != CL_SUCCESS)
979 {
980 vlog_error("Error: Unable to create kernel to detect how tininess is "
981 "detected for the device. (%d)",
982 error);
983 return error;
984 }
985
986 if ((error =
987 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
988 &gOutBuffer[gMinVectorSizeIndex])))
989 {
990 vlog_error("Error: Unable to set kernel arg to detect how tininess is "
991 "detected for the device. Err = %d",
992 error);
993 return error;
994 }
995
996 size_t dim = 1;
997 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
998 NULL, NULL)))
999 {
1000 vlog_error("Error: Unable to execute kernel to detect how tininess is "
1001 "detected for the device. Err = %d",
1002 error);
1003 return error;
1004 }
1005
1006 struct
1007 {
1008 cl_uint f;
1009 } data;
1010 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1011 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1012 NULL)))
1013 {
1014 vlog_error("Error: unable to read result from tininess test from the "
1015 "device. Err = %d",
1016 error);
1017 return error;
1018 }
1019
1020 gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1021
1022 return 0;
1023 }
1024
1025
MakeKernel(const char ** c,cl_uint count,const char * name,cl_kernel * k,cl_program * p,bool relaxedMode)1026 int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
1027 cl_program *p, bool relaxedMode)
1028 {
1029 int error = 0;
1030 char options[200] = "";
1031
1032 if (gForceFTZ)
1033 {
1034 strcat(options, " -cl-denorms-are-zero");
1035 }
1036
1037 if (relaxedMode)
1038 {
1039 strcat(options, " -cl-fast-relaxed-math");
1040 }
1041
1042 error =
1043 create_single_kernel_helper(gContext, p, k, count, c, name, options);
1044 if (error != CL_SUCCESS)
1045 {
1046 vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
1047 return error;
1048 }
1049
1050 return error;
1051 }
1052
MakeKernels(const char ** c,cl_uint count,const char * name,cl_uint kernel_count,cl_kernel * k,cl_program * p,bool relaxedMode)1053 int MakeKernels(const char **c, cl_uint count, const char *name,
1054 cl_uint kernel_count, cl_kernel *k, cl_program *p,
1055 bool relaxedMode)
1056 {
1057 char options[200] = "";
1058
1059 if (gForceFTZ)
1060 {
1061 strcat(options, " -cl-denorms-are-zero ");
1062 }
1063
1064 if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
1065 {
1066 strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
1067 }
1068
1069 if (relaxedMode)
1070 {
1071 strcat(options, " -cl-fast-relaxed-math");
1072 }
1073
1074 int error =
1075 create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1076 if (error != CL_SUCCESS)
1077 {
1078 vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
1079 return error;
1080 }
1081
1082 for (cl_uint i = 0; i < kernel_count; i++)
1083 {
1084 k[i] = clCreateKernel(*p, name, &error);
1085 if (NULL == k[i] || error)
1086 {
1087 char buffer[2048] = "";
1088
1089 vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1090 clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
1091 sizeof(buffer), buffer, NULL);
1092 vlog_error("Log: %s\n", buffer);
1093 return error;
1094 }
1095 }
1096
1097 return error;
1098 }
1099
1100
IsInRTZMode(void)1101 static int IsInRTZMode(void)
1102 {
1103 int error;
1104 const char *kernelSource =
1105 R"(__kernel void GetRoundingMode( __global int *out )
1106 {
1107 volatile float a = 0x1.0p23f;
1108 volatile float b = -0x1.0p23f;
1109 out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
1110 })";
1111
1112 clProgramWrapper query;
1113 clKernelWrapper kernel;
1114 error = create_single_kernel_helper(gContext, &query, &kernel, 1,
1115 &kernelSource, "GetRoundingMode");
1116 if (error != CL_SUCCESS)
1117 {
1118 vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
1119 "device. (%d)",
1120 error);
1121 return error;
1122 }
1123
1124 if ((error =
1125 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
1126 &gOutBuffer[gMinVectorSizeIndex])))
1127 {
1128 vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
1129 "device. Err = %d",
1130 error);
1131 return error;
1132 }
1133
1134 size_t dim = 1;
1135 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1136 NULL, NULL)))
1137 {
1138 vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
1139 "device. Err = %d",
1140 error);
1141 return error;
1142 }
1143
1144 struct
1145 {
1146 cl_int isRTZ;
1147 } data;
1148 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1149 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1150 NULL)))
1151 {
1152 vlog_error(
1153 "Error: unable to read RTZ mode data from the device. Err = %d",
1154 error);
1155 return error;
1156 }
1157
1158 return data.isRTZ;
1159 }
1160
1161 #pragma mark -
1162
1163 const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1164 const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1165
1166 // TODO: There is another version of Ulp_Error_Double defined in
1167 // test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1168 float Bruteforce_Ulp_Error_Double(double test, long double reference)
1169 {
1170 // Check for Non-power-of-two and NaN
1171
1172 // Note: This function presumes that someone has already tested whether the
1173 // result is correctly, rounded before calling this function. That test:
1174 //
1175 // if( (float) reference == test )
1176 // return 0.0f;
1177 //
1178 // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
1179 // before we get here. Otherwise, we'll return inf ulp error here, for what
1180 // are otherwise correctly rounded results.
1181
1182 // Deal with long double = double
1183 // On most systems long double is a higher precision type than double. They
1184 // provide either a 80-bit or greater floating point type, or they provide a
1185 // head-tail double double format. That is sufficient to represent the
1186 // accuracy of a floating point result to many more bits than double and we
1187 // can calculate sub-ulp errors. This is the standard system for which this
1188 // test suite is designed.
1189 //
1190 // On some systems double and long double are the same thing. Then we run
1191 // into a problem, because our representation of the infinitely precise
1192 // result (passed in as reference above) can be off by as much as a half
1193 // double precision ulp itself. In this case, we inflate the reported error
1194 // by half an ulp to take this into account. A more correct and permanent
1195 // fix would be to undertake refactoring the reference code to return
1196 // results in this format:
1197 //
1198 // typedef struct DoubleReference
1199 // { // true value = correctlyRoundedResult + ulps *
1200 // ulp(correctlyRoundedResult) (infinitely precise)
1201 // double correctlyRoundedResult; // as best we can
1202 // double ulps; // plus a fractional amount to
1203 // account for the difference
1204 // }DoubleReference; // between infinitely
1205 // precise result and correctlyRoundedResult, in units of ulps.
1206 //
1207 // This would provide a useful higher-than-double precision format for
1208 // everyone that we can use, and would solve a few problems with
1209 // representing absolute errors below DBL_MIN and over DBL_MAX for systems
1210 // that use a head to tail double double for long double.
1211
1212 int x;
1213 long double testVal = test;
1214
1215 // First, handle special reference values
1216 if (isinf(reference))
1217 {
1218 if (reference == testVal) return 0.0f;
1219
1220 return INFINITY;
1221 }
1222
1223 if (isnan(reference))
1224 {
1225 if (isnan(testVal)) return 0.0f;
1226
1227 return INFINITY;
1228 }
1229
1230 if (0.0L != reference && 0.5L != frexpl(reference, &x))
1231 { // Non-zero and Non-power of two
1232
1233 // allow correctly rounded results to pass through unmolested. (We might
1234 // add error to it below.) There is something of a performance
1235 // optimization here.
1236 if (testVal == reference) return 0.0f;
1237
1238 // The unbiased exponent of the ulp unit place
1239 int ulp_exp =
1240 DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1);
1241
1242 // Scale the exponent of the error
1243 float result = (float)scalbnl(testVal - reference, ulp_exp);
1244
1245 // account for rounding error in reference result on systems that do not
1246 // have a higher precision floating point type (see above)
1247 if (sizeof(long double) == sizeof(double))
1248 result += copysignf(0.5f, result);
1249
1250 return result;
1251 }
1252
1253 // reference is a normal power of two or a zero
1254 // The unbiased exponent of the ulp unit place
1255 int ulp_exp =
1256 DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
1257
1258 // allow correctly rounded results to pass through unmolested. (We might add
1259 // error to it below.) There is something of a performance optimization here
1260 // too.
1261 if (testVal == reference) return 0.0f;
1262
1263 // Scale the exponent of the error
1264 float result = (float)scalbnl(testVal - reference, ulp_exp);
1265
1266 // account for rounding error in reference result on systems that do not
1267 // have a higher precision floating point type (see above)
1268 if (sizeof(long double) == sizeof(double))
1269 result += copysignf(0.5f, result);
1270
1271 return result;
1272 }
1273
Abs_Error(float test,double reference)1274 float Abs_Error(float test, double reference)
1275 {
1276 if (isnan(test) && isnan(reference)) return 0.0f;
1277 return fabs((float)(reference - (double)test));
1278 }
1279
RoundUpToNextPowerOfTwo(cl_uint x)1280 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
1281 {
1282 if (0 == (x & (x - 1))) return x;
1283
1284 while (x & (x - 1)) x &= x - 1;
1285
1286 return x + x;
1287 }
1288