1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "function_list.h"
18 #include "sleep.h"
19 #include "utility.h"
20
21 #include <cstdio>
22 #include <cstdlib>
23 #include <ctime>
24 #include <string>
25 #include <vector>
26
27 #include "harness/errorHelpers.h"
28 #include "harness/kernelHelpers.h"
29 #include "harness/parseParameters.h"
30 #include "harness/typeWrappers.h"
31
32 #if defined(__APPLE__)
33 #include <sys/sysctl.h>
34 #include <sys/mman.h>
35 #include <libgen.h>
36 #include <sys/time.h>
37 #elif defined(__linux__)
38 #include <unistd.h>
39 #include <sys/syscall.h>
40 #include <linux/sysctl.h>
41 #include <sys/param.h>
42 #endif
43
44 #if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
45 #include <sys/param.h>
46 #endif
47
48 #include "harness/testHarness.h"
49
50 #define kPageSize 4096
51 #define DOUBLE_REQUIRED_FEATURES \
52 (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO \
53 | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
54
55 static std::vector<const char *> gTestNames;
56 static char appName[MAXPATHLEN] = "";
57 cl_device_id gDevice = NULL;
58 cl_context gContext = NULL;
59 cl_command_queue gQueue = NULL;
60 static int32_t gStartTestNumber = -1;
61 static int32_t gEndTestNumber = -1;
62 int gSkipCorrectnessTesting = 0;
63 static int gStopOnError = 0;
64 static bool gSkipRestOfTests;
65 int gForceFTZ = 0;
66 int gWimpyMode = 0;
67 static int gHasDouble = 0;
68 static int gTestFloat = 1;
69 // This flag should be 'ON' by default and it can be changed through the command
70 // line arguments.
71 static int gTestFastRelaxed = 1;
72 /*This flag corresponds to defining if the implementation has Derived Fast
73 Relaxed functions. The spec does not specify ULP for derived function. The
74 derived functions are composed of base functions which are tested for ULP,
75 thus when this flag is enabled, Derived functions will not be tested for ULP,
76 as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
77 device whether it is a derived or non-derived implementation according to
78 OpenCL 2.0 spec then it has to be changed through a command line argument.
79 */
80 int gFastRelaxedDerived = 1;
81 static int gToggleCorrectlyRoundedDivideSqrt = 0;
82 int gDeviceILogb0 = 1;
83 int gDeviceILogbNaN = 1;
84 int gCheckTininessBeforeRounding = 1;
85 int gIsInRTZMode = 0;
86 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
87 uint32_t gMinVectorSizeIndex = 0;
88 void *gIn = NULL;
89 void *gIn2 = NULL;
90 void *gIn3 = NULL;
91 void *gOut_Ref = NULL;
92 void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
93 void *gOut_Ref2 = NULL;
94 void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
95 cl_mem gInBuffer = NULL;
96 cl_mem gInBuffer2 = NULL;
97 cl_mem gInBuffer3 = NULL;
98 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
99 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
100 static MTdata gMTdata;
101 cl_device_fp_config gFloatCapabilities = 0;
102 int gWimpyReductionFactor = 32;
103 int gVerboseBruteForce = 0;
104
105 static int ParseArgs(int argc, const char **argv);
106 static void PrintUsage(void);
107 static void PrintFunctions(void);
108 static test_status InitCL(cl_device_id device);
109 static void ReleaseCL(void);
110 static int InitILogbConstants(void);
111 static int IsTininessDetectedBeforeRounding(void);
112 static int
113 IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
114
doTest(const char * name)115 static int doTest(const char *name)
116 {
117 if (gSkipRestOfTests)
118 {
119 vlog("Skipping function because of an earlier error.\n");
120 return 1;
121 }
122
123 int error = 0;
124 const Func *func_data = NULL;
125
126 for (size_t i = 0; i < functionListCount; i++)
127 {
128 const Func *const temp_func = functionList + i;
129 if (strcmp(temp_func->name, name) == 0)
130 {
131 if ((gStartTestNumber != -1 && i < gStartTestNumber)
132 || i > gEndTestNumber)
133 {
134 vlog("Skipping function #%d\n", i);
135 return 0;
136 }
137
138 func_data = temp_func;
139 break;
140 }
141 }
142
143 if (func_data == NULL)
144 {
145 vlog("Function '%s' doesn't exist!\n", name);
146 exit(EXIT_FAILURE);
147 }
148
149 if (func_data->func.p == NULL)
150 {
151 vlog("'%s' is missing implementation, skipping function.\n",
152 func_data->name);
153 return 0;
154 }
155
156 // if correctly rounded divide & sqrt are supported by the implementation
157 // then test it; otherwise skip the test
158 if (strcmp(func_data->name, "sqrt_cr") == 0
159 || strcmp(func_data->name, "divide_cr") == 0)
160 {
161 if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
162 {
163 vlog("Correctly rounded divide and sqrt are not supported, "
164 "skipping function.\n");
165 return 0;
166 }
167 }
168
169 {
170 extern int my_ilogb(double);
171 if (0 == strcmp("ilogb", func_data->name))
172 {
173 InitILogbConstants();
174 }
175
176 if (gTestFastRelaxed && func_data->relaxed)
177 {
178 if (get_device_cl_version(gDevice) > Version(1, 2))
179 {
180 gTestCount++;
181 vlog("%3d: ", gTestCount);
182 // Test with relaxed requirements here.
183 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
184 true /* relaxed mode */))
185 {
186 gFailCount++;
187 error++;
188 if (gStopOnError)
189 {
190 gSkipRestOfTests = true;
191 return error;
192 }
193 }
194 }
195 else
196 {
197 vlog("Skipping reduced precision testing for device with "
198 "version 1.2 or less\n");
199 }
200 }
201
202 if (gTestFloat)
203 {
204 gTestCount++;
205 vlog("%3d: ", gTestCount);
206 // Don't test with relaxed requirements.
207 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
208 false /* relaxed mode */))
209 {
210 gFailCount++;
211 error++;
212 if (gStopOnError)
213 {
214 gSkipRestOfTests = true;
215 return error;
216 }
217 }
218 }
219
220 if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
221 && NULL != func_data->dfunc.p)
222 {
223 gTestCount++;
224 vlog("%3d: ", gTestCount);
225 // Don't test with relaxed requirements.
226 if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
227 false /* relaxed mode*/))
228 {
229 gFailCount++;
230 error++;
231 if (gStopOnError)
232 {
233 gSkipRestOfTests = true;
234 return error;
235 }
236 }
237 }
238 }
239
240 return error;
241 }
242
243
244 #define TEST_LAMBDA(name) \
245 [](cl_device_id, cl_context, cl_command_queue, int) { \
246 return doTest(#name); \
247 }
248
249 // Redefine ADD_TEST to use TEST_LAMBDA.
250 #undef ADD_TEST
251 #define ADD_TEST(name) \
252 { \
253 TEST_LAMBDA(name), #name, Version(1, 0) \
254 }
255
256 static test_definition test_list[] = {
257 ADD_TEST(acos), ADD_TEST(acosh), ADD_TEST(acospi),
258 ADD_TEST(asin), ADD_TEST(asinh), ADD_TEST(asinpi),
259 ADD_TEST(atan), ADD_TEST(atanh), ADD_TEST(atanpi),
260 ADD_TEST(atan2), ADD_TEST(atan2pi), ADD_TEST(cbrt),
261 ADD_TEST(ceil), ADD_TEST(copysign), ADD_TEST(cos),
262 ADD_TEST(cosh), ADD_TEST(cospi), ADD_TEST(exp),
263 ADD_TEST(exp2), ADD_TEST(exp10), ADD_TEST(expm1),
264 ADD_TEST(fabs), ADD_TEST(fdim), ADD_TEST(floor),
265 ADD_TEST(fma), ADD_TEST(fmax), ADD_TEST(fmin),
266 ADD_TEST(fmod), ADD_TEST(fract), ADD_TEST(frexp),
267 ADD_TEST(hypot), ADD_TEST(ilogb), ADD_TEST(isequal),
268 ADD_TEST(isfinite), ADD_TEST(isgreater), ADD_TEST(isgreaterequal),
269 ADD_TEST(isinf), ADD_TEST(isless), ADD_TEST(islessequal),
270 ADD_TEST(islessgreater), ADD_TEST(isnan), ADD_TEST(isnormal),
271 ADD_TEST(isnotequal), ADD_TEST(isordered), ADD_TEST(isunordered),
272 ADD_TEST(ldexp), ADD_TEST(lgamma), ADD_TEST(lgamma_r),
273 ADD_TEST(log), ADD_TEST(log2), ADD_TEST(log10),
274 ADD_TEST(log1p), ADD_TEST(logb), ADD_TEST(mad),
275 ADD_TEST(maxmag), ADD_TEST(minmag), ADD_TEST(modf),
276 ADD_TEST(nan), ADD_TEST(nextafter), ADD_TEST(pow),
277 ADD_TEST(pown), ADD_TEST(powr), ADD_TEST(remainder),
278 ADD_TEST(remquo), ADD_TEST(rint), ADD_TEST(rootn),
279 ADD_TEST(round), ADD_TEST(rsqrt), ADD_TEST(signbit),
280 ADD_TEST(sin), ADD_TEST(sincos), ADD_TEST(sinh),
281 ADD_TEST(sinpi), ADD_TEST(sqrt), ADD_TEST(sqrt_cr),
282 ADD_TEST(tan), ADD_TEST(tanh), ADD_TEST(tanpi),
283 ADD_TEST(trunc), ADD_TEST(half_cos), ADD_TEST(half_divide),
284 ADD_TEST(half_exp), ADD_TEST(half_exp2), ADD_TEST(half_exp10),
285 ADD_TEST(half_log), ADD_TEST(half_log2), ADD_TEST(half_log10),
286 ADD_TEST(half_powr), ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
287 ADD_TEST(half_sin), ADD_TEST(half_sqrt), ADD_TEST(half_tan),
288 ADD_TEST(add), ADD_TEST(subtract), ADD_TEST(divide),
289 ADD_TEST(divide_cr), ADD_TEST(multiply), ADD_TEST(assignment),
290 ADD_TEST(not),
291 };
292
293 #undef ADD_TEST
294 #undef TEST_LAMBDA
295
296 static const int test_num = ARRAY_SIZE(test_list);
297
298 #pragma mark -
299
main(int argc,const char * argv[])300 int main(int argc, const char *argv[])
301 {
302 int error;
303
304 argc = parseCustomParam(argc, argv);
305 if (argc == -1)
306 {
307 return -1;
308 }
309
310 error = ParseArgs(argc, argv);
311 if (error) return error;
312
313 // This takes a while, so prevent the machine from going to sleep.
314 PreventSleep();
315 atexit(ResumeSleep);
316
317 if (gSkipCorrectnessTesting)
318 vlog("*** Skipping correctness testing! ***\n\n");
319 else if (gStopOnError)
320 vlog("Stopping at first error.\n");
321
322 vlog(" \t ");
323 if (gWimpyMode) vlog(" ");
324 if (!gSkipCorrectnessTesting) vlog("\t max_ulps");
325
326 vlog("\n-------------------------------------------------------------------"
327 "----------------------------------------\n");
328
329 gMTdata = init_genrand(gRandomSeed);
330
331 FPU_mode_type oldMode;
332 DisableFTZ(&oldMode);
333
334 int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
335 test_num, test_list, true, 0, InitCL);
336
337 RestoreFPState(&oldMode);
338
339 free_mtdata(gMTdata);
340
341 if (gQueue)
342 {
343 int error_code = clFinish(gQueue);
344 if (error_code) vlog_error("clFinish failed:%d\n", error_code);
345 }
346
347 ReleaseCL();
348
349 return ret;
350 }
351
ParseArgs(int argc,const char ** argv)352 static int ParseArgs(int argc, const char **argv)
353 {
354 // We only pass test names to runTestHarnessWithCheck, hence global command
355 // line options defined by the harness cannot be used by the user.
356 // To respect the implementation details of runTestHarnessWithCheck,
357 // gTestNames[0] has to exist although its value is not important.
358 gTestNames.push_back("");
359
360 int singleThreaded = 0;
361
362 { // Extract the app name
363 strncpy(appName, argv[0], MAXPATHLEN);
364
365 #if defined(__APPLE__)
366 char baseName[MAXPATHLEN];
367 char *base = NULL;
368 strncpy(baseName, argv[0], MAXPATHLEN);
369 base = basename(baseName);
370 if (NULL != base)
371 {
372 strncpy(appName, base, sizeof(appName));
373 appName[sizeof(appName) - 1] = '\0';
374 }
375 #endif
376 }
377
378 vlog("\n%s\t", appName);
379 for (int i = 1; i < argc; i++)
380 {
381 const char *arg = argv[i];
382 if (NULL == arg) break;
383
384 vlog("\t%s", arg);
385 int optionFound = 0;
386 if (arg[0] == '-')
387 {
388 while (arg[1] != '\0')
389 {
390 arg++;
391 optionFound = 1;
392 switch (*arg)
393 {
394 case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
395
396 case 'd': gHasDouble ^= 1; break;
397
398 case 'e': gFastRelaxedDerived ^= 1; break;
399
400 case 'f': gTestFloat ^= 1; break;
401
402 case 'h': PrintUsage(); return -1;
403
404 case 'p': PrintFunctions(); return -1;
405
406 case 'l': gSkipCorrectnessTesting ^= 1; break;
407
408 case 'm': singleThreaded ^= 1; break;
409
410 case 'r': gTestFastRelaxed ^= 1; break;
411
412 case 's': gStopOnError ^= 1; break;
413
414 case 'v': gVerboseBruteForce ^= 1; break;
415
416 case 'w': // wimpy mode
417 gWimpyMode ^= 1;
418 break;
419
420 case '[':
421 parseWimpyReductionFactor(arg, gWimpyReductionFactor);
422 break;
423
424 case 'z': gForceFTZ ^= 1; break;
425
426 case '1':
427 if (arg[1] == '6')
428 {
429 gMinVectorSizeIndex = 5;
430 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
431 arg++;
432 }
433 else
434 {
435 gMinVectorSizeIndex = 0;
436 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
437 }
438 break;
439 case '2':
440 gMinVectorSizeIndex = 1;
441 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
442 break;
443 case '3':
444 gMinVectorSizeIndex = 2;
445 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
446 break;
447 case '4':
448 gMinVectorSizeIndex = 3;
449 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
450 break;
451 case '8':
452 gMinVectorSizeIndex = 4;
453 gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
454 break;
455
456 default:
457 vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
458 PrintUsage();
459 return -1;
460 }
461 }
462 }
463
464 if (!optionFound)
465 {
466 char *t = NULL;
467 long number = strtol(arg, &t, 0);
468 if (t != arg)
469 {
470 if (-1 == gStartTestNumber)
471 gStartTestNumber = (int32_t)number;
472 else
473 gEndTestNumber = gStartTestNumber + (int32_t)number;
474 }
475 else
476 {
477 // Make sure this is a valid name
478 unsigned int k;
479 for (k = 0; k < functionListCount; k++)
480 {
481 const Func *f = functionList + k;
482 if (strcmp(arg, f->name) == 0)
483 {
484 gTestNames.push_back(arg);
485 break;
486 }
487 }
488 // If we didn't find it in the list of test names
489 if (k >= functionListCount)
490 {
491 gTestNames.push_back(arg);
492 }
493 }
494 }
495 }
496
497 // Check for the wimpy mode environment variable
498 if (getenv("CL_WIMPY_MODE"))
499 {
500 vlog("\n");
501 vlog("*** Detected CL_WIMPY_MODE env ***\n");
502 gWimpyMode = 1;
503 }
504
505 vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
506
507 PrintArch();
508
509 if (gWimpyMode)
510 {
511 vlog("\n");
512 vlog("*** WARNING: Testing in Wimpy mode! ***\n");
513 vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
514 vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
515 gWimpyReductionFactor);
516 }
517
518 if (singleThreaded) SetThreadCount(1);
519
520 return 0;
521 }
522
523
PrintFunctions(void)524 static void PrintFunctions(void)
525 {
526 vlog("\nMath function names:\n");
527 for (int i = 0; i < functionListCount; i++)
528 {
529 vlog("\t%s\n", functionList[i].name);
530 }
531 }
532
PrintUsage(void)533 static void PrintUsage(void)
534 {
535 vlog("%s [-cglsz]: <optional: math function names>\n", appName);
536 vlog("\toptions:\n");
537 vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
538 "off)\n");
539 vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
540 "on)\n");
541 vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
542 vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
543 vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
544 "precision. (Default: on)\n");
545 vlog("\t\t-h\tPrint this message and quit\n");
546 vlog("\t\t-p\tPrint all math function names and quit\n");
547 vlog("\t\t-l\tlink check only (make sure functions are present, skip "
548 "accuracy checks.)\n");
549 vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
550 vlog("\t\t-s\tStop on error\n");
551 vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
552 vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
553 "1-10, default factor(%u)\n",
554 gWimpyReductionFactor);
555 vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
556 "device capabilities by default.)\n");
557 vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
558 vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
559 "\"-16\" tests 16-wide vectors only.\n");
560 vlog("\n\tYou may also pass a number instead of a function name.\n");
561 vlog("\tThis causes the first N tests to be skipped. The tests are "
562 "numbered.\n");
563 vlog("\tIf you pass a second number, that is the number tests to run after "
564 "the first one.\n");
565 vlog("\tA name list may be used in conjunction with a number range. In "
566 "that case,\n");
567 vlog("\tonly the named cases in the number range will run.\n");
568 vlog("\tYou may also choose to pass no arguments, in which case all tests "
569 "will be run.\n");
570 vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
571 "device.\n");
572 vlog("\n");
573 }
574
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)575 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
576 const void *private_info,
577 size_t cb, void *user_data)
578 {
579 vlog("%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
580 }
581
InitCL(cl_device_id device)582 test_status InitCL(cl_device_id device)
583 {
584 int error;
585 uint32_t i;
586 cl_device_type device_type;
587
588 error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
589 &device_type, NULL);
590 if (error)
591 {
592 print_error(error, "Unable to get device type");
593 return TEST_FAIL;
594 }
595
596 gDevice = device;
597
598 // Check extensions
599 if (is_extension_available(gDevice, "cl_khr_fp64"))
600 {
601 gHasDouble ^= 1;
602 #if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
603 cl_device_fp_config doubleCapabilities = 0;
604 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
605 sizeof(doubleCapabilities),
606 &doubleCapabilities, NULL)))
607 {
608 vlog_error("ERROR: Unable to get device "
609 "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
610 error);
611 return TEST_FAIL;
612 }
613
614 if (DOUBLE_REQUIRED_FEATURES
615 != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
616 {
617 std::string list;
618 if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
619 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
620 list += "CL_FP_ROUND_TO_NEAREST, ";
621 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
622 list += "CL_FP_ROUND_TO_ZERO, ";
623 if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
624 list += "CL_FP_ROUND_TO_INF, ";
625 if (0 == (doubleCapabilities & CL_FP_INF_NAN))
626 list += "CL_FP_INF_NAN, ";
627 if (0 == (doubleCapabilities & CL_FP_DENORM))
628 list += "CL_FP_DENORM, ";
629 vlog_error("ERROR: required double features are missing: %s\n",
630 list.c_str());
631
632 return TEST_FAIL;
633 }
634 #else
635 vlog_error("FAIL: device says it supports cl_khr_fp64 but "
636 "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
637 return TEST_FAIL;
638 #endif
639 }
640
641 uint32_t deviceFrequency = 0;
642 size_t configSize = sizeof(deviceFrequency);
643 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
644 configSize, &deviceFrequency, NULL)))
645 deviceFrequency = 0;
646
647 if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
648 sizeof(gFloatCapabilities),
649 &gFloatCapabilities, NULL)))
650 {
651 vlog_error(
652 "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
653 error);
654 return TEST_FAIL;
655 }
656
657 gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
658 NULL, &error);
659 if (NULL == gContext || error)
660 {
661 vlog_error("clCreateContext failed. (%d) \n", error);
662 return TEST_FAIL;
663 }
664
665 gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
666 if (NULL == gQueue || error)
667 {
668 vlog_error("clCreateCommandQueue failed. (%d)\n", error);
669 return TEST_FAIL;
670 }
671
672 // Allocate buffers
673 cl_uint min_alignment = 0;
674 error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
675 sizeof(cl_uint), (void *)&min_alignment, NULL);
676 if (CL_SUCCESS != error)
677 {
678 vlog_error("clGetDeviceInfo failed. (%d)\n", error);
679 return TEST_FAIL;
680 }
681 min_alignment >>= 3; // convert bits to bytes
682
683 gIn = align_malloc(BUFFER_SIZE, min_alignment);
684 if (NULL == gIn) return TEST_FAIL;
685 gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
686 if (NULL == gIn2) return TEST_FAIL;
687 gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
688 if (NULL == gIn3) return TEST_FAIL;
689 gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
690 if (NULL == gOut_Ref) return TEST_FAIL;
691 gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
692 if (NULL == gOut_Ref2) return TEST_FAIL;
693
694 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
695 {
696 gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
697 if (NULL == gOut[i]) return TEST_FAIL;
698 gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
699 if (NULL == gOut2[i]) return TEST_FAIL;
700 }
701
702 cl_mem_flags device_flags = CL_MEM_READ_ONLY;
703 // save a copy on the host device to make this go faster
704 if (CL_DEVICE_TYPE_CPU == device_type)
705 device_flags |= CL_MEM_USE_HOST_PTR;
706 else
707 device_flags |= CL_MEM_COPY_HOST_PTR;
708
709 // setup input buffers
710 gInBuffer =
711 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
712 if (gInBuffer == NULL || error)
713 {
714 vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
715 return TEST_FAIL;
716 }
717
718 gInBuffer2 =
719 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
720 if (gInBuffer2 == NULL || error)
721 {
722 vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
723 return TEST_FAIL;
724 }
725
726 gInBuffer3 =
727 clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
728 if (gInBuffer3 == NULL || error)
729 {
730 vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
731 return TEST_FAIL;
732 }
733
734
735 // setup output buffers
736 device_flags = CL_MEM_READ_WRITE;
737 // save a copy on the host device to make this go faster
738 if (CL_DEVICE_TYPE_CPU == device_type)
739 device_flags |= CL_MEM_USE_HOST_PTR;
740 else
741 device_flags |= CL_MEM_COPY_HOST_PTR;
742 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
743 {
744 gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
745 gOut[i], &error);
746 if (gOutBuffer[i] == NULL || error)
747 {
748 vlog_error("clCreateBuffer failed for output (%d)\n", error);
749 return TEST_FAIL;
750 }
751 gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
752 gOut2[i], &error);
753 if (gOutBuffer2[i] == NULL || error)
754 {
755 vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
756 return TEST_FAIL;
757 }
758 }
759
760 // we are embedded, check current rounding mode
761 if (gIsEmbedded)
762 {
763 gIsInRTZMode = IsInRTZMode();
764 }
765
766 // Check tininess detection
767 IsTininessDetectedBeforeRounding();
768
769 cl_platform_id platform;
770 int err = clGetPlatformIDs(1, &platform, NULL);
771 if (err)
772 {
773 print_error(err, "clGetPlatformIDs failed");
774 return TEST_FAIL;
775 }
776
777 char c[1024];
778 static const char *no_yes[] = { "NO", "YES" };
779 vlog("\nCompute Device info:\n");
780 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
781 vlog("\tPlatform Version: %s\n", c);
782 clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
783 vlog("\tDevice Name: %s\n", c);
784 clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
785 vlog("\tVendor: %s\n", c);
786 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
787 vlog("\tDevice Version: %s\n", c);
788 clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
789 vlog("\tCL C Version: %s\n", c);
790 clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
791 vlog("\tDriver Version: %s\n", c);
792 vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
793 vlog("\tSubnormal values supported for floats? %s\n",
794 no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
795 vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
796 no_yes[0
797 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
798 if (gToggleCorrectlyRoundedDivideSqrt)
799 {
800 gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
801 }
802 vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
803 no_yes[0
804 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
805 vlog("\tTesting with FTZ mode ON for floats? %s\n",
806 no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
807 vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
808 vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
809 if (gTestFastRelaxed)
810 {
811 vlog("\tFast relaxed math has derived implementations? %s\n",
812 no_yes[0 != gFastRelaxedDerived]);
813 }
814 vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
815 if (sizeof(long double) == sizeof(double) && gHasDouble)
816 {
817 vlog("\n\t\tWARNING: Host system long double does not have better "
818 "precision than double!\n");
819 vlog("\t\t All double results that do not match the reference "
820 "result have their reported\n");
821 vlog("\t\t error inflated by 0.5 ulps to account for the fact "
822 "that this system\n");
823 vlog("\t\t can not accurately represent the right result to an "
824 "accuracy closer\n");
825 vlog("\t\t than half an ulp. See comments in "
826 "Bruteforce_Ulp_Error_Double() for more details.\n\n");
827 }
828
829 vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
830 if (gIsEmbedded)
831 vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
832 vlog("\tTininess is detected before rounding? %s\n",
833 no_yes[0 != gCheckTininessBeforeRounding]);
834 vlog("\tWorker threads: %d\n", GetThreadCount());
835 vlog("\tTesting vector sizes:");
836 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
837 vlog("\t%d", sizeValues[i]);
838
839 vlog("\n");
840 vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
841 vlog("\n\n");
842
843 // Check to see if we are using single threaded mode on other than a 1.0
844 // device
845 if (getenv("CL_TEST_SINGLE_THREADED"))
846 {
847
848 char device_version[1024] = { 0 };
849 clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
850 device_version, NULL);
851
852 if (strcmp("OpenCL 1.0 ", device_version))
853 {
854 vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
855 "Running single threaded.\n");
856 }
857 }
858
859 return TEST_PASS;
860 }
861
ReleaseCL(void)862 static void ReleaseCL(void)
863 {
864 uint32_t i;
865 clReleaseMemObject(gInBuffer);
866 clReleaseMemObject(gInBuffer2);
867 clReleaseMemObject(gInBuffer3);
868 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
869 {
870 clReleaseMemObject(gOutBuffer[i]);
871 clReleaseMemObject(gOutBuffer2[i]);
872 }
873 clReleaseCommandQueue(gQueue);
874 clReleaseContext(gContext);
875
876 align_free(gIn);
877 align_free(gIn2);
878 align_free(gIn3);
879 align_free(gOut_Ref);
880 align_free(gOut_Ref2);
881
882 for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
883 {
884 align_free(gOut[i]);
885 align_free(gOut2[i]);
886 }
887 }
888
_LogBuildError(cl_program p,int line,const char * file)889 void _LogBuildError(cl_program p, int line, const char *file)
890 {
891 char the_log[2048] = "";
892
893 vlog_error("%s:%d: Build Log:\n", file, line);
894 if (0
895 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
896 sizeof(the_log), the_log, NULL))
897 vlog_error("%s", the_log);
898 else
899 vlog_error("*** Error getting build log for program %p\n", p);
900 }
901
InitILogbConstants(void)902 int InitILogbConstants(void)
903 {
904 int error;
905 const char *kernelSource =
906 R"(__kernel void GetILogBConstants( __global int *out )
907 {
908 out[0] = FP_ILOGB0;
909 out[1] = FP_ILOGBNAN;
910 })";
911
912 clProgramWrapper query;
913 clKernelWrapper kernel;
914 error = create_single_kernel_helper(gContext, &query, &kernel, 1,
915 &kernelSource, "GetILogBConstants");
916 if (error != CL_SUCCESS)
917 {
918 vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
919 "FP_ILOGBNAN for the device. (%d)",
920 error);
921 return error;
922 }
923
924 if ((error =
925 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
926 &gOutBuffer[gMinVectorSizeIndex])))
927 {
928 vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
929 "FP_ILOGBNAN for the device. Err = %d",
930 error);
931 return error;
932 }
933
934 size_t dim = 1;
935 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
936 NULL, NULL)))
937 {
938 vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
939 "FP_ILOGBNAN for the device. Err = %d",
940 error);
941 return error;
942 }
943
944 struct
945 {
946 cl_int ilogb0, ilogbnan;
947 } data;
948 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
949 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
950 NULL)))
951 {
952 vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
953 "device. Err = %d",
954 error);
955 return error;
956 }
957
958 gDeviceILogb0 = data.ilogb0;
959 gDeviceILogbNaN = data.ilogbnan;
960
961 return 0;
962 }
963
IsTininessDetectedBeforeRounding(void)964 int IsTininessDetectedBeforeRounding(void)
965 {
966 int error;
967 const char *kernelSource =
968 R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
969 {
970 volatile float a = 0x1.000002p-126f;
971 volatile float b = 0x1.fffffcp-1f;
972 out[0] = a * b; // product is 0x1.fffffffffff8p-127
973 })";
974
975 clProgramWrapper query;
976 clKernelWrapper kernel;
977 error =
978 create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
979 "IsTininessDetectedBeforeRounding");
980 if (error != CL_SUCCESS)
981 {
982 vlog_error("Error: Unable to create kernel to detect how tininess is "
983 "detected for the device. (%d)",
984 error);
985 return error;
986 }
987
988 if ((error =
989 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
990 &gOutBuffer[gMinVectorSizeIndex])))
991 {
992 vlog_error("Error: Unable to set kernel arg to detect how tininess is "
993 "detected for the device. Err = %d",
994 error);
995 return error;
996 }
997
998 size_t dim = 1;
999 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1000 NULL, NULL)))
1001 {
1002 vlog_error("Error: Unable to execute kernel to detect how tininess is "
1003 "detected for the device. Err = %d",
1004 error);
1005 return error;
1006 }
1007
1008 struct
1009 {
1010 cl_uint f;
1011 } data;
1012 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1013 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1014 NULL)))
1015 {
1016 vlog_error("Error: unable to read result from tininess test from the "
1017 "device. Err = %d",
1018 error);
1019 return error;
1020 }
1021
1022 gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1023
1024 return 0;
1025 }
1026
1027
MakeKernel(const char ** c,cl_uint count,const char * name,cl_kernel * k,cl_program * p,bool relaxedMode)1028 int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
1029 cl_program *p, bool relaxedMode)
1030 {
1031 int error = 0;
1032 char options[200] = "";
1033
1034 if (gForceFTZ)
1035 {
1036 strcat(options, " -cl-denorms-are-zero");
1037 }
1038
1039 if (relaxedMode)
1040 {
1041 strcat(options, " -cl-fast-relaxed-math");
1042 }
1043
1044 error =
1045 create_single_kernel_helper(gContext, p, k, count, c, name, options);
1046 if (error != CL_SUCCESS)
1047 {
1048 vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
1049 return error;
1050 }
1051
1052 return error;
1053 }
1054
MakeKernels(const char ** c,cl_uint count,const char * name,cl_uint kernel_count,cl_kernel * k,cl_program * p,bool relaxedMode)1055 int MakeKernels(const char **c, cl_uint count, const char *name,
1056 cl_uint kernel_count, cl_kernel *k, cl_program *p,
1057 bool relaxedMode)
1058 {
1059 int error = 0;
1060 cl_uint i;
1061 char options[200] = "";
1062
1063 if (gForceFTZ)
1064 {
1065 strcat(options, " -cl-denorms-are-zero ");
1066 }
1067
1068 if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
1069 {
1070 strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
1071 }
1072
1073 if (relaxedMode)
1074 {
1075 strcat(options, " -cl-fast-relaxed-math");
1076 }
1077
1078 error =
1079 create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1080 if (error != CL_SUCCESS)
1081 {
1082 vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
1083 return error;
1084 }
1085
1086
1087 memset(k, 0, kernel_count * sizeof(*k));
1088 for (i = 0; i < kernel_count; i++)
1089 {
1090 k[i] = clCreateKernel(*p, name, &error);
1091 if (NULL == k[i] || error)
1092 {
1093 char buffer[2048] = "";
1094
1095 vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1096 clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
1097 sizeof(buffer), buffer, NULL);
1098 vlog_error("Log: %s\n", buffer);
1099 clReleaseProgram(*p);
1100 return error;
1101 }
1102 }
1103
1104 return error;
1105 }
1106
1107
IsInRTZMode(void)1108 static int IsInRTZMode(void)
1109 {
1110 int error;
1111 const char *kernelSource =
1112 R"(__kernel void GetRoundingMode( __global int *out )
1113 {
1114 volatile float a = 0x1.0p23f;
1115 volatile float b = -0x1.0p23f;
1116 out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
1117 })";
1118
1119 clProgramWrapper query;
1120 clKernelWrapper kernel;
1121 error = create_single_kernel_helper(gContext, &query, &kernel, 1,
1122 &kernelSource, "GetRoundingMode");
1123 if (error != CL_SUCCESS)
1124 {
1125 vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
1126 "device. (%d)",
1127 error);
1128 return error;
1129 }
1130
1131 if ((error =
1132 clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
1133 &gOutBuffer[gMinVectorSizeIndex])))
1134 {
1135 vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
1136 "device. Err = %d",
1137 error);
1138 return error;
1139 }
1140
1141 size_t dim = 1;
1142 if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1143 NULL, NULL)))
1144 {
1145 vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
1146 "device. Err = %d",
1147 error);
1148 return error;
1149 }
1150
1151 struct
1152 {
1153 cl_int isRTZ;
1154 } data;
1155 if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1156 CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1157 NULL)))
1158 {
1159 vlog_error(
1160 "Error: unable to read RTZ mode data from the device. Err = %d",
1161 error);
1162 return error;
1163 }
1164
1165 return data.isRTZ;
1166 }
1167
1168 #pragma mark -
1169
1170 const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1171 const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1172
1173 // TODO: There is another version of Ulp_Error_Double defined in
1174 // test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1175 float Bruteforce_Ulp_Error_Double(double test, long double reference)
1176 {
1177 // Check for Non-power-of-two and NaN
1178
1179 // Note: This function presumes that someone has already tested whether the
1180 // result is correctly, rounded before calling this function. That test:
1181 //
1182 // if( (float) reference == test )
1183 // return 0.0f;
1184 //
1185 // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
1186 // before we get here. Otherwise, we'll return inf ulp error here, for what
1187 // are otherwise correctly rounded results.
1188
1189 // Deal with long double = double
1190 // On most systems long double is a higher precision type than double. They
1191 // provide either a 80-bit or greater floating point type, or they provide a
1192 // head-tail double double format. That is sufficient to represent the
1193 // accuracy of a floating point result to many more bits than double and we
1194 // can calculate sub-ulp errors. This is the standard system for which this
1195 // test suite is designed.
1196 //
1197 // On some systems double and long double are the same thing. Then we run
1198 // into a problem, because our representation of the infinitely precise
1199 // result (passed in as reference above) can be off by as much as a half
1200 // double precision ulp itself. In this case, we inflate the reported error
1201 // by half an ulp to take this into account. A more correct and permanent
1202 // fix would be to undertake refactoring the reference code to return
1203 // results in this format:
1204 //
1205 // typedef struct DoubleReference
1206 // { // true value = correctlyRoundedResult + ulps *
1207 // ulp(correctlyRoundedResult) (infinitely precise)
1208 // double correctlyRoundedResult; // as best we can
1209 // double ulps; // plus a fractional amount to
1210 // account for the difference
1211 // }DoubleReference; // between infinitely
1212 // precise result and correctlyRoundedResult, in units of ulps.
1213 //
1214 // This would provide a useful higher-than-double precision format for
1215 // everyone that we can use, and would solve a few problems with
1216 // representing absolute errors below DBL_MIN and over DBL_MAX for systems
1217 // that use a head to tail double double for long double.
1218
1219 int x;
1220 long double testVal = test;
1221
1222 // First, handle special reference values
1223 if (isinf(reference))
1224 {
1225 if (reference == testVal) return 0.0f;
1226
1227 return INFINITY;
1228 }
1229
1230 if (isnan(reference))
1231 {
1232 if (isnan(testVal)) return 0.0f;
1233
1234 return INFINITY;
1235 }
1236
1237 if (0.0L != reference && 0.5L != frexpl(reference, &x))
1238 { // Non-zero and Non-power of two
1239
1240 // allow correctly rounded results to pass through unmolested. (We might
1241 // add error to it below.) There is something of a performance
1242 // optimization here.
1243 if (testVal == reference) return 0.0f;
1244
1245 // The unbiased exponent of the ulp unit place
1246 int ulp_exp =
1247 DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
1248
1249 // Scale the exponent of the error
1250 float result = (float)scalbnl(testVal - reference, ulp_exp);
1251
1252 // account for rounding error in reference result on systems that do not
1253 // have a higher precision floating point type (see above)
1254 if (sizeof(long double) == sizeof(double))
1255 result += copysignf(0.5f, result);
1256
1257 return result;
1258 }
1259
1260 // reference is a normal power of two or a zero
1261 // The unbiased exponent of the ulp unit place
1262 int ulp_exp =
1263 DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
1264
1265 // allow correctly rounded results to pass through unmolested. (We might add
1266 // error to it below.) There is something of a performance optimization here
1267 // too.
1268 if (testVal == reference) return 0.0f;
1269
1270 // Scale the exponent of the error
1271 float result = (float)scalbnl(testVal - reference, ulp_exp);
1272
1273 // account for rounding error in reference result on systems that do not
1274 // have a higher precision floating point type (see above)
1275 if (sizeof(long double) == sizeof(double))
1276 result += copysignf(0.5f, result);
1277
1278 return result;
1279 }
1280
Abs_Error(float test,double reference)1281 float Abs_Error(float test, double reference)
1282 {
1283 if (isnan(test) && isnan(reference)) return 0.0f;
1284 return fabs((float)(reference - (double)test));
1285 }
1286
RoundUpToNextPowerOfTwo(cl_uint x)1287 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
1288 {
1289 if (0 == (x & (x - 1))) return x;
1290
1291 while (x & (x - 1)) x &= x - 1;
1292
1293 return x + x;
1294 }
1295