1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cinttypes>
23 #include <cstring>
24
25 #define CORRECTLY_ROUNDED 0
26 #define FLUSHED 1
27
28 namespace {
29
BuildKernel(const char * name,int vectorSize,cl_kernel * k,cl_program * p,bool relaxedMode)30 int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
31 bool relaxedMode)
32 {
33 auto kernel_name = GetKernelName(vectorSize);
34 auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
35 ParameterType::Float, ParameterType::Float,
36 ParameterType::Float, vectorSize);
37 std::array<const char *, 1> sources{ source.c_str() };
38 return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
39 relaxedMode);
40 }
41
// Work-item payload for BuildKernelFn: everything one ThreadPool job needs
// to build the kernel/program pair for a single vector size.
// NOTE: member order matters — it must match the brace-initializer used at
// the call site in TestFunc_Float_Float_Float_Float.
struct BuildKernelInfo2
{
    cl_kernel *kernels; // kernel per vector size, indexed by vector size index
    Programs &programs; // program per vector size, indexed by vector size index
    const char *nameInCode; // name of the function under test in CL source
    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
};
49
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)50 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
51 {
52 BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
53 cl_uint vectorSize = gMinVectorSizeIndex + job_id;
54 return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
55 &(info->programs[vectorSize]), info->relaxedMode);
56 }
57
58 // A table of more difficult cases to get right
// A table of more difficult cases to get right
// The first half is the negative values, the second half their positive
// counterparts: NaN, infinity, FLT_MAX, values straddling 2^63/2^64
// (the signed/unsigned 64-bit integer boundary), small integers and
// near-integers, values bracketing 1.0, FLT_MIN, and a descent through
// the subnormals down to the smallest denormal and zero.
const float specialValues[] = {
    -NAN,
    -INFINITY,
    -FLT_MAX,
    MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40),
    MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64),
    MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
    MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39),
    MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63),
    MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
    -3.0f,
    MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23),
    -2.5f,
    MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23),
    -2.0f,
    MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24),
    -1.75f,
    -1.5f,
    -1.25f,
    MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),
    MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24),
    MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24),
    -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
    -1.0f,
    MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
    // smallest normal and the subnormal range below it
    MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150),
    -FLT_MIN,
    MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
    MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150),
    MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
    MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150),
    MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
    MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
    MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
    MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150),
    MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
    MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150),
    -0.0f,

    +NAN,
    +INFINITY,
    +FLT_MAX,
    MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40),
    MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64),
    MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
    MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39),
    MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63),
    MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
    +3.0f,
    MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23),
    2.5f,
    MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),
    +2.0f,
    MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24),
    1.75f,
    1.5f,
    1.25f,
    MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
    MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24),
    MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24),
    +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24),
    +1.0f,
    MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
    // smallest normal and the subnormal range below it
    MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150),
    +FLT_MIN,
    MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
    MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150),
    MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
    MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150),
    MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
    MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
    MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
    MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150),
    MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
    MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150),
    +0.0f,
};

// Number of entries in specialValues.
constexpr size_t specialValuesCount =
    sizeof(specialValues) / sizeof(specialValues[0]);
139
140 } // anonymous namespace
141
// Brute-force conformance test for a ternary float function
// (float op(float, float, float) — i.e. fma).
//
// For each pass: fills the three input buffers (first pass uses an
// "odometer" over all triples from specialValues, subsequent passes use
// random bit patterns), runs the device kernels for every vector size,
// computes a correctly-rounded host reference, and compares each result
// within float_ulps.  When a result misses the ULP bound and the device
// flushes denormals (ftz) or relaxed mode is on, the comparison is retried
// under the denormal-flushing rules of OpenCL 1.2 sections 6.5.3.2-6.5.3.4
// (each subnormal input replaced by +/-0 in every combination).  When the
// device does not support Inf/NaN (skipNanInf), any result is accepted if
// an input or output is an infinity, NaN, or overflowed (section 10 para 6).
//
// Returns 0 on success, a CL error code on setup/IO failure, or -1 on a
// verification failure.  Kernels are released at 'exit'; on failure paths
// reached via goto before all kernels were created the release loop may
// touch uninitialized kernel handles — NOTE(review): pre-existing behavior,
// kept as-is.
int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
{
    int error;

    logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);

    Programs programs;
    cl_kernel kernels[VECTOR_SIZE_COUNT];
    float maxError = 0.0f;
    // Flush-to-zero retries apply if the function, the command line, or the
    // device's missing CL_FP_DENORM capability demands it.
    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    float maxErrorVal = 0.0f;
    float maxErrorVal2 = 0.0f;
    float maxErrorVal3 = 0.0f;
    uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);

    // Per-element flag: reference computation overflowed (used only when
    // skipNanInf is set).
    cl_uchar overflow[BUFFER_SIZE / sizeof(float)];

    // Allowed ULP error for this function on this profile.
    float float_ulps;
    if (gIsEmbedded)
        float_ulps = f->float_embedded_ulps;
    else
        float_ulps = f->float_ulps;

    // fma on a device without Inf/NaN support gets the section-10 leniency.
    int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;

    // Init the kernels
    {
        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
                                     relaxedMode };
        if ((error = ThreadPool_Do(BuildKernelFn,
                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                   &build_info)))
            return error;
    }

    for (uint64_t i = 0; i < (1ULL << 32); i += step)
    {
        // Init input array
        cl_uint *p = (cl_uint *)gIn;
        cl_uint *p2 = (cl_uint *)gIn2;
        cl_uint *p3 = (cl_uint *)gIn3;
        size_t idx = 0;

        if (i == 0)
        { // test edge cases
            float *fp = (float *)gIn;
            float *fp2 = (float *)gIn2;
            float *fp3 = (float *)gIn3;
            // x/y/z form an odometer over all specialValues triples.
            uint32_t x, y, z;
            x = y = z = 0;
            for (; idx < BUFFER_SIZE / sizeof(float); idx++)
            {
                fp[idx] = specialValues[x];
                fp2[idx] = specialValues[y];
                fp3[idx] = specialValues[z];

                if (++x >= specialValuesCount)
                {
                    x = 0;
                    if (++y >= specialValuesCount)
                    {
                        y = 0;
                        // break: all count^3 triples emitted.
                        if (++z >= specialValuesCount) break;
                    }
                }
            }
            // Loop exhausted the buffer before the odometer wrapped: the
            // buffer is too small to hold every special-value triple.
            if (idx == BUFFER_SIZE / sizeof(float))
                vlog_error("Test Error: not all special cases tested!\n");
        }

        // Fill the remainder (or, after the first pass, the whole buffer)
        // with random 32-bit patterns.
        for (; idx < BUFFER_SIZE / sizeof(float); idx++)
        {
            p[idx] = genrand_int32(d);
            p2[idx] = genrand_int32(d);
            p3[idx] = genrand_int32(d);
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
            return error;
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
            return error;
        }

        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
        {
            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
            return error;
        }

        // write garbage into output arrays
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            uint32_t pattern = 0xffffdead;
            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
            if ((error =
                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                           error, j);
                goto exit;
            }
        }

        // Run the kernels
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            size_t vectorSize = sizeof(cl_float) * sizeValues[j];
            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                / vectorSize; // BUFFER_SIZE / vectorSize rounded up
            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
                                        &gOutBuffer[j])))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
                                        &gInBuffer)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
                                        &gInBuffer2)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }
            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
                                        &gInBuffer3)))
            {
                LogBuildError(programs[j]);
                goto exit;
            }

            if ((error =
                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
                                            &localCount, NULL, 0, NULL, NULL)))
            {
                vlog_error("FAILED -- could not execute kernel\n");
                goto exit;
            }
        }

        // Get that moving
        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");

        // Calculate the correctly rounded reference result
        float *r = (float *)gOut_Ref;
        float *s = (float *)gIn;
        float *s2 = (float *)gIn2;
        float *s3 = (float *)gIn3;
        if (skipNanInf)
        {
            // Also record which elements overflow, so the verifier can
            // accept anything for them (section 10 paragraph 6).
            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
            {
                feclearexcept(FE_OVERFLOW);
                r[j] =
                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
                overflow[j] =
                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
            }
        }
        else
        {
            for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
                r[j] =
                    (float)f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);
        }

        // Read the data back
        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
        {
            if ((error =
                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
            {
                vlog_error("ReadArray failed %d\n", error);
                goto exit;
            }
        }

        if (gSkipCorrectnessTesting) break;

        // Verify data
        uint32_t *t = (uint32_t *)gOut_Ref;
        for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
        {
            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
            {
                uint32_t *q = (uint32_t *)(gOut[k]);

                // If we aren't getting the correctly rounded result
                if (t[j] != q[j])
                {
                    float err;
                    int fail;
                    float test = ((float *)q)[j];
                    float correct =
                        f->func.f_fma(s[j], s2[j], s3[j], CORRECTLY_ROUNDED);

                    // Per section 10 paragraph 6, accept any result if an input
                    // or output is a infinity or NaN or overflow
                    if (skipNanInf)
                    {
                        if (overflow[j] || IsFloatInfinity(correct)
                            || IsFloatNaN(correct) || IsFloatInfinity(s[j])
                            || IsFloatNaN(s[j]) || IsFloatInfinity(s2[j])
                            || IsFloatNaN(s2[j]) || IsFloatInfinity(s3[j])
                            || IsFloatNaN(s3[j]))
                            continue;
                    }


                    err = Ulp_Error(test, correct);
                    fail = !(fabsf(err) <= float_ulps);

                    if (fail && (ftz || relaxedMode))
                    {
                        float correct2, err2;

                        // retry per section 6.5.3.2 with flushing on
                        if (0.0f == test
                            && 0.0f
                                == f->func.f_fma(s[j], s2[j], s3[j], FLUSHED))
                        {
                            fail = 0;
                            err = 0.0f;
                        }

                        // retry per section 6.5.3.3: the first operand is
                        // subnormal, so the device may have flushed it to
                        // +0 or -0 before computing.
                        if (fail && IsFloatSubnormal(s[j]))
                        {
                            float err3, correct3;

                            if (skipNanInf) feclearexcept(FE_OVERFLOW);

                            correct2 = f->func.f_fma(0.0f, s2[j], s3[j],
                                                     CORRECTLY_ROUNDED);
                            correct3 = f->func.f_fma(-0.0f, s2[j], s3[j],
                                                     CORRECTLY_ROUNDED);

                            if (skipNanInf)
                            {
                                if (fetestexcept(FE_OVERFLOW)) continue;

                                // Note: no double rounding here. Reference
                                // functions calculate in single precision.
                                if (IsFloatInfinity(correct2)
                                    || IsFloatNaN(correct2)
                                    || IsFloatInfinity(correct3)
                                    || IsFloatNaN(correct3))
                                    continue;
                            }

                            err2 = Ulp_Error(test, correct2);
                            err3 = Ulp_Error(test, correct3);
                            fail = fail
                                && ((!(fabsf(err2) <= float_ulps))
                                    && (!(fabsf(err3) <= float_ulps)));
                            // Keep the smallest-magnitude error for reporting.
                            if (fabsf(err2) < fabsf(err)) err = err2;
                            if (fabsf(err3) < fabsf(err)) err = err3;

                            // retry per section 6.5.3.4
                            if (0.0f == test
                                && (0.0f
                                        == f->func.f_fma(0.0f, s2[j], s3[j],
                                                         FLUSHED)
                                    || 0.0f
                                        == f->func.f_fma(-0.0f, s2[j], s3[j],
                                                         FLUSHED)))
                            {
                                fail = 0;
                                err = 0.0f;
                            }

                            // try with first two args as zero
                            if (IsFloatSubnormal(s2[j]))
                            {
                                double correct4, correct5;
                                float err4, err5;

                                if (skipNanInf) feclearexcept(FE_OVERFLOW);

                                correct2 = f->func.f_fma(0.0f, 0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);
                                correct3 = f->func.f_fma(-0.0f, 0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);
                                correct4 = f->func.f_fma(0.0f, -0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);
                                correct5 = f->func.f_fma(-0.0f, -0.0f, s3[j],
                                                         CORRECTLY_ROUNDED);

                                // Per section 10 paragraph 6, accept any result
                                // if an input or output is a infinity or NaN or
                                // overflow
                                if (!gInfNanSupport)
                                {
                                    if (fetestexcept(FE_OVERFLOW)) continue;

                                    // Note: no double rounding here. Reference
                                    // functions calculate in single precision.
                                    if (IsFloatInfinity(correct2)
                                        || IsFloatNaN(correct2)
                                        || IsFloatInfinity(correct3)
                                        || IsFloatNaN(correct3)
                                        || IsFloatInfinity(correct4)
                                        || IsFloatNaN(correct4)
                                        || IsFloatInfinity(correct5)
                                        || IsFloatNaN(correct5))
                                        continue;
                                }

                                err2 = Ulp_Error(test, correct2);
                                err3 = Ulp_Error(test, correct3);
                                err4 = Ulp_Error(test, correct4);
                                err5 = Ulp_Error(test, correct5);
                                fail = fail
                                    && ((!(fabsf(err2) <= float_ulps))
                                        && (!(fabsf(err3) <= float_ulps))
                                        && (!(fabsf(err4) <= float_ulps))
                                        && (!(fabsf(err5) <= float_ulps)));
                                if (fabsf(err2) < fabsf(err)) err = err2;
                                if (fabsf(err3) < fabsf(err)) err = err3;
                                if (fabsf(err4) < fabsf(err)) err = err4;
                                if (fabsf(err5) < fabsf(err)) err = err5;

                                // retry per section 6.5.3.4
                                if (0.0f == test
                                    && (0.0f
                                            == f->func.f_fma(0.0f, 0.0f, s3[j],
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, 0.0f, s3[j],
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(0.0f, -0.0f, s3[j],
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, -0.0f,
                                                             s3[j], FLUSHED)))
                                {
                                    fail = 0;
                                    err = 0.0f;
                                }

                                // All three inputs subnormal: flushing
                                // everything gives 0*0+0 == 0.
                                if (IsFloatSubnormal(s3[j]))
                                {
                                    if (test == 0.0f) // 0*0+0 is 0
                                    {
                                        fail = 0;
                                        err = 0.0f;
                                    }
                                }
                            }
                            else if (IsFloatSubnormal(s3[j]))
                            {
                                // First and third operands subnormal: retry
                                // with both flushed, in all sign combinations.
                                double correct4, correct5;
                                float err4, err5;

                                if (skipNanInf) feclearexcept(FE_OVERFLOW);

                                correct2 = f->func.f_fma(0.0f, s2[j], 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct3 = f->func.f_fma(-0.0f, s2[j], 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct4 = f->func.f_fma(0.0f, s2[j], -0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct5 = f->func.f_fma(-0.0f, s2[j], -0.0f,
                                                         CORRECTLY_ROUNDED);

                                // Per section 10 paragraph 6, accept any result
                                // if an input or output is a infinity or NaN or
                                // overflow
                                if (!gInfNanSupport)
                                {
                                    if (fetestexcept(FE_OVERFLOW)) continue;

                                    // Note: no double rounding here. Reference
                                    // functions calculate in single precision.
                                    if (IsFloatInfinity(correct2)
                                        || IsFloatNaN(correct2)
                                        || IsFloatInfinity(correct3)
                                        || IsFloatNaN(correct3)
                                        || IsFloatInfinity(correct4)
                                        || IsFloatNaN(correct4)
                                        || IsFloatInfinity(correct5)
                                        || IsFloatNaN(correct5))
                                        continue;
                                }

                                err2 = Ulp_Error(test, correct2);
                                err3 = Ulp_Error(test, correct3);
                                err4 = Ulp_Error(test, correct4);
                                err5 = Ulp_Error(test, correct5);
                                fail = fail
                                    && ((!(fabsf(err2) <= float_ulps))
                                        && (!(fabsf(err3) <= float_ulps))
                                        && (!(fabsf(err4) <= float_ulps))
                                        && (!(fabsf(err5) <= float_ulps)));
                                if (fabsf(err2) < fabsf(err)) err = err2;
                                if (fabsf(err3) < fabsf(err)) err = err3;
                                if (fabsf(err4) < fabsf(err)) err = err4;
                                if (fabsf(err5) < fabsf(err)) err = err5;

                                // retry per section 6.5.3.4
                                if (0.0f == test
                                    && (0.0f
                                            == f->func.f_fma(0.0f, s2[j], 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, s2[j], 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(0.0f, s2[j], -0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(-0.0f, s2[j],
                                                             -0.0f, FLUSHED)))
                                {
                                    fail = 0;
                                    err = 0.0f;
                                }
                            }
                        }
                        else if (fail && IsFloatSubnormal(s2[j]))
                        {
                            // Only the second operand is subnormal: retry with
                            // it flushed to +0 / -0.
                            double correct2, correct3;
                            float err2, err3;

                            if (skipNanInf) feclearexcept(FE_OVERFLOW);

                            correct2 = f->func.f_fma(s[j], 0.0f, s3[j],
                                                     CORRECTLY_ROUNDED);
                            correct3 = f->func.f_fma(s[j], -0.0f, s3[j],
                                                     CORRECTLY_ROUNDED);

                            if (skipNanInf)
                            {
                                if (fetestexcept(FE_OVERFLOW)) continue;

                                // Note: no double rounding here. Reference
                                // functions calculate in single precision.
                                if (IsFloatInfinity(correct2)
                                    || IsFloatNaN(correct2)
                                    || IsFloatInfinity(correct3)
                                    || IsFloatNaN(correct3))
                                    continue;
                            }

                            err2 = Ulp_Error(test, correct2);
                            err3 = Ulp_Error(test, correct3);
                            fail = fail
                                && ((!(fabsf(err2) <= float_ulps))
                                    && (!(fabsf(err3) <= float_ulps)));
                            if (fabsf(err2) < fabsf(err)) err = err2;
                            if (fabsf(err3) < fabsf(err)) err = err3;

                            // retry per section 6.5.3.4
                            if (0.0f == test
                                && (0.0f
                                        == f->func.f_fma(s[j], 0.0f, s3[j],
                                                         FLUSHED)
                                    || 0.0f
                                        == f->func.f_fma(s[j], -0.0f, s3[j],
                                                         FLUSHED)))
                            {
                                fail = 0;
                                err = 0.0f;
                            }

                            // try with second two args as zero
                            if (IsFloatSubnormal(s3[j]))
                            {
                                double correct4, correct5;
                                float err4, err5;

                                if (skipNanInf) feclearexcept(FE_OVERFLOW);

                                correct2 = f->func.f_fma(s[j], 0.0f, 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct3 = f->func.f_fma(s[j], -0.0f, 0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct4 = f->func.f_fma(s[j], 0.0f, -0.0f,
                                                         CORRECTLY_ROUNDED);
                                correct5 = f->func.f_fma(s[j], -0.0f, -0.0f,
                                                         CORRECTLY_ROUNDED);

                                // Per section 10 paragraph 6, accept any result
                                // if an input or output is a infinity or NaN or
                                // overflow
                                if (!gInfNanSupport)
                                {
                                    if (fetestexcept(FE_OVERFLOW)) continue;

                                    // Note: no double rounding here. Reference
                                    // functions calculate in single precision.
                                    if (IsFloatInfinity(correct2)
                                        || IsFloatNaN(correct2)
                                        || IsFloatInfinity(correct3)
                                        || IsFloatNaN(correct3)
                                        || IsFloatInfinity(correct4)
                                        || IsFloatNaN(correct4)
                                        || IsFloatInfinity(correct5)
                                        || IsFloatNaN(correct5))
                                        continue;
                                }

                                err2 = Ulp_Error(test, correct2);
                                err3 = Ulp_Error(test, correct3);
                                err4 = Ulp_Error(test, correct4);
                                err5 = Ulp_Error(test, correct5);
                                fail = fail
                                    && ((!(fabsf(err2) <= float_ulps))
                                        && (!(fabsf(err3) <= float_ulps))
                                        && (!(fabsf(err4) <= float_ulps))
                                        && (!(fabsf(err5) <= float_ulps)));
                                if (fabsf(err2) < fabsf(err)) err = err2;
                                if (fabsf(err3) < fabsf(err)) err = err3;
                                if (fabsf(err4) < fabsf(err)) err = err4;
                                if (fabsf(err5) < fabsf(err)) err = err5;

                                // retry per section 6.5.3.4
                                if (0.0f == test
                                    && (0.0f
                                            == f->func.f_fma(s[j], 0.0f, 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(s[j], -0.0f, 0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(s[j], 0.0f, -0.0f,
                                                             FLUSHED)
                                        || 0.0f
                                            == f->func.f_fma(s[j], -0.0f, -0.0f,
                                                             FLUSHED)))
                                {
                                    fail = 0;
                                    err = 0.0f;
                                }
                            }
                        }
                        else if (fail && IsFloatSubnormal(s3[j]))
                        {
                            // Only the third operand is subnormal: retry with
                            // it flushed to +0 / -0.
                            double correct2, correct3;
                            float err2, err3;

                            if (skipNanInf) feclearexcept(FE_OVERFLOW);

                            correct2 = f->func.f_fma(s[j], s2[j], 0.0f,
                                                     CORRECTLY_ROUNDED);
                            correct3 = f->func.f_fma(s[j], s2[j], -0.0f,
                                                     CORRECTLY_ROUNDED);

                            if (skipNanInf)
                            {
                                if (fetestexcept(FE_OVERFLOW)) continue;

                                // Note: no double rounding here. Reference
                                // functions calculate in single precision.
                                if (IsFloatInfinity(correct2)
                                    || IsFloatNaN(correct2)
                                    || IsFloatInfinity(correct3)
                                    || IsFloatNaN(correct3))
                                    continue;
                            }

                            err2 = Ulp_Error(test, correct2);
                            err3 = Ulp_Error(test, correct3);
                            fail = fail
                                && ((!(fabsf(err2) <= float_ulps))
                                    && (!(fabsf(err3) <= float_ulps)));
                            if (fabsf(err2) < fabsf(err)) err = err2;
                            if (fabsf(err3) < fabsf(err)) err = err3;

                            // retry per section 6.5.3.4
                            if (0.0f == test
                                && (0.0f
                                        == f->func.f_fma(s[j], s2[j], 0.0f,
                                                         FLUSHED)
                                    || 0.0f
                                        == f->func.f_fma(s[j], s2[j], -0.0f,
                                                         FLUSHED)))
                            {
                                fail = 0;
                                err = 0.0f;
                            }
                        }
                    }

                    // Track the worst error seen and the inputs producing it.
                    if (fabsf(err) > maxError)
                    {
                        maxError = fabsf(err);
                        maxErrorVal = s[j];
                        maxErrorVal2 = s2[j];
                        maxErrorVal3 = s3[j];
                    }

                    if (fail)
                    {
                        vlog_error(
                            "\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
                            "({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n",
                            f->name, sizeNames[k], err, s[j], s2[j], s3[j],
                            ((cl_uint *)s)[j], ((cl_uint *)s2)[j],
                            ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test);
                        error = -1;
                        goto exit;
                    }
                }
            }
        }

        // Progress indicator, once per 2^28 inputs.
        if (0 == (i & 0x0fffffff))
        {
            if (gVerboseBruteForce)
            {
                vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
                     i, step, BUFFER_SIZE);
            }
            else
            {
                vlog(".");
            }
            fflush(stdout);
        }
    }

    if (!gSkipCorrectnessTesting)
    {
        if (gWimpyMode)
            vlog("Wimp pass");
        else
            vlog("passed");

        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
             maxErrorVal3);
    }

    vlog("\n");

exit:
    // Release
    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
    {
        clReleaseKernel(kernels[k]);
    }

    return error;
}
801