1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cinttypes>
23 #include <cstring>
24
25 #define CORRECTLY_ROUNDED 0
26 #define FLUSHED 1
27
28 namespace {
29
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)30 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
31 {
32 BuildKernelInfo &info = *(BuildKernelInfo *)p;
33 auto generator = [](const std::string &kernel_name, const char *builtin,
34 cl_uint vector_size_index) {
35 return GetTernaryKernel(kernel_name, builtin, ParameterType::Double,
36 ParameterType::Double, ParameterType::Double,
37 ParameterType::Double, vector_size_index);
38 };
39 return BuildKernels(info, job_id, generator);
40 }
41
42 // A table of more difficult cases to get right
43 const double specialValues[] = {
44 -NAN,
45 -INFINITY,
46 -DBL_MAX,
47 MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
48 MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
49 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
50 MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
51 MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
52 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
53 -3.0,
54 MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
55 -2.5,
56 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
57 -2.0,
58 MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
59 -1.5,
60 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
61 MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
62 -1.0,
63 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
64 MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
65 -DBL_MIN,
66 MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
67 MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
68 MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
69 MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
70 MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
71 MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
72 MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
73 MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
74 MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
75 -0.0,
76
77 +NAN,
78 +INFINITY,
79 +DBL_MAX,
80 MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
81 MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
82 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
83 MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
84 MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
85 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
86 +3.0,
87 MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
88 +2.5,
89 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
90 +2.0,
91 MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
92 +1.5,
93 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
94 MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
95 +1.0,
96 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
97 MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
98 +DBL_MIN,
99 MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
100 MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
101 MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
102 MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
103 MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
104 MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
105 MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
106 MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
107 MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
108 +0.0,
109 };
110
111 constexpr size_t specialValuesCount =
112 sizeof(specialValues) / sizeof(specialValues[0]);
113
114 } // anonymous namespace
115
TestFunc_Double_Double_Double_Double(const Func * f,MTdata d,bool relaxedMode)116 int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
117 bool relaxedMode)
118 {
119 int error;
120 Programs programs;
121 const unsigned thread_id = 0; // Test is currently not multithreaded.
122 KernelMatrix kernels;
123 float maxError = 0.0f;
124 int ftz = f->ftz || gForceFTZ;
125 double maxErrorVal = 0.0f;
126 double maxErrorVal2 = 0.0f;
127 double maxErrorVal3 = 0.0f;
128 uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
129
130 logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
131
132 Force64BitFPUPrecision();
133
134 // Init the kernels
135 BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
136 relaxedMode };
137 if ((error = ThreadPool_Do(BuildKernelFn,
138 gMaxVectorSizeIndex - gMinVectorSizeIndex,
139 &build_info)))
140 return error;
141
142 for (uint64_t i = 0; i < (1ULL << 32); i += step)
143 {
144 // Init input array
145 double *p = (double *)gIn;
146 double *p2 = (double *)gIn2;
147 double *p3 = (double *)gIn3;
148 size_t idx = 0;
149
150 if (i == 0)
151 { // test edge cases
152 uint32_t x, y, z;
153 x = y = z = 0;
154 for (; idx < BUFFER_SIZE / sizeof(double); idx++)
155 {
156 p[idx] = specialValues[x];
157 p2[idx] = specialValues[y];
158 p3[idx] = specialValues[z];
159 if (++x >= specialValuesCount)
160 {
161 x = 0;
162 if (++y >= specialValuesCount)
163 {
164 y = 0;
165 if (++z >= specialValuesCount) break;
166 }
167 }
168 }
169 if (idx == BUFFER_SIZE / sizeof(double))
170 vlog_error("Test Error: not all special cases tested!\n");
171 }
172
173 for (; idx < BUFFER_SIZE / sizeof(double); idx++)
174 {
175 p[idx] = DoubleFromUInt32(genrand_int32(d));
176 p2[idx] = DoubleFromUInt32(genrand_int32(d));
177 p3[idx] = DoubleFromUInt32(genrand_int32(d));
178 }
179
180 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
181 BUFFER_SIZE, gIn, 0, NULL, NULL)))
182 {
183 vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
184 return error;
185 }
186
187 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
188 BUFFER_SIZE, gIn2, 0, NULL, NULL)))
189 {
190 vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
191 return error;
192 }
193
194 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
195 BUFFER_SIZE, gIn3, 0, NULL, NULL)))
196 {
197 vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
198 return error;
199 }
200
201 // Write garbage into output arrays
202 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
203 {
204 uint32_t pattern = 0xffffdead;
205 if (gHostFill)
206 {
207 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
208 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
209 CL_FALSE, 0, BUFFER_SIZE,
210 gOut[j], 0, NULL, NULL)))
211 {
212 vlog_error(
213 "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
214 error, j);
215 return error;
216 }
217 }
218 else
219 {
220 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
221 &pattern, sizeof(pattern), 0,
222 BUFFER_SIZE, 0, NULL, NULL)))
223 {
224 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
225 error);
226 return error;
227 }
228 }
229 }
230
231 // Run the kernels
232 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
233 {
234 size_t vectorSize = sizeof(cl_double) * sizeValues[j];
235 size_t localCount = (BUFFER_SIZE + vectorSize - 1)
236 / vectorSize; // BUFFER_SIZE / vectorSize rounded up
237 if ((error = clSetKernelArg(kernels[j][thread_id], 0,
238 sizeof(gOutBuffer[j]), &gOutBuffer[j])))
239 {
240 LogBuildError(programs[j]);
241 return error;
242 }
243 if ((error = clSetKernelArg(kernels[j][thread_id], 1,
244 sizeof(gInBuffer), &gInBuffer)))
245 {
246 LogBuildError(programs[j]);
247 return error;
248 }
249 if ((error = clSetKernelArg(kernels[j][thread_id], 2,
250 sizeof(gInBuffer2), &gInBuffer2)))
251 {
252 LogBuildError(programs[j]);
253 return error;
254 }
255 if ((error = clSetKernelArg(kernels[j][thread_id], 3,
256 sizeof(gInBuffer3), &gInBuffer3)))
257 {
258 LogBuildError(programs[j]);
259 return error;
260 }
261
262 if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
263 1, NULL, &localCount, NULL, 0,
264 NULL, NULL)))
265 {
266 vlog_error("FAILED -- could not execute kernel\n");
267 return error;
268 }
269 }
270
271 // Get that moving
272 if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
273
274 // Calculate the correctly rounded reference result
275 double *r = (double *)gOut_Ref;
276 double *s = (double *)gIn;
277 double *s2 = (double *)gIn2;
278 double *s3 = (double *)gIn3;
279 for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
280 r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
281
282 // Read the data back
283 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
284 {
285 if ((error =
286 clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
287 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
288 {
289 vlog_error("ReadArray failed %d\n", error);
290 return error;
291 }
292 }
293
294 if (gSkipCorrectnessTesting) break;
295
296 // Verify data
297 uint64_t *t = (uint64_t *)gOut_Ref;
298 for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
299 {
300 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
301 {
302 uint64_t *q = (uint64_t *)(gOut[k]);
303
304 // If we aren't getting the correctly rounded result
305 if (t[j] != q[j])
306 {
307 double test = ((double *)q)[j];
308 long double correct = f->dfunc.f_fff(s[j], s2[j], s3[j]);
309 float err = Bruteforce_Ulp_Error_Double(test, correct);
310 int fail = !(fabsf(err) <= f->double_ulps);
311
312 if (fail && (ftz || relaxedMode))
313 {
314 // retry per section 6.5.3.2
315 if (IsDoubleSubnormal(correct))
316 { // look at me,
317 fail = fail && (test != 0.0f);
318 if (!fail) err = 0.0f;
319 }
320
321 // retry per section 6.5.3.3
322 if (fail && IsDoubleSubnormal(s[j]))
323 { // look at me,
324 long double correct2 =
325 f->dfunc.f_fff(0.0, s2[j], s3[j]);
326 long double correct3 =
327 f->dfunc.f_fff(-0.0, s2[j], s3[j]);
328 float err2 =
329 Bruteforce_Ulp_Error_Double(test, correct2);
330 float err3 =
331 Bruteforce_Ulp_Error_Double(test, correct3);
332 fail = fail
333 && ((!(fabsf(err2) <= f->double_ulps))
334 && (!(fabsf(err3) <= f->double_ulps)));
335 if (fabsf(err2) < fabsf(err)) err = err2;
336 if (fabsf(err3) < fabsf(err)) err = err3;
337
338 // retry per section 6.5.3.4
339 if (IsDoubleResultSubnormal(correct2,
340 f->double_ulps)
341 || IsDoubleResultSubnormal(correct3,
342 f->double_ulps))
343 { // look at me now,
344 fail = fail && (test != 0.0f);
345 if (!fail) err = 0.0f;
346 }
347
348 // try with first two args as zero
349 if (IsDoubleSubnormal(s2[j]))
350 { // its fun to have fun,
351 correct2 = f->dfunc.f_fff(0.0, 0.0, s3[j]);
352 correct3 = f->dfunc.f_fff(-0.0, 0.0, s3[j]);
353 long double correct4 =
354 f->dfunc.f_fff(0.0, -0.0, s3[j]);
355 long double correct5 =
356 f->dfunc.f_fff(-0.0, -0.0, s3[j]);
357 err2 =
358 Bruteforce_Ulp_Error_Double(test, correct2);
359 err3 =
360 Bruteforce_Ulp_Error_Double(test, correct3);
361 float err4 =
362 Bruteforce_Ulp_Error_Double(test, correct4);
363 float err5 =
364 Bruteforce_Ulp_Error_Double(test, correct5);
365 fail = fail
366 && ((!(fabsf(err2) <= f->double_ulps))
367 && (!(fabsf(err3) <= f->double_ulps))
368 && (!(fabsf(err4) <= f->double_ulps))
369 && (!(fabsf(err5) <= f->double_ulps)));
370 if (fabsf(err2) < fabsf(err)) err = err2;
371 if (fabsf(err3) < fabsf(err)) err = err3;
372 if (fabsf(err4) < fabsf(err)) err = err4;
373 if (fabsf(err5) < fabsf(err)) err = err5;
374
375 // retry per section 6.5.3.4
376 if (IsDoubleResultSubnormal(correct2,
377 f->double_ulps)
378 || IsDoubleResultSubnormal(correct3,
379 f->double_ulps)
380 || IsDoubleResultSubnormal(correct4,
381 f->double_ulps)
382 || IsDoubleResultSubnormal(correct5,
383 f->double_ulps))
384 {
385 fail = fail && (test != 0.0f);
386 if (!fail) err = 0.0f;
387 }
388
389 if (IsDoubleSubnormal(s3[j]))
390 { // but you have to know how!
391 correct2 = f->dfunc.f_fff(0.0, 0.0, 0.0f);
392 correct3 = f->dfunc.f_fff(-0.0, 0.0, 0.0f);
393 correct4 = f->dfunc.f_fff(0.0, -0.0, 0.0f);
394 correct5 = f->dfunc.f_fff(-0.0, -0.0, 0.0f);
395 long double correct6 =
396 f->dfunc.f_fff(0.0, 0.0, -0.0f);
397 long double correct7 =
398 f->dfunc.f_fff(-0.0, 0.0, -0.0f);
399 long double correct8 =
400 f->dfunc.f_fff(0.0, -0.0, -0.0f);
401 long double correct9 =
402 f->dfunc.f_fff(-0.0, -0.0, -0.0f);
403 err2 = Bruteforce_Ulp_Error_Double(
404 test, correct2);
405 err3 = Bruteforce_Ulp_Error_Double(
406 test, correct3);
407 err4 = Bruteforce_Ulp_Error_Double(
408 test, correct4);
409 err5 = Bruteforce_Ulp_Error_Double(
410 test, correct5);
411 float err6 = Bruteforce_Ulp_Error_Double(
412 test, correct6);
413 float err7 = Bruteforce_Ulp_Error_Double(
414 test, correct7);
415 float err8 = Bruteforce_Ulp_Error_Double(
416 test, correct8);
417 float err9 = Bruteforce_Ulp_Error_Double(
418 test, correct9);
419 fail = fail
420 && ((!(fabsf(err2) <= f->double_ulps))
421 && (!(fabsf(err3)
422 <= f->double_ulps))
423 && (!(fabsf(err4)
424 <= f->double_ulps))
425 && (!(fabsf(err5)
426 <= f->double_ulps))
427 && (!(fabsf(err5)
428 <= f->double_ulps))
429 && (!(fabsf(err6)
430 <= f->double_ulps))
431 && (!(fabsf(err7)
432 <= f->double_ulps))
433 && (!(fabsf(err8)
434 <= f->double_ulps)));
435 if (fabsf(err2) < fabsf(err)) err = err2;
436 if (fabsf(err3) < fabsf(err)) err = err3;
437 if (fabsf(err4) < fabsf(err)) err = err4;
438 if (fabsf(err5) < fabsf(err)) err = err5;
439 if (fabsf(err6) < fabsf(err)) err = err6;
440 if (fabsf(err7) < fabsf(err)) err = err7;
441 if (fabsf(err8) < fabsf(err)) err = err8;
442 if (fabsf(err9) < fabsf(err)) err = err9;
443
444 // retry per section 6.5.3.4
445 if (IsDoubleResultSubnormal(correct2,
446 f->double_ulps)
447 || IsDoubleResultSubnormal(
448 correct3, f->double_ulps)
449 || IsDoubleResultSubnormal(
450 correct4, f->double_ulps)
451 || IsDoubleResultSubnormal(
452 correct5, f->double_ulps)
453 || IsDoubleResultSubnormal(
454 correct6, f->double_ulps)
455 || IsDoubleResultSubnormal(
456 correct7, f->double_ulps)
457 || IsDoubleResultSubnormal(
458 correct8, f->double_ulps)
459 || IsDoubleResultSubnormal(
460 correct9, f->double_ulps))
461 {
462 fail = fail && (test != 0.0f);
463 if (!fail) err = 0.0f;
464 }
465 }
466 }
467 else if (IsDoubleSubnormal(s3[j]))
468 {
469 correct2 = f->dfunc.f_fff(0.0, s2[j], 0.0);
470 correct3 = f->dfunc.f_fff(-0.0, s2[j], 0.0);
471 long double correct4 =
472 f->dfunc.f_fff(0.0, s2[j], -0.0);
473 long double correct5 =
474 f->dfunc.f_fff(-0.0, s2[j], -0.0);
475 err2 =
476 Bruteforce_Ulp_Error_Double(test, correct2);
477 err3 =
478 Bruteforce_Ulp_Error_Double(test, correct3);
479 float err4 =
480 Bruteforce_Ulp_Error_Double(test, correct4);
481 float err5 =
482 Bruteforce_Ulp_Error_Double(test, correct5);
483 fail = fail
484 && ((!(fabsf(err2) <= f->double_ulps))
485 && (!(fabsf(err3) <= f->double_ulps))
486 && (!(fabsf(err4) <= f->double_ulps))
487 && (!(fabsf(err5) <= f->double_ulps)));
488 if (fabsf(err2) < fabsf(err)) err = err2;
489 if (fabsf(err3) < fabsf(err)) err = err3;
490 if (fabsf(err4) < fabsf(err)) err = err4;
491 if (fabsf(err5) < fabsf(err)) err = err5;
492
493 // retry per section 6.5.3.4
494 if (IsDoubleResultSubnormal(correct2,
495 f->double_ulps)
496 || IsDoubleResultSubnormal(correct3,
497 f->double_ulps)
498 || IsDoubleResultSubnormal(correct4,
499 f->double_ulps)
500 || IsDoubleResultSubnormal(correct5,
501 f->double_ulps))
502 {
503 fail = fail && (test != 0.0f);
504 if (!fail) err = 0.0f;
505 }
506 }
507 }
508 else if (fail && IsDoubleSubnormal(s2[j]))
509 {
510 long double correct2 =
511 f->dfunc.f_fff(s[j], 0.0, s3[j]);
512 long double correct3 =
513 f->dfunc.f_fff(s[j], -0.0, s3[j]);
514 float err2 =
515 Bruteforce_Ulp_Error_Double(test, correct2);
516 float err3 =
517 Bruteforce_Ulp_Error_Double(test, correct3);
518 fail = fail
519 && ((!(fabsf(err2) <= f->double_ulps))
520 && (!(fabsf(err3) <= f->double_ulps)));
521 if (fabsf(err2) < fabsf(err)) err = err2;
522 if (fabsf(err3) < fabsf(err)) err = err3;
523
524 // retry per section 6.5.3.4
525 if (IsDoubleResultSubnormal(correct2,
526 f->double_ulps)
527 || IsDoubleResultSubnormal(correct3,
528 f->double_ulps))
529 {
530 fail = fail && (test != 0.0f);
531 if (!fail) err = 0.0f;
532 }
533
534 // try with second two args as zero
535 if (IsDoubleSubnormal(s3[j]))
536 {
537 correct2 = f->dfunc.f_fff(s[j], 0.0, 0.0);
538 correct3 = f->dfunc.f_fff(s[j], -0.0, 0.0);
539 long double correct4 =
540 f->dfunc.f_fff(s[j], 0.0, -0.0);
541 long double correct5 =
542 f->dfunc.f_fff(s[j], -0.0, -0.0);
543 err2 =
544 Bruteforce_Ulp_Error_Double(test, correct2);
545 err3 =
546 Bruteforce_Ulp_Error_Double(test, correct3);
547 float err4 =
548 Bruteforce_Ulp_Error_Double(test, correct4);
549 float err5 =
550 Bruteforce_Ulp_Error_Double(test, correct5);
551 fail = fail
552 && ((!(fabsf(err2) <= f->double_ulps))
553 && (!(fabsf(err3) <= f->double_ulps))
554 && (!(fabsf(err4) <= f->double_ulps))
555 && (!(fabsf(err5) <= f->double_ulps)));
556 if (fabsf(err2) < fabsf(err)) err = err2;
557 if (fabsf(err3) < fabsf(err)) err = err3;
558 if (fabsf(err4) < fabsf(err)) err = err4;
559 if (fabsf(err5) < fabsf(err)) err = err5;
560
561 // retry per section 6.5.3.4
562 if (IsDoubleResultSubnormal(correct2,
563 f->double_ulps)
564 || IsDoubleResultSubnormal(correct3,
565 f->double_ulps)
566 || IsDoubleResultSubnormal(correct4,
567 f->double_ulps)
568 || IsDoubleResultSubnormal(correct5,
569 f->double_ulps))
570 {
571 fail = fail && (test != 0.0f);
572 if (!fail) err = 0.0f;
573 }
574 }
575 }
576 else if (fail && IsDoubleSubnormal(s3[j]))
577 {
578 long double correct2 =
579 f->dfunc.f_fff(s[j], s2[j], 0.0);
580 long double correct3 =
581 f->dfunc.f_fff(s[j], s2[j], -0.0);
582 float err2 =
583 Bruteforce_Ulp_Error_Double(test, correct2);
584 float err3 =
585 Bruteforce_Ulp_Error_Double(test, correct3);
586 fail = fail
587 && ((!(fabsf(err2) <= f->double_ulps))
588 && (!(fabsf(err3) <= f->double_ulps)));
589 if (fabsf(err2) < fabsf(err)) err = err2;
590 if (fabsf(err3) < fabsf(err)) err = err3;
591
592 // retry per section 6.5.3.4
593 if (IsDoubleResultSubnormal(correct2,
594 f->double_ulps)
595 || IsDoubleResultSubnormal(correct3,
596 f->double_ulps))
597 {
598 fail = fail && (test != 0.0f);
599 if (!fail) err = 0.0f;
600 }
601 }
602 }
603
604 if (fabsf(err) > maxError)
605 {
606 maxError = fabsf(err);
607 maxErrorVal = s[j];
608 maxErrorVal2 = s2[j];
609 maxErrorVal3 = s3[j];
610 }
611
612 if (fail)
613 {
614 vlog_error("\nERROR: %sD%s: %f ulp error at {%.13la, "
615 "%.13la, %.13la}: *%.13la vs. %.13la\n",
616 f->name, sizeNames[k], err, s[j], s2[j],
617 s3[j], ((double *)gOut_Ref)[j], test);
618 return -1;
619 }
620 }
621 }
622 }
623
624 if (0 == (i & 0x0fffffff))
625 {
626 if (gVerboseBruteForce)
627 {
628 vlog("base:%14" PRIu64 " step:%10" PRIu64
629 " bufferSize:%10d \n",
630 i, step, BUFFER_SIZE);
631 }
632 else
633 {
634 vlog(".");
635 }
636 fflush(stdout);
637 }
638 }
639
640 if (!gSkipCorrectnessTesting)
641 {
642 if (gWimpyMode)
643 vlog("Wimp pass");
644 else
645 vlog("passed");
646
647 vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
648 maxErrorVal3);
649 }
650
651 vlog("\n");
652
653 return CL_SUCCESS;
654 }
655