• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <cinttypes>
23 #include <cstring>
24 
25 #define CORRECTLY_ROUNDED 0
26 #define FLUSHED 1
27 
28 namespace {
29 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)30 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
31 {
32     BuildKernelInfo &info = *(BuildKernelInfo *)p;
33     auto generator = [](const std::string &kernel_name, const char *builtin,
34                         cl_uint vector_size_index) {
35         return GetTernaryKernel(kernel_name, builtin, ParameterType::Double,
36                                 ParameterType::Double, ParameterType::Double,
37                                 ParameterType::Double, vector_size_index);
38     };
39     return BuildKernels(info, job_id, generator);
40 }
41 
42 // A table of more difficult cases to get right
43 const double specialValues[] = {
44     -NAN,
45     -INFINITY,
46     -DBL_MAX,
47     MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
48     MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
49     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
50     MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
51     MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
52     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
53     -3.0,
54     MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
55     -2.5,
56     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
57     -2.0,
58     MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
59     -1.5,
60     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
61     MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
62     -1.0,
63     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
64     MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
65     -DBL_MIN,
66     MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
67     MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
68     MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
69     MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
70     MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
71     MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
72     MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
73     MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
74     MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
75     -0.0,
76 
77     +NAN,
78     +INFINITY,
79     +DBL_MAX,
80     MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
81     MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
82     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
83     MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
84     MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
85     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
86     +3.0,
87     MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
88     +2.5,
89     MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
90     +2.0,
91     MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
92     +1.5,
93     MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
94     MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
95     +1.0,
96     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
97     MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
98     +DBL_MIN,
99     MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
100     MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
101     MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
102     MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
103     MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
104     MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
105     MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
106     MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
107     MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
108     +0.0,
109 };
110 
111 constexpr size_t specialValuesCount =
112     sizeof(specialValues) / sizeof(specialValues[0]);
113 
114 } // anonymous namespace
115 
TestFunc_Double_Double_Double_Double(const Func * f,MTdata d,bool relaxedMode)116 int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
117                                          bool relaxedMode)
118 {
119     int error;
120     Programs programs;
121     const unsigned thread_id = 0; // Test is currently not multithreaded.
122     KernelMatrix kernels;
123     float maxError = 0.0f;
124     int ftz = f->ftz || gForceFTZ;
125     double maxErrorVal = 0.0f;
126     double maxErrorVal2 = 0.0f;
127     double maxErrorVal3 = 0.0f;
128     uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
129 
130     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
131 
132     Force64BitFPUPrecision();
133 
134     // Init the kernels
135     BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
136                                 relaxedMode };
137     if ((error = ThreadPool_Do(BuildKernelFn,
138                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
139                                &build_info)))
140         return error;
141 
142     for (uint64_t i = 0; i < (1ULL << 32); i += step)
143     {
144         // Init input array
145         double *p = (double *)gIn;
146         double *p2 = (double *)gIn2;
147         double *p3 = (double *)gIn3;
148         size_t idx = 0;
149 
150         if (i == 0)
151         { // test edge cases
152             uint32_t x, y, z;
153             x = y = z = 0;
154             for (; idx < BUFFER_SIZE / sizeof(double); idx++)
155             {
156                 p[idx] = specialValues[x];
157                 p2[idx] = specialValues[y];
158                 p3[idx] = specialValues[z];
159                 if (++x >= specialValuesCount)
160                 {
161                     x = 0;
162                     if (++y >= specialValuesCount)
163                     {
164                         y = 0;
165                         if (++z >= specialValuesCount) break;
166                     }
167                 }
168             }
169             if (idx == BUFFER_SIZE / sizeof(double))
170                 vlog_error("Test Error: not all special cases tested!\n");
171         }
172 
173         for (; idx < BUFFER_SIZE / sizeof(double); idx++)
174         {
175             p[idx] = DoubleFromUInt32(genrand_int32(d));
176             p2[idx] = DoubleFromUInt32(genrand_int32(d));
177             p3[idx] = DoubleFromUInt32(genrand_int32(d));
178         }
179 
180         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
181                                           BUFFER_SIZE, gIn, 0, NULL, NULL)))
182         {
183             vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
184             return error;
185         }
186 
187         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
188                                           BUFFER_SIZE, gIn2, 0, NULL, NULL)))
189         {
190             vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
191             return error;
192         }
193 
194         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
195                                           BUFFER_SIZE, gIn3, 0, NULL, NULL)))
196         {
197             vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
198             return error;
199         }
200 
201         // Write garbage into output arrays
202         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
203         {
204             uint32_t pattern = 0xffffdead;
205             if (gHostFill)
206             {
207                 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
208                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
209                                                   CL_FALSE, 0, BUFFER_SIZE,
210                                                   gOut[j], 0, NULL, NULL)))
211                 {
212                     vlog_error(
213                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
214                         error, j);
215                     return error;
216                 }
217             }
218             else
219             {
220                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
221                                                  &pattern, sizeof(pattern), 0,
222                                                  BUFFER_SIZE, 0, NULL, NULL)))
223                 {
224                     vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
225                                error);
226                     return error;
227                 }
228             }
229         }
230 
231         // Run the kernels
232         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
233         {
234             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
235             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
236                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
237             if ((error = clSetKernelArg(kernels[j][thread_id], 0,
238                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
239             {
240                 LogBuildError(programs[j]);
241                 return error;
242             }
243             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
244                                         sizeof(gInBuffer), &gInBuffer)))
245             {
246                 LogBuildError(programs[j]);
247                 return error;
248             }
249             if ((error = clSetKernelArg(kernels[j][thread_id], 2,
250                                         sizeof(gInBuffer2), &gInBuffer2)))
251             {
252                 LogBuildError(programs[j]);
253                 return error;
254             }
255             if ((error = clSetKernelArg(kernels[j][thread_id], 3,
256                                         sizeof(gInBuffer3), &gInBuffer3)))
257             {
258                 LogBuildError(programs[j]);
259                 return error;
260             }
261 
262             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
263                                                 1, NULL, &localCount, NULL, 0,
264                                                 NULL, NULL)))
265             {
266                 vlog_error("FAILED -- could not execute kernel\n");
267                 return error;
268             }
269         }
270 
271         // Get that moving
272         if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
273 
274         // Calculate the correctly rounded reference result
275         double *r = (double *)gOut_Ref;
276         double *s = (double *)gIn;
277         double *s2 = (double *)gIn2;
278         double *s3 = (double *)gIn3;
279         for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
280             r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
281 
282         // Read the data back
283         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
284         {
285             if ((error =
286                      clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
287                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
288             {
289                 vlog_error("ReadArray failed %d\n", error);
290                 return error;
291             }
292         }
293 
294         if (gSkipCorrectnessTesting) break;
295 
296         // Verify data
297         uint64_t *t = (uint64_t *)gOut_Ref;
298         for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
299         {
300             for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
301             {
302                 uint64_t *q = (uint64_t *)(gOut[k]);
303 
304                 // If we aren't getting the correctly rounded result
305                 if (t[j] != q[j])
306                 {
307                     double test = ((double *)q)[j];
308                     long double correct = f->dfunc.f_fff(s[j], s2[j], s3[j]);
309                     float err = Bruteforce_Ulp_Error_Double(test, correct);
310                     int fail = !(fabsf(err) <= f->double_ulps);
311 
312                     if (fail && (ftz || relaxedMode))
313                     {
314                         // retry per section 6.5.3.2
315                         if (IsDoubleSubnormal(correct))
316                         { // look at me,
317                             fail = fail && (test != 0.0f);
318                             if (!fail) err = 0.0f;
319                         }
320 
321                         // retry per section 6.5.3.3
322                         if (fail && IsDoubleSubnormal(s[j]))
323                         { // look at me,
324                             long double correct2 =
325                                 f->dfunc.f_fff(0.0, s2[j], s3[j]);
326                             long double correct3 =
327                                 f->dfunc.f_fff(-0.0, s2[j], s3[j]);
328                             float err2 =
329                                 Bruteforce_Ulp_Error_Double(test, correct2);
330                             float err3 =
331                                 Bruteforce_Ulp_Error_Double(test, correct3);
332                             fail = fail
333                                 && ((!(fabsf(err2) <= f->double_ulps))
334                                     && (!(fabsf(err3) <= f->double_ulps)));
335                             if (fabsf(err2) < fabsf(err)) err = err2;
336                             if (fabsf(err3) < fabsf(err)) err = err3;
337 
338                             // retry per section 6.5.3.4
339                             if (IsDoubleResultSubnormal(correct2,
340                                                         f->double_ulps)
341                                 || IsDoubleResultSubnormal(correct3,
342                                                            f->double_ulps))
343                             { // look at me now,
344                                 fail = fail && (test != 0.0f);
345                                 if (!fail) err = 0.0f;
346                             }
347 
348                             // try with first two args as zero
349                             if (IsDoubleSubnormal(s2[j]))
350                             { // its fun to have fun,
351                                 correct2 = f->dfunc.f_fff(0.0, 0.0, s3[j]);
352                                 correct3 = f->dfunc.f_fff(-0.0, 0.0, s3[j]);
353                                 long double correct4 =
354                                     f->dfunc.f_fff(0.0, -0.0, s3[j]);
355                                 long double correct5 =
356                                     f->dfunc.f_fff(-0.0, -0.0, s3[j]);
357                                 err2 =
358                                     Bruteforce_Ulp_Error_Double(test, correct2);
359                                 err3 =
360                                     Bruteforce_Ulp_Error_Double(test, correct3);
361                                 float err4 =
362                                     Bruteforce_Ulp_Error_Double(test, correct4);
363                                 float err5 =
364                                     Bruteforce_Ulp_Error_Double(test, correct5);
365                                 fail = fail
366                                     && ((!(fabsf(err2) <= f->double_ulps))
367                                         && (!(fabsf(err3) <= f->double_ulps))
368                                         && (!(fabsf(err4) <= f->double_ulps))
369                                         && (!(fabsf(err5) <= f->double_ulps)));
370                                 if (fabsf(err2) < fabsf(err)) err = err2;
371                                 if (fabsf(err3) < fabsf(err)) err = err3;
372                                 if (fabsf(err4) < fabsf(err)) err = err4;
373                                 if (fabsf(err5) < fabsf(err)) err = err5;
374 
375                                 // retry per section 6.5.3.4
376                                 if (IsDoubleResultSubnormal(correct2,
377                                                             f->double_ulps)
378                                     || IsDoubleResultSubnormal(correct3,
379                                                                f->double_ulps)
380                                     || IsDoubleResultSubnormal(correct4,
381                                                                f->double_ulps)
382                                     || IsDoubleResultSubnormal(correct5,
383                                                                f->double_ulps))
384                                 {
385                                     fail = fail && (test != 0.0f);
386                                     if (!fail) err = 0.0f;
387                                 }
388 
389                                 if (IsDoubleSubnormal(s3[j]))
390                                 { // but you have to know how!
391                                     correct2 = f->dfunc.f_fff(0.0, 0.0, 0.0f);
392                                     correct3 = f->dfunc.f_fff(-0.0, 0.0, 0.0f);
393                                     correct4 = f->dfunc.f_fff(0.0, -0.0, 0.0f);
394                                     correct5 = f->dfunc.f_fff(-0.0, -0.0, 0.0f);
395                                     long double correct6 =
396                                         f->dfunc.f_fff(0.0, 0.0, -0.0f);
397                                     long double correct7 =
398                                         f->dfunc.f_fff(-0.0, 0.0, -0.0f);
399                                     long double correct8 =
400                                         f->dfunc.f_fff(0.0, -0.0, -0.0f);
401                                     long double correct9 =
402                                         f->dfunc.f_fff(-0.0, -0.0, -0.0f);
403                                     err2 = Bruteforce_Ulp_Error_Double(
404                                         test, correct2);
405                                     err3 = Bruteforce_Ulp_Error_Double(
406                                         test, correct3);
407                                     err4 = Bruteforce_Ulp_Error_Double(
408                                         test, correct4);
409                                     err5 = Bruteforce_Ulp_Error_Double(
410                                         test, correct5);
411                                     float err6 = Bruteforce_Ulp_Error_Double(
412                                         test, correct6);
413                                     float err7 = Bruteforce_Ulp_Error_Double(
414                                         test, correct7);
415                                     float err8 = Bruteforce_Ulp_Error_Double(
416                                         test, correct8);
417                                     float err9 = Bruteforce_Ulp_Error_Double(
418                                         test, correct9);
419                                     fail = fail
420                                         && ((!(fabsf(err2) <= f->double_ulps))
421                                             && (!(fabsf(err3)
422                                                   <= f->double_ulps))
423                                             && (!(fabsf(err4)
424                                                   <= f->double_ulps))
425                                             && (!(fabsf(err5)
426                                                   <= f->double_ulps))
427                                             && (!(fabsf(err5)
428                                                   <= f->double_ulps))
429                                             && (!(fabsf(err6)
430                                                   <= f->double_ulps))
431                                             && (!(fabsf(err7)
432                                                   <= f->double_ulps))
433                                             && (!(fabsf(err8)
434                                                   <= f->double_ulps)));
435                                     if (fabsf(err2) < fabsf(err)) err = err2;
436                                     if (fabsf(err3) < fabsf(err)) err = err3;
437                                     if (fabsf(err4) < fabsf(err)) err = err4;
438                                     if (fabsf(err5) < fabsf(err)) err = err5;
439                                     if (fabsf(err6) < fabsf(err)) err = err6;
440                                     if (fabsf(err7) < fabsf(err)) err = err7;
441                                     if (fabsf(err8) < fabsf(err)) err = err8;
442                                     if (fabsf(err9) < fabsf(err)) err = err9;
443 
444                                     // retry per section 6.5.3.4
445                                     if (IsDoubleResultSubnormal(correct2,
446                                                                 f->double_ulps)
447                                         || IsDoubleResultSubnormal(
448                                             correct3, f->double_ulps)
449                                         || IsDoubleResultSubnormal(
450                                             correct4, f->double_ulps)
451                                         || IsDoubleResultSubnormal(
452                                             correct5, f->double_ulps)
453                                         || IsDoubleResultSubnormal(
454                                             correct6, f->double_ulps)
455                                         || IsDoubleResultSubnormal(
456                                             correct7, f->double_ulps)
457                                         || IsDoubleResultSubnormal(
458                                             correct8, f->double_ulps)
459                                         || IsDoubleResultSubnormal(
460                                             correct9, f->double_ulps))
461                                     {
462                                         fail = fail && (test != 0.0f);
463                                         if (!fail) err = 0.0f;
464                                     }
465                                 }
466                             }
467                             else if (IsDoubleSubnormal(s3[j]))
468                             {
469                                 correct2 = f->dfunc.f_fff(0.0, s2[j], 0.0);
470                                 correct3 = f->dfunc.f_fff(-0.0, s2[j], 0.0);
471                                 long double correct4 =
472                                     f->dfunc.f_fff(0.0, s2[j], -0.0);
473                                 long double correct5 =
474                                     f->dfunc.f_fff(-0.0, s2[j], -0.0);
475                                 err2 =
476                                     Bruteforce_Ulp_Error_Double(test, correct2);
477                                 err3 =
478                                     Bruteforce_Ulp_Error_Double(test, correct3);
479                                 float err4 =
480                                     Bruteforce_Ulp_Error_Double(test, correct4);
481                                 float err5 =
482                                     Bruteforce_Ulp_Error_Double(test, correct5);
483                                 fail = fail
484                                     && ((!(fabsf(err2) <= f->double_ulps))
485                                         && (!(fabsf(err3) <= f->double_ulps))
486                                         && (!(fabsf(err4) <= f->double_ulps))
487                                         && (!(fabsf(err5) <= f->double_ulps)));
488                                 if (fabsf(err2) < fabsf(err)) err = err2;
489                                 if (fabsf(err3) < fabsf(err)) err = err3;
490                                 if (fabsf(err4) < fabsf(err)) err = err4;
491                                 if (fabsf(err5) < fabsf(err)) err = err5;
492 
493                                 // retry per section 6.5.3.4
494                                 if (IsDoubleResultSubnormal(correct2,
495                                                             f->double_ulps)
496                                     || IsDoubleResultSubnormal(correct3,
497                                                                f->double_ulps)
498                                     || IsDoubleResultSubnormal(correct4,
499                                                                f->double_ulps)
500                                     || IsDoubleResultSubnormal(correct5,
501                                                                f->double_ulps))
502                                 {
503                                     fail = fail && (test != 0.0f);
504                                     if (!fail) err = 0.0f;
505                                 }
506                             }
507                         }
508                         else if (fail && IsDoubleSubnormal(s2[j]))
509                         {
510                             long double correct2 =
511                                 f->dfunc.f_fff(s[j], 0.0, s3[j]);
512                             long double correct3 =
513                                 f->dfunc.f_fff(s[j], -0.0, s3[j]);
514                             float err2 =
515                                 Bruteforce_Ulp_Error_Double(test, correct2);
516                             float err3 =
517                                 Bruteforce_Ulp_Error_Double(test, correct3);
518                             fail = fail
519                                 && ((!(fabsf(err2) <= f->double_ulps))
520                                     && (!(fabsf(err3) <= f->double_ulps)));
521                             if (fabsf(err2) < fabsf(err)) err = err2;
522                             if (fabsf(err3) < fabsf(err)) err = err3;
523 
524                             // retry per section 6.5.3.4
525                             if (IsDoubleResultSubnormal(correct2,
526                                                         f->double_ulps)
527                                 || IsDoubleResultSubnormal(correct3,
528                                                            f->double_ulps))
529                             {
530                                 fail = fail && (test != 0.0f);
531                                 if (!fail) err = 0.0f;
532                             }
533 
534                             // try with second two args as zero
535                             if (IsDoubleSubnormal(s3[j]))
536                             {
537                                 correct2 = f->dfunc.f_fff(s[j], 0.0, 0.0);
538                                 correct3 = f->dfunc.f_fff(s[j], -0.0, 0.0);
539                                 long double correct4 =
540                                     f->dfunc.f_fff(s[j], 0.0, -0.0);
541                                 long double correct5 =
542                                     f->dfunc.f_fff(s[j], -0.0, -0.0);
543                                 err2 =
544                                     Bruteforce_Ulp_Error_Double(test, correct2);
545                                 err3 =
546                                     Bruteforce_Ulp_Error_Double(test, correct3);
547                                 float err4 =
548                                     Bruteforce_Ulp_Error_Double(test, correct4);
549                                 float err5 =
550                                     Bruteforce_Ulp_Error_Double(test, correct5);
551                                 fail = fail
552                                     && ((!(fabsf(err2) <= f->double_ulps))
553                                         && (!(fabsf(err3) <= f->double_ulps))
554                                         && (!(fabsf(err4) <= f->double_ulps))
555                                         && (!(fabsf(err5) <= f->double_ulps)));
556                                 if (fabsf(err2) < fabsf(err)) err = err2;
557                                 if (fabsf(err3) < fabsf(err)) err = err3;
558                                 if (fabsf(err4) < fabsf(err)) err = err4;
559                                 if (fabsf(err5) < fabsf(err)) err = err5;
560 
561                                 // retry per section 6.5.3.4
562                                 if (IsDoubleResultSubnormal(correct2,
563                                                             f->double_ulps)
564                                     || IsDoubleResultSubnormal(correct3,
565                                                                f->double_ulps)
566                                     || IsDoubleResultSubnormal(correct4,
567                                                                f->double_ulps)
568                                     || IsDoubleResultSubnormal(correct5,
569                                                                f->double_ulps))
570                                 {
571                                     fail = fail && (test != 0.0f);
572                                     if (!fail) err = 0.0f;
573                                 }
574                             }
575                         }
576                         else if (fail && IsDoubleSubnormal(s3[j]))
577                         {
578                             long double correct2 =
579                                 f->dfunc.f_fff(s[j], s2[j], 0.0);
580                             long double correct3 =
581                                 f->dfunc.f_fff(s[j], s2[j], -0.0);
582                             float err2 =
583                                 Bruteforce_Ulp_Error_Double(test, correct2);
584                             float err3 =
585                                 Bruteforce_Ulp_Error_Double(test, correct3);
586                             fail = fail
587                                 && ((!(fabsf(err2) <= f->double_ulps))
588                                     && (!(fabsf(err3) <= f->double_ulps)));
589                             if (fabsf(err2) < fabsf(err)) err = err2;
590                             if (fabsf(err3) < fabsf(err)) err = err3;
591 
592                             // retry per section 6.5.3.4
593                             if (IsDoubleResultSubnormal(correct2,
594                                                         f->double_ulps)
595                                 || IsDoubleResultSubnormal(correct3,
596                                                            f->double_ulps))
597                             {
598                                 fail = fail && (test != 0.0f);
599                                 if (!fail) err = 0.0f;
600                             }
601                         }
602                     }
603 
604                     if (fabsf(err) > maxError)
605                     {
606                         maxError = fabsf(err);
607                         maxErrorVal = s[j];
608                         maxErrorVal2 = s2[j];
609                         maxErrorVal3 = s3[j];
610                     }
611 
612                     if (fail)
613                     {
614                         vlog_error("\nERROR: %sD%s: %f ulp error at {%.13la, "
615                                    "%.13la, %.13la}: *%.13la vs. %.13la\n",
616                                    f->name, sizeNames[k], err, s[j], s2[j],
617                                    s3[j], ((double *)gOut_Ref)[j], test);
618                         return -1;
619                     }
620                 }
621             }
622         }
623 
624         if (0 == (i & 0x0fffffff))
625         {
626             if (gVerboseBruteForce)
627             {
628                 vlog("base:%14" PRIu64 " step:%10" PRIu64
629                      "  bufferSize:%10d \n",
630                      i, step, BUFFER_SIZE);
631             }
632             else
633             {
634                 vlog(".");
635             }
636             fflush(stdout);
637         }
638     }
639 
640     if (!gSkipCorrectnessTesting)
641     {
642         if (gWimpyMode)
643             vlog("Wimp pass");
644         else
645             vlog("passed");
646 
647         vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
648              maxErrorVal3);
649     }
650 
651     vlog("\n");
652 
653     return CL_SUCCESS;
654 }
655