• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21 
22 #include <cinttypes>
23 #include <cstring>
24 
25 namespace {
26 
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29     BuildKernelInfo &info = *(BuildKernelInfo *)p;
30     auto generator = [](const std::string &kernel_name, const char *builtin,
31                         cl_uint vector_size_index) {
32         return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
33                               ParameterType::Float, ParameterType::Float,
34                               vector_size_index);
35     };
36     return BuildKernels(info, job_id, generator);
37 }
38 
39 } // anonymous namespace
40 
TestFunc_Float2_Float(const Func * f,MTdata d,bool relaxedMode)41 int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
42 {
43     int error;
44     Programs programs;
45     const unsigned thread_id = 0; // Test is currently not multithreaded.
46     KernelMatrix kernels;
47     float maxError0 = 0.0f;
48     float maxError1 = 0.0f;
49     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
50     float maxErrorVal0 = 0.0f;
51     float maxErrorVal1 = 0.0f;
52     uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
53     int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
54     cl_uchar overflow[BUFFER_SIZE / sizeof(float)];
55     int isFract = 0 == strcmp("fract", f->nameInCode);
56     int skipNanInf = isFract && !gInfNanSupport;
57 
58     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
59 
60     float float_ulps = getAllowedUlpError(f, relaxedMode);
61     // Init the kernels
62     BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
63                                 relaxedMode };
64     if ((error = ThreadPool_Do(BuildKernelFn,
65                                gMaxVectorSizeIndex - gMinVectorSizeIndex,
66                                &build_info)))
67         return error;
68 
69     for (uint64_t i = 0; i < (1ULL << 32); i += step)
70     {
71         // Init input array
72         uint32_t *p = (uint32_t *)gIn;
73         if (gWimpyMode)
74         {
75             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
76             {
77                 p[j] = (uint32_t)i + j * scale;
78                 if (relaxedMode && strcmp(f->name, "sincos") == 0)
79                 {
80                     float pj = *(float *)&p[j];
81                     if (fabs(pj) > M_PI) ((float *)p)[j] = NAN;
82                 }
83             }
84         }
85         else
86         {
87             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
88             {
89                 p[j] = (uint32_t)i + j;
90                 if (relaxedMode && strcmp(f->name, "sincos") == 0)
91                 {
92                     float pj = *(float *)&p[j];
93                     if (fabs(pj) > M_PI) ((float *)p)[j] = NAN;
94                 }
95             }
96         }
97 
98         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
99                                           BUFFER_SIZE, gIn, 0, NULL, NULL)))
100         {
101             vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
102             return error;
103         }
104 
105         // Write garbage into output arrays
106         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
107         {
108             uint32_t pattern = 0xffffdead;
109             if (gHostFill)
110             {
111                 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
112                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
113                                                   CL_FALSE, 0, BUFFER_SIZE,
114                                                   gOut[j], 0, NULL, NULL)))
115                 {
116                     vlog_error(
117                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
118                         error, j);
119                     return error;
120                 }
121 
122                 memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
123                 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
124                                                   CL_FALSE, 0, BUFFER_SIZE,
125                                                   gOut2[j], 0, NULL, NULL)))
126                 {
127                     vlog_error(
128                         "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
129                         error, j);
130                     return error;
131                 }
132             }
133             else
134             {
135                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
136                                                  &pattern, sizeof(pattern), 0,
137                                                  BUFFER_SIZE, 0, NULL, NULL)))
138                 {
139                     vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
140                                error);
141                     return error;
142                 }
143 
144                 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
145                                                  &pattern, sizeof(pattern), 0,
146                                                  BUFFER_SIZE, 0, NULL, NULL)))
147                 {
148                     vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
149                                error);
150                     return error;
151                 }
152             }
153         }
154 
155         // Run the kernels
156         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
157         {
158             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
159             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
160             if ((error = clSetKernelArg(kernels[j][thread_id], 0,
161                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
162             {
163                 LogBuildError(programs[j]);
164                 return error;
165             }
166             if ((error =
167                      clSetKernelArg(kernels[j][thread_id], 1,
168                                     sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
169             {
170                 LogBuildError(programs[j]);
171                 return error;
172             }
173             if ((error = clSetKernelArg(kernels[j][thread_id], 2,
174                                         sizeof(gInBuffer), &gInBuffer)))
175             {
176                 LogBuildError(programs[j]);
177                 return error;
178             }
179 
180             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
181                                                 1, NULL, &localCount, NULL, 0,
182                                                 NULL, NULL)))
183             {
184                 vlog_error("FAILED -- could not execute kernel\n");
185                 return error;
186             }
187         }
188 
189         // Get that moving
190         if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
191 
192         FPU_mode_type oldMode = 0;
193         RoundingMode oldRoundMode = kRoundToNearestEven;
194         if (isFract)
195         {
196             // Calculate the correctly rounded reference result
197             if (ftz || relaxedMode) ForceFTZ(&oldMode);
198 
199             // Set the rounding mode to match the device
200             if (gIsInRTZMode)
201                 oldRoundMode = set_round(kRoundTowardZero, kfloat);
202         }
203 
204         // Calculate the correctly rounded reference result
205         float *r = (float *)gOut_Ref;
206         float *r2 = (float *)gOut_Ref2;
207         float *s = (float *)gIn;
208 
209         if (skipNanInf)
210         {
211             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
212             {
213                 double dd;
214                 feclearexcept(FE_OVERFLOW);
215 
216                 if (relaxedMode)
217                     r[j] = (float)f->rfunc.f_fpf(s[j], &dd);
218                 else
219                     r[j] = (float)f->func.f_fpf(s[j], &dd);
220 
221                 r2[j] = (float)dd;
222                 overflow[j] =
223                     FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
224             }
225         }
226         else
227         {
228             for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
229             {
230                 double dd;
231                 if (relaxedMode)
232                     r[j] = (float)f->rfunc.f_fpf(s[j], &dd);
233                 else
234                     r[j] = (float)f->func.f_fpf(s[j], &dd);
235 
236                 r2[j] = (float)dd;
237             }
238         }
239 
240         if (isFract && ftz) RestoreFPState(&oldMode);
241 
242         // Read the data back
243         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
244         {
245             if ((error =
246                      clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
247                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
248             {
249                 vlog_error("ReadArray failed %d\n", error);
250                 return error;
251             }
252             if ((error =
253                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
254                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
255             {
256                 vlog_error("ReadArray2 failed %d\n", error);
257                 return error;
258             }
259         }
260 
261         if (gSkipCorrectnessTesting)
262         {
263             if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
264             break;
265         }
266 
267         // Verify data
268         uint32_t *t = (uint32_t *)gOut_Ref;
269         uint32_t *t2 = (uint32_t *)gOut_Ref2;
270         for (size_t j = 0; j < BUFFER_SIZE / sizeof(float); j++)
271         {
272             for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
273             {
274                 uint32_t *q = (uint32_t *)gOut[k];
275                 uint32_t *q2 = (uint32_t *)gOut2[k];
276 
277                 // If we aren't getting the correctly rounded result
278                 if (t[j] != q[j] || t2[j] != q2[j])
279                 {
280                     double correct, correct2;
281                     float err, err2;
282                     float test = ((float *)q)[j];
283                     float test2 = ((float *)q2)[j];
284 
285                     if (relaxedMode)
286                         correct = f->rfunc.f_fpf(s[j], &correct2);
287                     else
288                         correct = f->func.f_fpf(s[j], &correct2);
289 
290                     // Per section 10 paragraph 6, accept any result if an input
291                     // or output is a infinity or NaN or overflow
292                     if (relaxedMode || skipNanInf)
293                     {
294                         if (skipNanInf && overflow[j]) continue;
295                         // Note: no double rounding here.  Reference functions
296                         // calculate in single precision.
297                         if (IsFloatInfinity(correct) || IsFloatNaN(correct)
298                             || IsFloatInfinity(correct2) || IsFloatNaN(correct2)
299                             || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
300                             continue;
301                     }
302 
303                     typedef int (*CheckForSubnormal)(
304                         double, float); // If we are in fast relaxed math, we
305                                         // have a different calculation for the
306                                         // subnormal threshold.
307                     CheckForSubnormal isFloatResultSubnormalPtr;
308                     if (relaxedMode)
309                     {
310                         err = Abs_Error(test, correct);
311                         err2 = Abs_Error(test2, correct2);
312                         isFloatResultSubnormalPtr =
313                             &IsFloatResultSubnormalAbsError;
314                     }
315                     else
316                     {
317                         err = Ulp_Error(test, correct);
318                         err2 = Ulp_Error(test2, correct2);
319                         isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
320                     }
321                     int fail = !(fabsf(err) <= float_ulps
322                                  && fabsf(err2) <= float_ulps);
323 
324                     if (ftz || relaxedMode)
325                     {
326                         // retry per section 6.5.3.2
327                         if ((*isFloatResultSubnormalPtr)(correct, float_ulps))
328                         {
329                             if ((*isFloatResultSubnormalPtr)(correct2,
330                                                              float_ulps))
331                             {
332                                 fail = fail && !(test == 0.0f && test2 == 0.0f);
333                                 if (!fail)
334                                 {
335                                     err = 0.0f;
336                                     err2 = 0.0f;
337                                 }
338                             }
339                             else
340                             {
341                                 fail = fail
342                                     && !(test == 0.0f
343                                          && fabsf(err2) <= float_ulps);
344                                 if (!fail) err = 0.0f;
345                             }
346                         }
347                         else if ((*isFloatResultSubnormalPtr)(correct2,
348                                                               float_ulps))
349                         {
350                             fail = fail
351                                 && !(test2 == 0.0f && fabsf(err) <= float_ulps);
352                             if (!fail) err2 = 0.0f;
353                         }
354 
355 
356                         // retry per section 6.5.3.3
357                         if (IsFloatSubnormal(s[j]))
358                         {
359                             double correctp, correctn;
360                             double correct2p, correct2n;
361                             float errp, err2p, errn, err2n;
362 
363                             if (skipNanInf) feclearexcept(FE_OVERFLOW);
364                             if (relaxedMode)
365                             {
366                                 correctp = f->rfunc.f_fpf(0.0, &correct2p);
367                                 correctn = f->rfunc.f_fpf(-0.0, &correct2n);
368                             }
369                             else
370                             {
371                                 correctp = f->func.f_fpf(0.0, &correct2p);
372                                 correctn = f->func.f_fpf(-0.0, &correct2n);
373                             }
374 
375                             // Per section 10 paragraph 6, accept any result if
376                             // an input or output is a infinity or NaN or
377                             // overflow
378                             if (skipNanInf)
379                             {
380                                 if (fetestexcept(FE_OVERFLOW)) continue;
381 
382                                 // Note: no double rounding here.  Reference
383                                 // functions calculate in single precision.
384                                 if (IsFloatInfinity(correctp)
385                                     || IsFloatNaN(correctp)
386                                     || IsFloatInfinity(correctn)
387                                     || IsFloatNaN(correctn)
388                                     || IsFloatInfinity(correct2p)
389                                     || IsFloatNaN(correct2p)
390                                     || IsFloatInfinity(correct2n)
391                                     || IsFloatNaN(correct2n))
392                                     continue;
393                             }
394 
395                             if (relaxedMode)
396                             {
397                                 errp = Abs_Error(test, correctp);
398                                 err2p = Abs_Error(test, correct2p);
399                                 errn = Abs_Error(test, correctn);
400                                 err2n = Abs_Error(test, correct2n);
401                             }
402                             else
403                             {
404                                 errp = Ulp_Error(test, correctp);
405                                 err2p = Ulp_Error(test, correct2p);
406                                 errn = Ulp_Error(test, correctn);
407                                 err2n = Ulp_Error(test, correct2n);
408                             }
409 
410                             fail = fail
411                                 && ((!(fabsf(errp) <= float_ulps))
412                                     && (!(fabsf(err2p) <= float_ulps))
413                                     && ((!(fabsf(errn) <= float_ulps))
414                                         && (!(fabsf(err2n) <= float_ulps))));
415                             if (fabsf(errp) < fabsf(err)) err = errp;
416                             if (fabsf(errn) < fabsf(err)) err = errn;
417                             if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
418                             if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
419 
420                             // retry per section 6.5.3.4
421                             if ((*isFloatResultSubnormalPtr)(correctp,
422                                                              float_ulps)
423                                 || (*isFloatResultSubnormalPtr)(correctn,
424                                                                 float_ulps))
425                             {
426                                 if ((*isFloatResultSubnormalPtr)(correct2p,
427                                                                  float_ulps)
428                                     || (*isFloatResultSubnormalPtr)(correct2n,
429                                                                     float_ulps))
430                                 {
431                                     fail = fail
432                                         && !(test == 0.0f && test2 == 0.0f);
433                                     if (!fail) err = err2 = 0.0f;
434                                 }
435                                 else
436                                 {
437                                     fail = fail
438                                         && !(test == 0.0f
439                                              && fabsf(err2) <= float_ulps);
440                                     if (!fail) err = 0.0f;
441                                 }
442                             }
443                             else if ((*isFloatResultSubnormalPtr)(correct2p,
444                                                                   float_ulps)
445                                      || (*isFloatResultSubnormalPtr)(
446                                          correct2n, float_ulps))
447                             {
448                                 fail = fail
449                                     && !(test2 == 0.0f
450                                          && (fabsf(err) <= float_ulps));
451                                 if (!fail) err2 = 0.0f;
452                             }
453                         }
454                     }
455                     if (fabsf(err) > maxError0)
456                     {
457                         maxError0 = fabsf(err);
458                         maxErrorVal0 = s[j];
459                     }
460                     if (fabsf(err2) > maxError1)
461                     {
462                         maxError1 = fabsf(err2);
463                         maxErrorVal1 = s[j];
464                     }
465                     if (fail)
466                     {
467                         vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
468                                    "*{%a, %a} vs. {%a, %a}\n",
469                                    f->name, sizeNames[k], err, err2,
470                                    ((float *)gIn)[j], ((float *)gOut_Ref)[j],
471                                    ((float *)gOut_Ref2)[j], test, test2);
472                         return -1;
473                     }
474                 }
475             }
476         }
477 
478         if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
479 
480         if (0 == (i & 0x0fffffff))
481         {
482             if (gVerboseBruteForce)
483             {
484                 vlog("base:%14" PRIu64 " step:%10" PRIu64
485                      "  bufferSize:%10d \n",
486                      i, step, BUFFER_SIZE);
487             }
488             else
489             {
490                 vlog(".");
491             }
492             fflush(stdout);
493         }
494     }
495 
496     if (!gSkipCorrectnessTesting)
497     {
498         if (gWimpyMode)
499             vlog("Wimp pass");
500         else
501             vlog("passed");
502 
503         vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
504              maxErrorVal1);
505     }
506 
507     vlog("\n");
508 
509     return CL_SUCCESS;
510 }
511