1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "common.h"
18 #include "function_list.h"
19 #include "test_functions.h"
20 #include "utility.h"
21
22 #include <cinttypes>
23 #include <cstring>
24
25 namespace {
26
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)27 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
28 {
29 BuildKernelInfo &info = *(BuildKernelInfo *)p;
30 auto generator = [](const std::string &kernel_name, const char *builtin,
31 cl_uint vector_size_index) {
32 return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
33 ParameterType::ULong, vector_size_index);
34 };
35 return BuildKernels(info, job_id, generator);
36 }
37
random64(MTdata d)38 cl_ulong random64(MTdata d)
39 {
40 return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32);
41 }
42
43 } // anonymous namespace
44
TestFunc_Double_ULong(const Func * f,MTdata d,bool relaxedMode)45 int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
46 {
47 int error;
48 Programs programs;
49 const unsigned thread_id = 0; // Test is currently not multithreaded.
50 KernelMatrix kernels;
51 float maxError = 0.0f;
52 int ftz = f->ftz || gForceFTZ;
53 double maxErrorVal = 0.0f;
54 uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
55
56 logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
57
58 Force64BitFPUPrecision();
59
60 // Init the kernels
61 BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
62 relaxedMode };
63 if ((error = ThreadPool_Do(BuildKernelFn,
64 gMaxVectorSizeIndex - gMinVectorSizeIndex,
65 &build_info)))
66 return error;
67
68 for (uint64_t i = 0; i < (1ULL << 32); i += step)
69 {
70 // Init input array
71 cl_ulong *p = (cl_ulong *)gIn;
72 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_ulong); j++)
73 p[j] = random64(d);
74
75 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
76 BUFFER_SIZE, gIn, 0, NULL, NULL)))
77 {
78 vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
79 return error;
80 }
81
82 // Write garbage into output arrays
83 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
84 {
85 uint32_t pattern = 0xffffdead;
86 if (gHostFill)
87 {
88 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
89 if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
90 CL_FALSE, 0, BUFFER_SIZE,
91 gOut[j], 0, NULL, NULL)))
92 {
93 vlog_error(
94 "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
95 error, j);
96 return error;
97 }
98 }
99 else
100 {
101 if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
102 &pattern, sizeof(pattern), 0,
103 BUFFER_SIZE, 0, NULL, NULL)))
104 {
105 vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
106 error);
107 return error;
108 }
109 }
110 }
111
112 // Run the kernels
113 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
114 {
115 size_t vectorSize = sizeValues[j] * sizeof(cl_double);
116 size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
117 if ((error = clSetKernelArg(kernels[j][thread_id], 0,
118 sizeof(gOutBuffer[j]), &gOutBuffer[j])))
119 {
120 LogBuildError(programs[j]);
121 return error;
122 }
123 if ((error = clSetKernelArg(kernels[j][thread_id], 1,
124 sizeof(gInBuffer), &gInBuffer)))
125 {
126 LogBuildError(programs[j]);
127 return error;
128 }
129
130 if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
131 1, NULL, &localCount, NULL, 0,
132 NULL, NULL)))
133 {
134 vlog_error("FAILED -- could not execute kernel\n");
135 return error;
136 }
137 }
138
139 // Get that moving
140 if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
141
142 // Calculate the correctly rounded reference result
143 double *r = (double *)gOut_Ref;
144 cl_ulong *s = (cl_ulong *)gIn;
145 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
146 r[j] = (double)f->dfunc.f_u(s[j]);
147
148 // Read the data back
149 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
150 {
151 if ((error =
152 clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
153 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
154 {
155 vlog_error("ReadArray failed %d\n", error);
156 return error;
157 }
158 }
159
160 if (gSkipCorrectnessTesting) break;
161
162 // Verify data
163 uint64_t *t = (uint64_t *)gOut_Ref;
164 for (size_t j = 0; j < BUFFER_SIZE / sizeof(cl_double); j++)
165 {
166 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
167 {
168 uint64_t *q = (uint64_t *)(gOut[k]);
169
170 // If we aren't getting the correctly rounded result
171 if (t[j] != q[j])
172 {
173 double test = ((double *)q)[j];
174 long double correct = f->dfunc.f_u(s[j]);
175 float err = Bruteforce_Ulp_Error_Double(test, correct);
176 int fail = !(fabsf(err) <= f->double_ulps);
177
178 if (fail)
179 {
180 if (ftz || relaxedMode)
181 {
182 // retry per section 6.5.3.2
183 if (IsDoubleResultSubnormal(correct,
184 f->double_ulps))
185 {
186 fail = fail && (test != 0.0);
187 if (!fail) err = 0.0f;
188 }
189 }
190 }
191 if (fabsf(err) > maxError)
192 {
193 maxError = fabsf(err);
194 maxErrorVal = s[j];
195 }
196 if (fail)
197 {
198 vlog_error(
199 "\n%s%sD: %f ulp error at 0x%16.16" PRIx64 ": "
200 "*%.13la vs. %.13la\n",
201 f->name, sizeNames[k], err, ((uint64_t *)gIn)[j],
202 ((double *)gOut_Ref)[j], test);
203 return -1;
204 }
205 }
206 }
207 }
208
209 if (0 == (i & 0x0fffffff))
210 {
211 if (gVerboseBruteForce)
212 {
213 vlog("base:%14" PRIu64 " step:%10" PRIu64
214 " bufferSize:%10d \n",
215 i, step, BUFFER_SIZE);
216 }
217 else
218 {
219 vlog(".");
220 }
221 fflush(stdout);
222 }
223 }
224
225 if (!gSkipCorrectnessTesting)
226 {
227 if (gWimpyMode)
228 vlog("Wimp pass");
229 else
230 vlog("passed");
231
232 vlog("\t%8.2f @ %a", maxError, maxErrorVal);
233 }
234
235 vlog("\n");
236
237 return CL_SUCCESS;
238 }
239