• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/testHarness.h"
17 #include "harness/compat.h"
18 #include "harness/ThreadPool.h"
19 
20 #if defined(__APPLE__)
21 #include <sys/sysctl.h>
22 #include <mach/mach_time.h>
23 #endif
24 
25 #if defined(__linux__)
26 #include <unistd.h>
27 #include <sys/syscall.h>
28 #include <linux/sysctl.h>
29 #endif
30 #if defined(__linux__)
31 #include <sys/param.h>
32 #include <libgen.h>
33 #endif
34 
35 #if defined(__MINGW32__)
36 #include <sys/param.h>
37 #endif
38 
39 #include <sstream>
40 #include <stdarg.h>
41 #if !defined(_WIN32)
42 #include <libgen.h>
43 #include <sys/mman.h>
44 #endif
45 #include <time.h>
46 
47 #include <algorithm>
48 
49 #include <vector>
50 #include <type_traits>
51 
52 #include "basic_test_conversions.h"
53 
54 #if defined(_WIN32)
55 #include <mmintrin.h>
56 #include <emmintrin.h>
57 #else // !_WIN32
58 #if defined(__SSE__)
59 #include <xmmintrin.h>
60 #endif
61 #if defined(__SSE2__)
62 #include <emmintrin.h>
63 #endif
64 #endif // _WIN32
65 
66 cl_context gContext = NULL;
67 cl_command_queue gQueue = NULL;
68 int gStartTestNumber = -1;
69 int gEndTestNumber = 0;
70 #if defined(__APPLE__)
71 int gTimeResults = 1;
72 #else
73 int gTimeResults = 0;
74 #endif
75 int gReportAverageTimes = 0;
76 void *gIn = NULL;
77 void *gRef = NULL;
78 void *gAllowZ = NULL;
79 void *gOut[kCallStyleCount] = { NULL };
80 cl_mem gInBuffer;
81 cl_mem gOutBuffers[kCallStyleCount];
82 size_t gComputeDevices = 0;
83 uint32_t gDeviceFrequency = 0;
84 int gWimpyMode = 0;
85 int gWimpyReductionFactor = 128;
86 int gSkipTesting = 0;
87 int gForceFTZ = 0;
88 int gIsRTZ = 0;
89 uint32_t gSimdSize = 1;
90 int gHasDouble = 0;
91 int gTestDouble = 1;
92 const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
93 int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
94 int gMinVectorSize = 0;
95 int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]);
96 MTdata gMTdata;
97 const char **argList = NULL;
98 int argCount = 0;
99 
100 
101 double SubtractTime(uint64_t endTime, uint64_t startTime);
102 
103 
104 // clang-format off
105 // for readability sake keep this section unformatted
106 
107 std::vector<unsigned int> DataInitInfo::specialValuesUInt = {
108       uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2),
109       uint32_t(-(1 << 30) - 3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)),
110       uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3),
111       uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1),
112       uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3),
113       uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1),
114       uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 << 23) + 3),
115       uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1),
116       uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3),
117       uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1),
118       uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3),
119       uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1),
120       uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3),
121       uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1),
122       uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3),
123       uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1),
124       uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3),
125       uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1),
126       uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3),
127       uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4,
128       (1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3,
129       (1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3,
130       (1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3,
131       (1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, (1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3,
132       (1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3,
133       (1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3,
134       (1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3,
135       (1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3,
136       (1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 << 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3,
137       INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above
138       UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX
139 };
140 
141 std::vector<float> DataInitInfo::specialValuesFloat = {
142     -NAN, -INFINITY, -FLT_MAX,
143     MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
144     MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
145     MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
146     MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
147     -1000.f, -100.f, -4.0f, -3.5f, -3.0f,
148     MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f,
149     MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f,
150     MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f,
151     MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f,
152     MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f,
153     MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f,
154     MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN,
155     MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
156     MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
157     MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
158     MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
159     MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
160     MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX,
161     MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
162     MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
163     MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
164     MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
165     +1000.f, +100.f, +4.0f, +3.5f, +3.0f,
166     MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f,
167     MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
168     MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
169     MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
170     MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
171     MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
172     MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
173     MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
174     MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
175     MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
176     MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
177 };
178 
179 // A table of more difficult cases to get right
180 std::vector<double> DataInitInfo::specialValuesDouble = {
181     -NAN, -INFINITY, -DBL_MAX,
182     MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
183     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8),
184     MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12),
185     MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59),
186     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
187     MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
188     MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28),
189     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
190     MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
191     MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27),
192     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
193     MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
194     -1000., -100., -4.0, -3.5, -3.0,
195     MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5,
196     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0,
197     MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5,
198     MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0,
199     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5,
200     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25,
201     MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
202     -DBL_MIN,
203     MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
204     MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
205     MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
206     MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
207     MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
208     MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
209     MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
210     -0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
211     MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59),
212     MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
213     MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
214     MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28),
215     MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
216     MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
217     MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27),
218     MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
219     MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
220     +1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5,
221     MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
222     +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
223     +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
224     +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
225     +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
226     +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
227     MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
228     MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
229     MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
230     MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
231     MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
232     MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
233     MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10),
234     MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
235     MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10),
236     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1),
237     MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1),
238     MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1),
239     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0),
240     MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0),
241     MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0),
242     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1),
243     MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1),
244     MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1),
245     MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0),
246     MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52),
247     MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0),
248     MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1),
249     MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53),
250     MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1),
251     MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2),
252     MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54),
253     MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2),
254     MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10),
255     MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10),
256     MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
257     MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
258 };
259 // clang-format on
260 
261 
262 // Windows (since long double got deprecated) sets the x87 to 53-bit precision
263 // (that's x87 default state).  This causes problems with the tests that
264 // convert long and ulong to float and double or otherwise deal with values
265 // that need more precision than 53-bit. So, set the x87 to 64-bit precision.
Force64BitFPUPrecision(void)266 static inline void Force64BitFPUPrecision(void)
267 {
268 #if __MINGW32__
269     // The usual method is to use _controlfp as follows:
270     //     #include <float.h>
271     //     _controlfp(_PC_64, _MCW_PC);
272     //
273     // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
274     // divergent code just use inline assembly which works for both.
275     unsigned short int orig_cw = 0;
276     unsigned short int new_cw = 0;
277     __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
278     new_cw = orig_cw | 0x0300; // set precision to 64-bit
279     __asm__ __volatile__("fldcw  %0" ::"m"(new_cw));
280 #else
281     /* Implement for other platforms if needed */
282 #endif
283 }
284 
285 
286 template <typename InType, typename OutType>
check_result(void * test,uint32_t count,int vectorSize)287 int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
288                                                   int vectorSize)
289 {
290     const cl_uchar *a = (const cl_uchar *)gAllowZ;
291 
292     if (std::is_integral<OutType>::value)
293     { // char/uchar/short/ushort/int/uint/long/ulong
294         const OutType *t = (const OutType *)test;
295         const OutType *c = (const OutType *)gRef;
296         for (uint32_t i = 0; i < count; i++)
297             if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0))
298             {
299                 size_t s = sizeof(OutType) * 2;
300                 std::stringstream sstr;
301                 sstr << "\nError for vector size %d found at 0x%8.8x:  *0x%"
302                      << s << "." << s << "x vs 0x%" << s << "." << s << "x\n";
303                 vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]);
304                 return i + 1;
305             }
306     }
307     else if (std::is_same<OutType, cl_float>::value)
308     {
309         // cast to integral - from original test
310         const cl_uint *t = (const cl_uint *)test;
311         const cl_uint *c = (const cl_uint *)gRef;
312 
313         for (uint32_t i = 0; i < count; i++)
314             if (t[i] != c[i] &&
315                 // Allow nan's to be binary different
316                 !((t[i] & 0x7fffffffU) > 0x7f800000U
317                   && (c[i] & 0x7fffffffU) > 0x7f800000U)
318                 && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U)))
319             {
320                 vlog(
321                     "\nError for vector size %d found at 0x%8.8x:  *%a vs %a\n",
322                     vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
323                 return i + 1;
324             }
325     }
326     else
327     {
328         const cl_ulong *t = (const cl_ulong *)test;
329         const cl_ulong *c = (const cl_ulong *)gRef;
330 
331         for (uint32_t i = 0; i < count; i++)
332             if (t[i] != c[i] &&
333                 // Allow nan's to be binary different
334                 !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL
335                   && (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL)
336                 && !(a[i] != (cl_uchar)0
337                      && t[i] == (c[i] & 0x8000000000000000ULL)))
338             {
339                 vlog(
340                     "\nError for vector size %d found at 0x%8.8x:  *%a vs %a\n",
341                     vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
342                 return i + 1;
343             }
344     }
345 
346     return 0;
347 }
348 
349 
RoundUpToNextPowerOfTwo(cl_uint x)350 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
351 {
352     if (0 == (x & (x - 1))) return x;
353 
354     while (x & (x - 1)) x &= x - 1;
355 
356     return x + x;
357 }
358 
359 
Run()360 cl_int CustomConversionsTest::Run()
361 {
362     int startMinVectorSize = gMinVectorSize;
363     Type inType, outType;
364     RoundingMode round;
365     SaturationMode sat;
366 
367     for (int i = 0; i < argCount; i++)
368     {
369         if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round))
370         {
371             vlog_error("\n\t\t**** ERROR:  Unable to parse function name "
372                        "%s.  Skipping....  *****\n\n",
373                        argList[i]);
374             continue;
375         }
376 
377         // skip double if we don't have it
378         if (!gTestDouble && (inType == kdouble || outType == kdouble))
379         {
380             if (gHasDouble)
381             {
382                 vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
383                            gTypeNames[outType], gSaturationNames[sat],
384                            gRoundingModeNames[round], gTypeNames[inType]);
385                 vlog("\t\tcl_khr_fp64 enabled, but double testing turned "
386                      "off.\n");
387             }
388             continue;
389         }
390 
391         // skip longs on embedded
392         if (!gHasLong
393             && (inType == klong || outType == klong || inType == kulong
394                 || outType == kulong))
395         {
396             continue;
397         }
398 
399         // Skip the implicit converts if the rounding mode is not default or
400         // test is saturated
401         if (0 == startMinVectorSize)
402         {
403             if (sat || round != kDefaultRoundingMode)
404                 gMinVectorSize = 1;
405             else
406                 gMinVectorSize = 0;
407         }
408 
409         IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round,
410                                    sat);
411 
412         iter.Run();
413 
414         if (gFailCount)
415         {
416             vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
417                        gTypeNames[outType], gSaturationNames[sat],
418                        gRoundingModeNames[round], gTypeNames[inType]);
419         }
420     }
421 
422     return gFailCount;
423 }
424 
425 
ConversionsTest(cl_device_id device,cl_context context,cl_command_queue queue)426 ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
427                                  cl_command_queue queue)
428     : context(context), device(device), queue(queue), num_elements(0),
429       typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
430                      cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
431                      cl_ulong(0), cl_long(0) })
432 {}
433 
434 
Run()435 cl_int ConversionsTest::Run()
436 {
437     IterOverTypes iter(typeIterator, *this);
438 
439     iter.Run();
440 
441     return gFailCount;
442 }
443 
444 
SetUp(int elements)445 cl_int ConversionsTest::SetUp(int elements)
446 {
447     num_elements = elements;
448     return CL_SUCCESS;
449 }
450 
451 
452 template <typename InType, typename OutType>
TestTypesConversion(const Type & inType,const Type & outType,int & testNumber,int startMinVectorSize)453 void ConversionsTest::TestTypesConversion(const Type &inType,
454                                           const Type &outType, int &testNumber,
455                                           int startMinVectorSize)
456 {
457     SaturationMode sat;
458     RoundingMode round;
459     int error;
460 
461     // skip longs on embedded
462     if (!gHasLong
463         && (inType == klong || outType == klong || inType == kulong
464             || outType == kulong))
465     {
466         return;
467     }
468 
469     for (sat = (SaturationMode)0; sat < kSaturationModeCount;
470          sat = (SaturationMode)(sat + 1))
471     {
472         // skip illegal saturated conversions to float type
473         if (kSaturated == sat && (outType == kfloat || outType == kdouble))
474         {
475             continue;
476         }
477 
478         for (round = (RoundingMode)0; round < kRoundingModeCount;
479              round = (RoundingMode)(round + 1))
480         {
481             if (++testNumber < gStartTestNumber)
482             {
483                 continue;
484             }
485             else
486             {
487                 if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return;
488             }
489 
490             vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber,
491                  gTypeNames[outType], gSaturationNames[sat],
492                  gRoundingModeNames[round], gTypeNames[inType]);
493 
494             // skip double if we don't have it
495             if (!gTestDouble && (inType == kdouble || outType == kdouble))
496             {
497                 if (gHasDouble)
498                 {
499                     vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
500                                "FAILED ** \n",
501                                testNumber, gTypeNames[outType],
502                                gSaturationNames[sat], gRoundingModeNames[round],
503                                gTypeNames[inType]);
504                     vlog("\t\tcl_khr_fp64 enabled, but double "
505                          "testing turned off.\n");
506                 }
507                 continue;
508             }
509 
510             // Skip the implicit converts if the rounding mode is
511             // not default or test is saturated
512             if (0 == startMinVectorSize)
513             {
514                 if (sat || round != kDefaultRoundingMode)
515                     gMinVectorSize = 1;
516                 else
517                     gMinVectorSize = 0;
518             }
519 
520             if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
521             {
522                 vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
523                            "FAILED ** \n",
524                            testNumber, gTypeNames[outType],
525                            gSaturationNames[sat], gRoundingModeNames[round],
526                            gTypeNames[inType]);
527             }
528         }
529     }
530 }
531 
532 
533 template <typename InType, typename OutType>
DoTest(Type outType,Type inType,SaturationMode sat,RoundingMode round)534 int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
535                             RoundingMode round)
536 {
537 #ifdef __APPLE__
538     cl_ulong wall_start = mach_absolute_time();
539 #endif
540 
541     cl_uint threads = GetThreadCount();
542 
543     DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
544     DataInfoSpec<InType, OutType> init_info(info);
545     WriteInputBufferInfo writeInputBufferInfo;
546     int vectorSize;
547     int error = 0;
548     uint64_t i;
549 
550     gTestCount++;
551     size_t blockCount =
552         BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
553     size_t step = blockCount;
554 
555     for (i = 0; i < threads; i++)
556     {
557         init_info.mdv.emplace_back(MTdataHolder(gRandomSeed));
558     }
559 
560     writeInputBufferInfo.outType = outType;
561     writeInputBufferInfo.inType = inType;
562 
563     writeInputBufferInfo.calcInfo.resize(gMaxVectorSize);
564     for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
565     {
566         writeInputBufferInfo.calcInfo[vectorSize].reset(
567             new CalcRefValsPat<InType, OutType>());
568         writeInputBufferInfo.calcInfo[vectorSize]->program =
569             conv_test::MakeProgram(
570                 outType, inType, sat, round, vectorSize,
571                 &writeInputBufferInfo.calcInfo[vectorSize]->kernel);
572         if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program)
573         {
574             gFailCount++;
575             return -1;
576         }
577         if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel)
578         {
579             gFailCount++;
580             vlog_error("\t\tFAILED -- Failed to create kernel.\n");
581             return -2;
582         }
583 
584         writeInputBufferInfo.calcInfo[vectorSize]->parent =
585             &writeInputBufferInfo;
586         writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize;
587         writeInputBufferInfo.calcInfo[vectorSize]->result = -1;
588     }
589 
590     if (gSkipTesting) return error;
591 
592     // Patch up rounding mode if default is RTZ
593     // We leave the part above in default rounding mode so that the right kernel
594     // is compiled.
595     if (std::is_same<OutType, cl_float>::value)
596     {
597         if (round == kDefaultRoundingMode && gIsRTZ)
598             init_info.round = round = kRoundTowardZero;
599     }
600 
601     // Figure out how many elements are in a work block
602     // we handle 64-bit types a bit differently.
603     uint64_t lastCase = (8 * gTypeSizes[inType] > 32)
604         ? 0x100000000ULL
605         : 1ULL << (8 * gTypeSizes[inType]);
606 
607     if (!gWimpyMode && gIsEmbedded)
608         step = blockCount * EMBEDDED_REDUCTION_FACTOR;
609 
610     if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
611     vlog("Testing... ");
612     fflush(stdout);
613     for (i = 0; i < (uint64_t)lastCase; i += step)
614     {
615 
616         if (0 == (i & ((lastCase >> 3) - 1)))
617         {
618             vlog(".");
619             fflush(stdout);
620         }
621 
622         cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
623         writeInputBufferInfo.count = count;
624 
625         // Crate a user event to represent the status of the reference value
626         // computation completion
627         writeInputBufferInfo.calcReferenceValues =
628             clCreateUserEvent(gContext, &error);
629         if (error || NULL == writeInputBufferInfo.calcReferenceValues)
630         {
631             vlog_error("ERROR: Unable to create user event. (%d)\n", error);
632             gFailCount++;
633             return error;
634         }
635 
636         // retain for consumption by MapOutputBufferComplete
637         for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
638              vectorSize++)
639         {
640             if ((error =
641                      clRetainEvent(writeInputBufferInfo.calcReferenceValues)))
642             {
643                 vlog_error("ERROR: Unable to retain user event. (%d)\n", error);
644                 gFailCount++;
645                 return error;
646             }
647         }
648 
649         // Crate a user event to represent when the callbacks are done verifying
650         // correctness
651         writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error);
652         if (error || NULL == writeInputBufferInfo.doneBarrier)
653         {
654             vlog_error("ERROR: Unable to create user event for barrier. (%d)\n",
655                        error);
656             gFailCount++;
657             return error;
658         }
659 
660         // retain for use by the callback that calls this
661         if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier)))
662         {
663             vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n",
664                        error);
665             gFailCount++;
666             return error;
667         }
668 
669         //      Call this in a multithreaded manner
670         cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2;
671         init_info.start = i;
672         init_info.size = count / chunks;
673         if (init_info.size < 16384)
674         {
675             chunks = RoundUpToNextPowerOfTwo(threads);
676             init_info.size = count / chunks;
677             if (init_info.size < 16384)
678             {
679                 init_info.size = count;
680                 chunks = 1;
681             }
682         }
683 
684         ThreadPool_Do(conv_test::InitData, chunks, &init_info);
685 
686         // Copy the results to the device
687         if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
688                                           count * gTypeSizes[inType], gIn, 0,
689                                           NULL, NULL)))
690         {
691             vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error);
692             gFailCount++;
693             return error;
694         }
695 
696         // Call completion callback for the write, which will enqueue the rest
697         // of the work.
698         conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo);
699 
700         // Make sure the work is actually running, so we don't deadlock
701         if ((error = clFlush(gQueue)))
702         {
703             vlog_error("clFlush failed with error %d\n", error);
704             gFailCount++;
705             return error;
706         }
707 
708         ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info);
709 
710         // signal we are done calculating the reference results
711         if ((error = clSetUserEventStatus(
712                  writeInputBufferInfo.calcReferenceValues, CL_COMPLETE)))
713         {
714             vlog_error(
715                 "Error:  Failed to set user event status to CL_COMPLETE:  %d\n",
716                 error);
717             gFailCount++;
718             return error;
719         }
720 
721         // Wait for the event callbacks to finish verifying correctness.
722         if ((error = clWaitForEvents(
723                  1, (cl_event *)&writeInputBufferInfo.doneBarrier)))
724         {
725             vlog_error("Error:  Failed to wait for barrier:  %d\n", error);
726             gFailCount++;
727             return error;
728         }
729 
730         if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues)))
731         {
732             vlog_error("Error:  Failed to release calcReferenceValues:  %d\n",
733                        error);
734             gFailCount++;
735             return error;
736         }
737 
738         if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier)))
739         {
740             vlog_error("Error:  Failed to release done barrier:  %d\n", error);
741             gFailCount++;
742             return error;
743         }
744 
745         for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
746              vectorSize++)
747         {
748             if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result))
749             {
750                 switch (inType)
751                 {
752                     case kuchar:
753                     case kchar:
754                         vlog("Input value: 0x%2.2x ",
755                              ((unsigned char *)gIn)[error - 1]);
756                         break;
757                     case kushort:
758                     case kshort:
759                         vlog("Input value: 0x%4.4x ",
760                              ((unsigned short *)gIn)[error - 1]);
761                         break;
762                     case kuint:
763                     case kint:
764                         vlog("Input value: 0x%8.8x ",
765                              ((unsigned int *)gIn)[error - 1]);
766                         break;
767                     case kfloat:
768                         vlog("Input value: %a ", ((float *)gIn)[error - 1]);
769                         break;
770                     case kulong:
771                     case klong:
772                         vlog("Input value: 0x%16.16llx ",
773                              ((unsigned long long *)gIn)[error - 1]);
774                         break;
775                     case kdouble:
776                         vlog("Input value: %a ", ((double *)gIn)[error - 1]);
777                         break;
778                     default:
779                         vlog_error("Internal error at %s: %d\n", __FILE__,
780                                    __LINE__);
781                         abort();
782                         break;
783                 }
784 
785                 // tell the user which conversion it was.
786                 if (0 == vectorSize)
787                     vlog(" (implicit scalar conversion from %s to %s)\n",
788                          gTypeNames[inType], gTypeNames[outType]);
789                 else
790                     vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType],
791                          sizeNames[vectorSize], gSaturationNames[sat],
792                          gRoundingModeNames[round], gTypeNames[inType],
793                          sizeNames[vectorSize]);
794 
795                 gFailCount++;
796                 return error;
797             }
798         }
799     }
800 
801     log_info("done.\n");
802 
803     if (gTimeResults)
804     {
805         // Kick off tests for the various vector lengths
806         for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
807              vectorSize++)
808         {
809             size_t workItemCount = blockCount / vectorSizes[vectorSize];
810             if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4)
811                 workItemCount /=
812                     4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
813 
814             double sum = 0.0;
815             double bestTime = INFINITY;
816             cl_uint k;
817             for (k = 0; k < PERF_LOOP_COUNT; k++)
818             {
819                 uint64_t startTime = conv_test::GetTime();
820                 if ((error = conv_test::RunKernel(
821                          writeInputBufferInfo.calcInfo[vectorSize]->kernel,
822                          gInBuffer, gOutBuffers[vectorSize], workItemCount)))
823                 {
824                     gFailCount++;
825                     return error;
826                 }
827 
828                 // Make sure OpenCL is done
829                 if ((error = clFinish(gQueue)))
830                 {
831                     vlog_error("Error %d at clFinish\n", error);
832                     return error;
833                 }
834 
835                 uint64_t endTime = conv_test::GetTime();
836                 double time = SubtractTime(endTime, startTime);
837                 sum += time;
838                 if (time < bestTime) bestTime = time;
839             }
840 
841             if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
842             double clocksPerOp = bestTime * (double)gDeviceFrequency
843                 * gComputeDevices * gSimdSize * 1e6
844                 / (workItemCount * vectorSizes[vectorSize]);
845             if (0 == vectorSize)
846                 vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
847                           "implicit convert %s -> %s", gTypeNames[inType],
848                           gTypeNames[outType]);
849             else
850                 vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
851                           "convert_%s%s%s%s( %s%s )", gTypeNames[outType],
852                           sizeNames[vectorSize], gSaturationNames[sat],
853                           gRoundingModeNames[round], gTypeNames[inType],
854                           sizeNames[vectorSize]);
855         }
856     }
857 
858     if (gWimpyMode)
859         vlog("\tWimp pass");
860     else
861         vlog("\tpassed");
862 
863 #ifdef __APPLE__
864     // record the run time
865     vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start));
866 #endif
867     vlog("\n\n");
868     fflush(stdout);
869 
870     return error;
871 }
872 
873 #if !defined(__APPLE__)
874 void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
875 #endif
876 
877 #if defined(_MSC_VER)
878 /* function is defined in "compat.h" */
879 #else
SubtractTime(uint64_t endTime,uint64_t startTime)880 double SubtractTime(uint64_t endTime, uint64_t startTime)
881 {
882     uint64_t diff = endTime - startTime;
883     static double conversion = 0.0;
884 
885     if (0.0 == conversion)
886     {
887 #if defined(__APPLE__)
888         mach_timebase_info_data_t info = { 0, 0 };
889         kern_return_t err = mach_timebase_info(&info);
890         if (0 == err)
891             conversion = 1e-9 * (double)info.numer / (double)info.denom;
892 #else
893         // This function consumes output from GetTime() above, and converts the
894         // time to secionds.
895 #warning need accurate ticks to seconds conversion factor here. Times are invalid.
896 #endif
897     }
898 
899     // strictly speaking we should also be subtracting out timer latency here
900     return conversion * (double)diff;
901 }
902 #endif
903 
904 ////////////////////////////////////////////////////////////////////////////////
905 
setAllowZ(uint8_t * allow,uint32_t * x,cl_uint count)906 static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
907 {
908     cl_uint i;
909     for (i = 0; i < count; ++i)
910         allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
911 }
912 
913 
914 void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
915 
916 void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
917                                              void *data);
918 
919 // Note: May be called reentrantly
MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> & info)920 void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
921 {
922     cl_int status;
923     // CalcRefValsBase *info = (CalcRefValsBase *)data;
924     cl_event calcReferenceValues = info->parent->calcReferenceValues;
925 
926     // we know that the map is done, wait for the main thread to finish
927     // calculating the reference values
928     if ((status =
929              clSetEventCallback(calcReferenceValues, CL_COMPLETE,
930                                 CalcReferenceValuesComplete, (void *)&info)))
931     {
932         vlog_error("ERROR: clSetEventCallback failed in "
933                    "MapResultValuesComplete with status: %d\n",
934                    status);
935         gFailCount++; // not thread safe -- being lazy here
936     }
937 
938     // this thread no longer needs its reference to info->calcReferenceValues,
939     // so release it
940     if ((status = clReleaseEvent(calcReferenceValues)))
941     {
942         vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed "
943                    "with status: %d\n",
944                    status);
945         gFailCount++; // not thread safe -- being lazy here
946     }
947 
948     // no need to flush since we didn't enqueue anything
949 
950     // e was already released by WriteInputBufferComplete. It should be
951     // destroyed automatically soon after we exit.
952 }
953 
954 
CalcReferenceValuesComplete(cl_event e,cl_int status,void * data)955 void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
956                                              void *data)
957 {
958     std::unique_ptr<CalcRefValsBase> &info =
959         *(std::unique_ptr<CalcRefValsBase> *)data;
960 
961     cl_uint vectorSize = info->vectorSize;
962     cl_uint count = info->parent->count;
963     Type outType =
964         info->parent->outType; // the data type of the conversion result
965     Type inType = info->parent->inType; // the data type of the conversion input
966     size_t j;
967     cl_int error;
968     cl_event doneBarrier = info->parent->doneBarrier;
969 
970     // report spurious error condition
971     if (CL_SUCCESS != status)
972     {
973         vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n",
974                    status);
975         gFailCount++; // lazy about thread safety here
976         return;
977     }
978 
979     // Now we know that both results have been mapped back from the device, and
980     // the main thread is done calculating the reference results. It is now time
981     // to check the results.
982 
983     // verify results
984     void *mapped = info->p;
985 
986     // Patch up NaNs conversions to integer to zero -- these can be converted to
987     // any integer
988     if (outType != kfloat && outType != kdouble)
989     {
990         if (inType == kfloat)
991         {
992             float *inp = (float *)gIn;
993             for (j = 0; j < count; j++)
994             {
995                 if (isnan(inp[j]))
996                     memset((char *)mapped + j * gTypeSizes[outType], 0,
997                            gTypeSizes[outType]);
998             }
999         }
1000         if (inType == kdouble)
1001         {
1002             double *inp = (double *)gIn;
1003             for (j = 0; j < count; j++)
1004             {
1005                 if (isnan(inp[j]))
1006                     memset((char *)mapped + j * gTypeSizes[outType], 0,
1007                            gTypeSizes[outType]);
1008             }
1009         }
1010     }
1011     else if (inType == kfloat || inType == kdouble)
1012     { // outtype and intype is float or double.  NaN conversions for float <->
1013       // double can be any NaN
1014         if (inType == kfloat && outType == kdouble)
1015         {
1016             float *inp = (float *)gIn;
1017             double *outp = (double *)mapped;
1018             for (j = 0; j < count; j++)
1019             {
1020                 if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
1021             }
1022         }
1023         if (inType == kdouble && outType == kfloat)
1024         {
1025             double *inp = (double *)gIn;
1026             float *outp = (float *)mapped;
1027             for (j = 0; j < count; j++)
1028             {
1029                 if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
1030             }
1031         }
1032     }
1033 
1034     if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
1035         info->result =
1036             info->check_result(mapped, count, vectorSizes[vectorSize]);
1037     else
1038         info->result = 0;
1039 
1040     // Fill the output buffer with junk and release it
1041     {
1042         cl_uint pattern = 0xffffdead;
1043         memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]);
1044         if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize],
1045                                              mapped, 0, NULL, NULL)))
1046         {
1047             vlog_error("ERROR: clEnqueueUnmapMemObject failed in "
1048                        "CalcReferenceValuesComplete  (%d)\n",
1049                        error);
1050             gFailCount++;
1051         }
1052     }
1053 
1054     if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1))
1055     {
1056         if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE)))
1057         {
1058             vlog_error("ERROR: clSetUserEventStatus failed in "
1059                        "CalcReferenceValuesComplete (err: %d). We're probably "
1060                        "going to deadlock.\n",
1061                        status);
1062             gFailCount++;
1063             return;
1064         }
1065 
1066         if ((status = clReleaseEvent(doneBarrier)))
1067         {
1068             vlog_error("ERROR: clReleaseEvent failed in "
1069                        "CalcReferenceValuesComplete (err: %d).\n",
1070                        status);
1071             gFailCount++;
1072             return;
1073         }
1074     }
1075     // e was already released by WriteInputBufferComplete. It should be
1076     // destroyed automatically soon after all the calls to
1077     // CalcReferenceValuesComplete exit.
1078 }
1079 
1080 //
1081 
1082 namespace conv_test {
1083 
1084 ////////////////////////////////////////////////////////////////////////////////
1085 
InitData(cl_uint job_id,cl_uint thread_id,void * p)1086 cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
1087 {
1088     DataInitBase *info = (DataInitBase *)p;
1089 
1090     info->init(job_id, thread_id);
1091 
1092     return CL_SUCCESS;
1093 }
1094 
1095 ////////////////////////////////////////////////////////////////////////////////
1096 
PrepareReference(cl_uint job_id,cl_uint thread_id,void * p)1097 cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
1098 {
1099     DataInitBase *info = (DataInitBase *)p;
1100 
1101     cl_uint count = info->size;
1102     Type inType = info->inType;
1103     Type outType = info->outType;
1104     RoundingMode round = info->round;
1105     size_t j;
1106 
1107     Force64BitFPUPrecision();
1108 
1109     void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType];
1110     void *a = (cl_uchar *)gAllowZ + job_id * count;
1111     void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
1112 
1113 
1114     if (outType != inType)
1115     {
1116         // create the reference while we wait
1117 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
1118         /* ARM VFP doesn't have hardware instruction for converting from 64-bit
1119          * integer to float types, hence GCC ARM uses the floating-point
1120          * emulation code despite which -mfloat-abi setting it is. But the
1121          * emulation code in libgcc.a has only one rounding mode (round to
1122          * nearest even in this case) and ignores the user rounding mode setting
1123          * in hardware. As a result setting rounding modes in hardware won't
1124          * give correct rounding results for type covert from 64-bit integer to
1125          * float using GCC for ARM compiler so for testing different rounding
1126          * modes, we need to use alternative reference function. ARM64 does have
1127          * an instruction, however we cannot guarantee the compiler will use it.
1128          * On all ARM architechures use emulation to calculate reference.*/
1129         switch (round)
1130         {
1131             /* conversions to floating-point type use the current rounding mode.
1132              * The only default floating-point rounding mode supported is round
1133              * to nearest even i.e the current rounding mode will be _rte for
1134              * floating-point types. */
1135             case kDefaultRoundingMode: qcom_rm = qcomRTE; break;
1136             case kRoundToNearestEven: qcom_rm = qcomRTE; break;
1137             case kRoundUp: qcom_rm = qcomRTP; break;
1138             case kRoundDown: qcom_rm = qcomRTN; break;
1139             case kRoundTowardZero: qcom_rm = qcomRTZ; break;
1140             default:
1141                 vlog_error("ERROR: undefined rounding mode %d\n", round);
1142                 break;
1143         }
1144         qcom_sat = info->sat;
1145 #endif
1146 
1147         RoundingMode oldRound = set_round(round, outType);
1148 
1149         if (info->sat)
1150             info->conv_array_sat(d, s, count);
1151         else
1152             info->conv_array(d, s, count);
1153 
1154         set_round(oldRound, outType);
1155 
1156         // Decide if we allow a zero result in addition to the correctly rounded
1157         // one
1158         memset(a, 0, count);
1159         if (gForceFTZ)
1160         {
1161             if (inType == kfloat || outType == kfloat)
1162                 setAllowZ((uint8_t *)a, (uint32_t *)s, count);
1163         }
1164     }
1165     else
1166     {
1167         // Copy the input to the reference
1168         memcpy(d, s, info->size * gTypeSizes[inType]);
1169     }
1170 
1171     // Patch up NaNs conversions to integer to zero -- these can be converted to
1172     // any integer
1173     if (info->outType != kfloat && info->outType != kdouble)
1174     {
1175         if (inType == kfloat)
1176         {
1177             float *inp = (float *)s;
1178             for (j = 0; j < count; j++)
1179             {
1180                 if (isnan(inp[j]))
1181                     memset((char *)d + j * gTypeSizes[outType], 0,
1182                            gTypeSizes[outType]);
1183             }
1184         }
1185         if (inType == kdouble)
1186         {
1187             double *inp = (double *)s;
1188             for (j = 0; j < count; j++)
1189             {
1190                 if (isnan(inp[j]))
1191                     memset((char *)d + j * gTypeSizes[outType], 0,
1192                            gTypeSizes[outType]);
1193             }
1194         }
1195     }
1196     else if (inType == kfloat || inType == kdouble)
1197     { // outtype and intype is float or double.  NaN conversions for float <->
1198       // double can be any NaN
1199         if (inType == kfloat && outType == kdouble)
1200         {
1201             float *inp = (float *)s;
1202             for (j = 0; j < count; j++)
1203             {
1204                 if (isnan(inp[j])) ((double *)d)[j] = NAN;
1205             }
1206         }
1207         if (inType == kdouble && outType == kfloat)
1208         {
1209             double *inp = (double *)s;
1210             for (j = 0; j < count; j++)
1211             {
1212                 if (isnan(inp[j])) ((float *)d)[j] = NAN;
1213             }
1214         }
1215     }
1216 
1217     return CL_SUCCESS;
1218 }
1219 
1220 ////////////////////////////////////////////////////////////////////////////////
1221 
GetTime(void)1222 uint64_t GetTime(void)
1223 {
1224 #if defined(__APPLE__)
1225     return mach_absolute_time();
1226 #elif defined(_MSC_VER)
1227     return ReadTime();
1228 #else
1229     // mach_absolute_time is a high precision timer with precision < 1
1230     // microsecond.
1231 #warning need accurate clock here.  Times are invalid.
1232     return 0;
1233 #endif
1234 }
1235 
1236 ////////////////////////////////////////////////////////////////////////////////
1237 
1238 // Note: not called reentrantly
WriteInputBufferComplete(void * data)1239 void WriteInputBufferComplete(void *data)
1240 {
1241     cl_int status;
1242     WriteInputBufferInfo *info = (WriteInputBufferInfo *)data;
1243     cl_uint count = info->count;
1244     int vectorSize;
1245 
1246     info->barrierCount = gMaxVectorSize - gMinVectorSize;
1247 
1248     // now that we know that the write buffer is complete, enqueue callbacks to
1249     // wait for the main thread to finish calculating the reference results.
1250     for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
1251     {
1252         size_t workItemCount =
1253             (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]);
1254 
1255         if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel,
1256                                            gInBuffer, gOutBuffers[vectorSize],
1257                                            workItemCount)))
1258         {
1259             gFailCount++;
1260             return;
1261         }
1262 
1263         info->calcInfo[vectorSize]->p = clEnqueueMapBuffer(
1264             gQueue, gOutBuffers[vectorSize], CL_TRUE,
1265             CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0,
1266             NULL, NULL, &status);
1267         {
1268             if (status)
1269             {
1270                 vlog_error("ERROR: WriteInputBufferComplete calback failed "
1271                            "with status: %d\n",
1272                            status);
1273                 gFailCount++;
1274                 return;
1275             }
1276         }
1277     }
1278 
1279     for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
1280     {
1281         MapResultValuesComplete(info->calcInfo[vectorSize]);
1282     }
1283 
1284     // Make sure the work starts moving -- otherwise we may deadlock
1285     if ((status = clFlush(gQueue)))
1286     {
1287         vlog_error(
1288             "ERROR: WriteInputBufferComplete calback failed with status: %d\n",
1289             status);
1290         gFailCount++;
1291         return;
1292     }
1293 
1294     // e was already released by the main thread. It should be destroyed
1295     // automatically soon after we exit.
1296 }
1297 
1298 ////////////////////////////////////////////////////////////////////////////////
1299 
MakeProgram(Type outType,Type inType,SaturationMode sat,RoundingMode round,int vectorSize,cl_kernel * outKernel)1300 cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
1301                        RoundingMode round, int vectorSize, cl_kernel *outKernel)
1302 {
1303     cl_program program;
1304     char testName[256];
1305     int error = 0;
1306 
1307     std::ostringstream source;
1308     if (outType == kdouble || inType == kdouble)
1309         source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
1310 
1311     // Create the program. This is a bit complicated because we are trying to
1312     // avoid byte and short stores.
1313     if (0 == vectorSize)
1314     {
1315         // Create the type names.
1316         char inName[32];
1317         char outName[32];
1318         strncpy(inName, gTypeNames[inType], sizeof(inName));
1319         strncpy(outName, gTypeNames[outType], sizeof(outName));
1320         sprintf(testName, "test_implicit_%s_%s", outName, inName);
1321 
1322         source << "__kernel void " << testName << "( __global " << inName
1323                << " *src, __global " << outName << " *dest )\n";
1324         source << "{\n";
1325         source << "   size_t i = get_global_id(0);\n";
1326         source << "   dest[i] =  src[i];\n";
1327         source << "}\n";
1328 
1329         vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType],
1330              gTypeNames[outType]);
1331         fflush(stdout);
1332     }
1333     else
1334     {
1335         int vectorSizetmp = vectorSizes[vectorSize];
1336 
1337         // Create the type names.
1338         char convertString[128];
1339         char inName[32];
1340         char outName[32];
1341         switch (vectorSizetmp)
1342         {
1343             case 1:
1344                 strncpy(inName, gTypeNames[inType], sizeof(inName));
1345                 strncpy(outName, gTypeNames[outType], sizeof(outName));
1346                 snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
1347                          outName, gSaturationNames[sat],
1348                          gRoundingModeNames[round]);
1349                 snprintf(testName, 256, "test_%s_%s", convertString, inName);
1350                 vlog("Building %s( %s ) test\n", convertString, inName);
1351                 break;
1352             case 3:
1353                 strncpy(inName, gTypeNames[inType], sizeof(inName));
1354                 strncpy(outName, gTypeNames[outType], sizeof(outName));
1355                 snprintf(convertString, sizeof(convertString),
1356                          "convert_%s3%s%s", outName, gSaturationNames[sat],
1357                          gRoundingModeNames[round]);
1358                 snprintf(testName, 256, "test_%s_%s3", convertString, inName);
1359                 vlog("Building %s( %s3 ) test\n", convertString, inName);
1360                 break;
1361             default:
1362                 snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType],
1363                          vectorSizetmp);
1364                 snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType],
1365                          vectorSizetmp);
1366                 snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
1367                          outName, gSaturationNames[sat],
1368                          gRoundingModeNames[round]);
1369                 snprintf(testName, 256, "test_%s_%s", convertString, inName);
1370                 vlog("Building %s( %s ) test\n", convertString, inName);
1371                 break;
1372         }
1373         fflush(stdout);
1374 
1375         if (vectorSizetmp == 3)
1376         {
1377             source << "__kernel void " << testName << "( __global " << inName
1378                    << " *src, __global " << outName << " *dest )\n";
1379             source << "{\n";
1380             source << "   size_t i = get_global_id(0);\n";
1381             source << "   if( i + 1 < get_global_size(0))\n";
1382             source << "       vstore3( " << convertString
1383                    << "( vload3( i, src)), i, dest );\n";
1384             source << "   else\n";
1385             source << "   {\n";
1386             source << "       " << inName << "3 in;\n";
1387             source << "       " << outName << "3 out;\n";
1388             source << "       if( 0 == (i & 1) )\n";
1389             source << "           in.y = src[3*i+1];\n";
1390             source << "       in.x = src[3*i];\n";
1391             source << "       out = " << convertString << "( in ); \n";
1392             source << "       dest[3*i] = out.x;\n";
1393             source << "       if( 0 == (i & 1) )\n";
1394             source << "           dest[3*i+1] = out.y;\n";
1395             source << "   }\n";
1396             source << "}\n";
1397         }
1398         else
1399         {
1400             source << "__kernel void " << testName << "( __global " << inName
1401                    << " *src, __global " << outName << " *dest )\n";
1402             source << "{\n";
1403             source << "   size_t i = get_global_id(0);\n";
1404             source << "   dest[i] = " << convertString << "( src[i] );\n";
1405             source << "}\n";
1406         }
1407     }
1408     *outKernel = NULL;
1409 
1410     const char *flags = NULL;
1411     if (gForceFTZ) flags = "-cl-denorms-are-zero";
1412 
1413     // build it
1414     std::string sourceString = source.str();
1415     const char *programSource = sourceString.c_str();
1416     error = create_single_kernel_helper(gContext, &program, outKernel, 1,
1417                                         &programSource, testName, flags);
1418     if (error)
1419     {
1420         vlog_error("Failed to build kernel/program (err = %d).\n", error);
1421         return NULL;
1422     }
1423 
1424     return program;
1425 }
1426 
1427 //
1428 
RunKernel(cl_kernel kernel,void * inBuf,void * outBuf,size_t blockCount)1429 int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount)
1430 {
1431     // The global dimensions are just the blockCount to execute since we haven't
1432     // set up multiple queues for multiple devices.
1433     int error;
1434 
1435     error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf);
1436     error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf);
1437 
1438     if (error)
1439     {
1440         vlog_error("FAILED -- could not set kernel args (%d)\n", error);
1441         return error;
1442     }
1443 
1444     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount,
1445                                         NULL, 0, NULL, NULL)))
1446     {
1447         vlog_error("FAILED -- could not execute kernel (%d)\n", error);
1448         return error;
1449     }
1450 
1451     return 0;
1452 }
1453 
1454 
GetTestCase(const char * name,Type * outType,Type * inType,SaturationMode * sat,RoundingMode * round)1455 int GetTestCase(const char *name, Type *outType, Type *inType,
1456                 SaturationMode *sat, RoundingMode *round)
1457 {
1458     int i;
1459 
1460     // Find the return type
1461     for (i = 0; i < kTypeCount; i++)
1462         if (name == strstr(name, gTypeNames[i]))
1463         {
1464             *outType = (Type)i;
1465             name += strlen(gTypeNames[i]);
1466 
1467             break;
1468         }
1469 
1470     if (i == kTypeCount) return -1;
1471 
1472     // Check to see if _sat appears next
1473     *sat = (SaturationMode)0;
1474     for (i = 1; i < kSaturationModeCount; i++)
1475         if (name == strstr(name, gSaturationNames[i]))
1476         {
1477             *sat = (SaturationMode)i;
1478             name += strlen(gSaturationNames[i]);
1479             break;
1480         }
1481 
1482     *round = (RoundingMode)0;
1483     for (i = 1; i < kRoundingModeCount; i++)
1484         if (name == strstr(name, gRoundingModeNames[i]))
1485         {
1486             *round = (RoundingMode)i;
1487             name += strlen(gRoundingModeNames[i]);
1488             break;
1489         }
1490 
1491     if (*name != '_') return -2;
1492     name++;
1493 
1494     for (i = 0; i < kTypeCount; i++)
1495         if (name == strstr(name, gTypeNames[i]))
1496         {
1497             *inType = (Type)i;
1498             name += strlen(gTypeNames[i]);
1499 
1500             break;
1501         }
1502 
1503     if (i == kTypeCount) return -3;
1504 
1505     if (*name != '\0') return -4;
1506 
1507     return 0;
1508 }
1509 
1510 } // namespace conv_test
1511