1 // Copyright (c) 2015-2016 The Khronos Group Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <cfloat>
16 #include <cmath>
17 #include <cstdio>
18 #include <sstream>
19 #include <string>
20 #include <tuple>
21
22 #include <gmock/gmock.h>
23 #include "SPIRV/hex_float.h"
24
25 namespace {
26 using ::testing::Eq;
27 using spvutils::BitwiseCast;
28 using spvutils::Float16;
29 using spvutils::FloatProxy;
30 using spvutils::HexFloat;
31 using spvutils::ParseNormalFloat;
32
33 // In this file "encode" means converting a number into a string,
34 // and "decode" means converting a string into a number.
35
36 using HexFloatTest =
37 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
38 using DecodeHexFloatTest =
39 ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
40 using HexDoubleTest =
41 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
42 using DecodeHexDoubleTest =
43 ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
44
45 // Hex-encodes a float value.
46 template <typename T>
EncodeViaHexFloat(const T & value)47 std::string EncodeViaHexFloat(const T& value) {
48 std::stringstream ss;
49 ss << spvutils::HexFloat<T>(value);
50 return ss.str();
51 }
52
53 // The following two tests can't be DRY because they take different parameter
54 // types.
55
// Hex-encoding the parameter's float must give the parameter's string.
TEST_P(HexFloatTest, EncodeCorrectly) {
  const auto& test_case = GetParam();
  EXPECT_THAT(EncodeViaHexFloat(test_case.first), Eq(test_case.second));
}
59
// Hex-encoding the parameter's double must give the parameter's string.
TEST_P(HexDoubleTest, EncodeCorrectly) {
  const auto& test_case = GetParam();
  EXPECT_THAT(EncodeViaHexFloat(test_case.first), Eq(test_case.second));
}
63
64 // Decodes a hex-float string.
65 template <typename T>
Decode(const std::string & str)66 FloatProxy<T> Decode(const std::string& str) {
67 spvutils::HexFloat<FloatProxy<T>> decoded(0.f);
68 EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
69 return decoded.value();
70 }
71
// Decoding the parameter's string must give back the parameter's float.
TEST_P(HexFloatTest, DecodeCorrectly) {
  const auto& test_case = GetParam();
  EXPECT_THAT(Decode<float>(test_case.second), Eq(test_case.first));
}
75
// Decoding the parameter's string must give back the parameter's double.
TEST_P(HexDoubleTest, DecodeCorrectly) {
  const auto& test_case = GetParam();
  EXPECT_THAT(Decode<double>(test_case.second), Eq(test_case.first));
}
79
80 INSTANTIATE_TEST_SUITE_P(
81 Float32Tests, HexFloatTest,
82 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
83 {0.f, "0x0p+0"},
84 {1.f, "0x1p+0"},
85 {2.f, "0x1p+1"},
86 {3.f, "0x1.8p+1"},
87 {0.5f, "0x1p-1"},
88 {0.25f, "0x1p-2"},
89 {0.75f, "0x1.8p-1"},
90 {-0.f, "-0x0p+0"},
91 {-1.f, "-0x1p+0"},
92 {-0.5f, "-0x1p-1"},
93 {-0.25f, "-0x1p-2"},
94 {-0.75f, "-0x1.8p-1"},
95
96 // Larger numbers
97 {512.f, "0x1p+9"},
98 {-512.f, "-0x1p+9"},
99 {1024.f, "0x1p+10"},
100 {-1024.f, "-0x1p+10"},
101 {1024.f + 8.f, "0x1.02p+10"},
102 {-1024.f - 8.f, "-0x1.02p+10"},
103
104 // Small numbers
105 {1.0f / 512.f, "0x1p-9"},
106 {1.0f / -512.f, "-0x1p-9"},
107 {1.0f / 1024.f, "0x1p-10"},
108 {1.0f / -1024.f, "-0x1p-10"},
109 {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
110 {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
111
112 // lowest non-denorm
113 {float(ldexp(1.0f, -126)), "0x1p-126"},
114 {float(ldexp(-1.0f, -126)), "-0x1p-126"},
115
116 // Denormalized values
117 {float(ldexp(1.0f, -127)), "0x1p-127"},
118 {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
119 {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
120 {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
121 {float(ldexp(-1.0f, -127)), "-0x1p-127"},
122 {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
123 {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
124 {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
125
126 {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
127 {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
128 "0x1.8p-128"},
129
130 })));
131
132 INSTANTIATE_TEST_SUITE_P(
133 Float32NanTests, HexFloatTest,
134 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
135 // Various NAN and INF cases
136 {uint32_t(0xFF800000), "-0x1p+128"}, // -inf
137 {uint32_t(0x7F800000), "0x1p+128"}, // inf
138 {uint32_t(0xFFC00000), "-0x1.8p+128"}, // -nan
139 {uint32_t(0xFF800100), "-0x1.0002p+128"}, // -nan
140 {uint32_t(0xFF800c00), "-0x1.0018p+128"}, // -nan
141 {uint32_t(0xFF80F000), "-0x1.01ep+128"}, // -nan
142 {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"}, // -nan
143 {uint32_t(0x7FC00000), "0x1.8p+128"}, // +nan
144 {uint32_t(0x7F800100), "0x1.0002p+128"}, // +nan
145 {uint32_t(0x7f800c00), "0x1.0018p+128"}, // +nan
146 {uint32_t(0x7F80F000), "0x1.01ep+128"}, // +nan
147 {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"}, // +nan
148 })));
149
150 INSTANTIATE_TEST_SUITE_P(
151 Float64Tests, HexDoubleTest,
152 ::testing::ValuesIn(
153 std::vector<std::pair<FloatProxy<double>, std::string>>({
154 {0., "0x0p+0"},
155 {1., "0x1p+0"},
156 {2., "0x1p+1"},
157 {3., "0x1.8p+1"},
158 {0.5, "0x1p-1"},
159 {0.25, "0x1p-2"},
160 {0.75, "0x1.8p-1"},
161 {-0., "-0x0p+0"},
162 {-1., "-0x1p+0"},
163 {-0.5, "-0x1p-1"},
164 {-0.25, "-0x1p-2"},
165 {-0.75, "-0x1.8p-1"},
166
167 // Larger numbers
168 {512., "0x1p+9"},
169 {-512., "-0x1p+9"},
170 {1024., "0x1p+10"},
171 {-1024., "-0x1p+10"},
172 {1024. + 8., "0x1.02p+10"},
173 {-1024. - 8., "-0x1.02p+10"},
174
175 // Large outside the range of normal floats
176 {ldexp(1.0, 128), "0x1p+128"},
177 {ldexp(1.0, 129), "0x1p+129"},
178 {ldexp(-1.0, 128), "-0x1p+128"},
179 {ldexp(-1.0, 129), "-0x1p+129"},
180 {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
181 {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
182 {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
183 {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
184
185 // Small numbers
186 {1.0 / 512., "0x1p-9"},
187 {1.0 / -512., "-0x1p-9"},
188 {1.0 / 1024., "0x1p-10"},
189 {1.0 / -1024., "-0x1p-10"},
190 {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
191 {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
192
193 // Small outside the range of normal floats
194 {ldexp(1.0, -128), "0x1p-128"},
195 {ldexp(1.0, -129), "0x1p-129"},
196 {ldexp(-1.0, -128), "-0x1p-128"},
197 {ldexp(-1.0, -129), "-0x1p-129"},
198 {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
199 {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
200 {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
201 {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
202
203 // lowest non-denorm
204 {ldexp(1.0, -1022), "0x1p-1022"},
205 {ldexp(-1.0, -1022), "-0x1p-1022"},
206
207 // Denormalized values
208 {ldexp(1.0, -1023), "0x1p-1023"},
209 {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
210 {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
211 {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
212 {ldexp(-1.0, -1024), "-0x1p-1024"},
213 {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
214 {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
215 {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
216
217 {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
218 {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
219 "0x1.8p-1024"},
220
221 })));
222
223 INSTANTIATE_TEST_SUITE_P(
224 Float64NanTests, HexDoubleTest,
225 ::testing::ValuesIn(std::vector<
226 std::pair<FloatProxy<double>, std::string>>({
227 // Various NAN and INF cases
228 {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"}, //-inf
229 {uint64_t(0x7FF0000000000000LL), "0x1p+1024"}, //+inf
230 {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"}, // -nan
231 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"}, // -nan
232 {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"}, // -nan
233 {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"}, // -nan
234 {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"}, // -nan
235 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"}, // +nan
236 {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"}, // +nan
237 {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"}, // -nan
238 {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"}, // -nan
239 {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"}, // -nan
240 })));
241
// Streaming a FloatProxy<float> must leave the stream's numeric base and fill
// character as it found them: the 9 written afterwards still comes out in
// octal, padded with 'x'.
TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  std::stringstream s;
  s << std::setw(4) << std::oct << std::setfill('x') << 8 << " ";
  s << FloatProxy<float>(uint32_t(0xFF800100)) << " ";
  s << std::setw(4) << 9;
  const std::string expected("xx10 -0x1.0002p+128 xx11");
  EXPECT_THAT(s.str(), Eq(expected));
}
248
// Streaming a FloatProxy<double> must leave the stream's numeric base and
// fill character as it found them: the 9 written afterwards still comes out
// in octal, padded with 'x'.
TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  std::stringstream s;
  s << std::setw(4) << std::oct << std::setfill('x') << 8 << " ";
  s << FloatProxy<double>(uint64_t(0x7FF0F00000000000LL)) << " ";
  s << std::setw(4) << 9;
  const std::string expected("xx10 0x1.0fp+1024 xx11");
  EXPECT_THAT(s.str(), Eq(expected));
}
256
// Decoding the parameter's string must give the parameter's float.
TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
  const auto& test_case = GetParam();
  EXPECT_THAT(Decode<float>(test_case.first), Eq(test_case.second));
}
260
// Decoding the parameter's string must give the parameter's double.
TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
  const auto& test_case = GetParam();
  EXPECT_THAT(Decode<double>(test_case.first), Eq(test_case.second));
}
264
265 INSTANTIATE_TEST_SUITE_P(
266 Float32DecodeTests, DecodeHexFloatTest,
267 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
268 {"0x0p+000", 0.f},
269 {"0x0p0", 0.f},
270 {"0x0p-0", 0.f},
271
272 // flush to zero cases
273 {"0x1p-500", 0.f}, // Exponent underflows.
274 {"-0x1p-500", -0.f},
275 {"0x0.00000000001p-126", 0.f}, // Fraction causes underflow.
276 {"-0x0.0000000001p-127", -0.f},
277 {"-0x0.01p-142", -0.f}, // Fraction causes additional underflow.
278 {"0x0.01p-142", 0.f},
279
280 // Some floats that do not encode the same way as they decode.
281 {"0x2p+0", 2.f},
282 {"0xFFp+0", 255.f},
283 {"0x0.8p+0", 0.5f},
284 {"0x0.4p+0", 0.25f},
285 })));
286
287 INSTANTIATE_TEST_SUITE_P(
288 Float32DecodeInfTests, DecodeHexFloatTest,
289 ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
290 // inf cases
291 {"-0x1p+128", uint32_t(0xFF800000)}, // -inf
292 {"0x32p+127", uint32_t(0x7F800000)}, // inf
293 {"0x32p+500", uint32_t(0x7F800000)}, // inf
294 {"-0x32p+127", uint32_t(0xFF800000)}, // -inf
295 })));
296
297 INSTANTIATE_TEST_SUITE_P(
298 Float64DecodeTests, DecodeHexDoubleTest,
299 ::testing::ValuesIn(
300 std::vector<std::pair<std::string, FloatProxy<double>>>({
301 {"0x0p+000", 0.},
302 {"0x0p0", 0.},
303 {"0x0p-0", 0.},
304
305 // flush to zero cases
306 {"0x1p-5000", 0.}, // Exponent underflows.
307 {"-0x1p-5000", -0.},
308 {"0x0.0000000000000001p-1023", 0.}, // Fraction causes underflow.
309 {"-0x0.000000000000001p-1024", -0.},
310 {"-0x0.01p-1090", -0.f}, // Fraction causes additional underflow.
311 {"0x0.01p-1090", 0.},
312
313 // Some floats that do not encode the same way as they decode.
314 {"0x2p+0", 2.},
315 {"0xFFp+0", 255.},
316 {"0x0.8p+0", 0.5},
317 {"0x0.4p+0", 0.25},
318 })));
319
320 INSTANTIATE_TEST_SUITE_P(
321 Float64DecodeInfTests, DecodeHexDoubleTest,
322 ::testing::ValuesIn(
323 std::vector<std::pair<std::string, FloatProxy<double>>>({
324 // inf cases
325 {"-0x1p+1024", uint64_t(0xFFF0000000000000)}, // -inf
326 {"0x32p+1023", uint64_t(0x7FF0000000000000)}, // inf
327 {"0x32p+5000", uint64_t(0x7FF0000000000000)}, // inf
328 {"-0x32p+1023", uint64_t(0xFFF0000000000000)}, // -inf
329 })));
330
// FloatProxy<float> must hand back ordinary values unchanged, classify
// infinities and NaNs correctly, and keep the exact bit pattern it was
// built from.
TEST(FloatProxy, ValidConversion) {
  // Ordinary values come back equal to what went in.
  for (const float plain : {1.f, 32.f, -1.f, 0.f, -0.f, 1.2e32f}) {
    EXPECT_THAT(FloatProxy<float>(plain).getAsFloat(), Eq(plain));
  }

  const uint32_t infinity_bits[] = {0xFF800000u, 0x7F800000u};
  const uint32_t nan_bits[] = {
      0xFFC00000u, 0xFF800100u, 0xFF800c00u, 0xFF80F000u, 0xFFFFFFFFu,
      0x7FC00000u, 0x7F800100u, 0x7f800c00u, 0x7F80F000u, 0x7FFFFFFFu};

  // Classification of the special values.
  for (const uint32_t bits : infinity_bits) {
    EXPECT_TRUE(std::isinf(FloatProxy<float>(bits).getAsFloat()));
  }
  for (const uint32_t bits : nan_bits) {
    EXPECT_TRUE(std::isnan(FloatProxy<float>(bits).getAsFloat()));
  }

  // The raw bits, including NaN payloads, are preserved.
  for (const uint32_t bits : infinity_bits) {
    EXPECT_THAT(FloatProxy<float>(bits).data(), Eq(bits));
  }
  for (const uint32_t bits : nan_bits) {
    EXPECT_THAT(FloatProxy<float>(bits).data(), Eq(bits));
  }
}
365
// isNan() must be true for NaN bit patterns of either sign and with a range
// of payloads.
TEST(FloatProxy, Nan) {
  const uint32_t nan_bits[] = {
      0xFFC00000u, 0xFF800100u, 0xFF800c00u, 0xFF80F000u, 0xFFFFFFFFu,
      0x7FC00000u, 0x7F800100u, 0x7f800c00u, 0x7F80F000u, 0x7FFFFFFFu};
  for (const uint32_t bits : nan_bits) {
    EXPECT_TRUE(FloatProxy<float>(bits).isNan());
  }
}
378
// Unary minus on a FloatProxy<float> must negate zeros, ordinary values,
// large values and infinities.
TEST(FloatProxy, Negation) {
  // Negates |value| through the proxy and returns the result as a float.
  const auto negated = [](float value) {
    return (-FloatProxy<float>(value)).getAsFloat();
  };
  const float infinity = std::numeric_limits<float>::infinity();

  EXPECT_THAT(negated(1.f), Eq(-1.0f));
  EXPECT_THAT(negated(0.f), Eq(-0.0f));

  EXPECT_THAT(negated(-1.f), Eq(1.0f));
  EXPECT_THAT(negated(-0.f), Eq(0.0f));

  EXPECT_THAT(negated(32.f), Eq(-32.0f));
  EXPECT_THAT(negated(-32.f), Eq(32.0f));

  EXPECT_THAT(negated(1.2e32f), Eq(-1.2e32f));
  EXPECT_THAT(negated(-1.2e32f), Eq(1.2e32f));

  EXPECT_THAT(negated(infinity), Eq(-infinity));
  EXPECT_THAT(negated(-infinity), Eq(infinity));
}
399
400 // Test conversion of FloatProxy values to strings.
401 //
402 // In previous cases, we always wrapped the FloatProxy value in a HexFloat
403 // before conversion to a string. In the following cases, the FloatProxy
404 // decides for itself whether to print as a regular number or as a hex float.
405
406 using FloatProxyFloatTest =
407 ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
408 using FloatProxyDoubleTest =
409 ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
410
// Streams |value| with its own operator<< and returns the resulting text.
// The tests pass a FloatProxy here, so the proxy chooses the output format.
template <typename T>
std::string EncodeViaFloatProxy(const T& value) {
  std::stringstream text;
  text << value;
  return text.str();
}
418
// Rewrites a decimal floating point string so that the exponent marker is a
// lower-case 'e' and the exponent value has no leading zeros.
// The Microsoft runtime library likes to write things like "2.5E+010";
// this converts that to "2.5e+10".
//
// The input is recognized only when it has exactly this shape:
//   <one or more of "-+.0123456789"> <'e' or 'E'> <'+' or '-'> <digits>
// Any other string, including one with characters after the exponent digits,
// is returned unchanged.
//
// The scan uses std::string operations only, so an arbitrarily long run of
// exponent digits cannot overflow an integer conversion.
std::string NormalizeExponentInFloatString(std::string in) {
  // The mantissa is the longest prefix made of sign, dot and digit
  // characters. It must be non-empty and be followed by an exponent marker.
  const std::string::size_type marker = in.find_first_not_of("-+.0123456789");
  if (marker == std::string::npos || marker == 0) return in;
  if (in[marker] != 'e' && in[marker] != 'E') return in;

  // An explicit exponent sign is required.
  const std::string::size_type sign = marker + 1;
  if (sign >= in.size() || (in[sign] != '-' && in[sign] != '+')) return in;

  // The rest of the string must be one or more decimal digits.
  const std::string::size_type digits = sign + 1;
  if (digits >= in.size() ||
      in.find_first_not_of("0123456789", digits) != std::string::npos) {
    return in;
  }

  // Drop leading zeros, but keep one digit when the exponent is all zeros.
  std::string::size_type significant = in.find_first_not_of('0', digits);
  if (significant == std::string::npos) significant = in.size() - 1;

  std::string result = in.substr(0, marker);
  result += 'e';
  result += in[sign];
  result.append(in, significant, std::string::npos);
  return result;
}
445
// Spot checks for NormalizeExponentInFloatString: each row is an input
// followed by the expected normalized text.
TEST(NormalizeFloat, Sample) {
  const std::pair<const char*, const char*> samples[] = {
      {"", ""},
      {"1e-12", "1e-12"},
      {"1E+14", "1e+14"},
      {"1e-0012", "1e-12"},
      {"1.263E+014", "1.263e+14"},
  };
  for (const auto& sample : samples) {
    EXPECT_THAT(NormalizeExponentInFloatString(sample.first),
                Eq(sample.second));
  }
}
453
454 // The following two tests can't be DRY because they take different parameter
455 // types.
// Streams the FloatProxy<float> parameter, normalizes the exponent spelling,
// and compares the result with the expected text.
TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
  const auto& test_case = GetParam();
  const std::string encoded = EncodeViaFloatProxy(test_case.first);
  EXPECT_THAT(NormalizeExponentInFloatString(encoded), Eq(test_case.second));
}
461
// Streams the FloatProxy<double> parameter, normalizes the exponent spelling,
// and compares the result with the expected text.
TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
  const auto& test_case = GetParam();
  const std::string encoded = EncodeViaFloatProxy(test_case.first);
  EXPECT_THAT(NormalizeExponentInFloatString(encoded), Eq(test_case.second));
}
467
468 INSTANTIATE_TEST_SUITE_P(
469 Float32Tests, FloatProxyFloatTest,
470 ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
471 // Zero
472 {0.f, "0"},
473 // Normal numbers
474 {1.f, "1"},
475 {-0.25f, "-0.25"},
476 {1000.0f, "1000"},
477
478 // Still normal numbers, but with large magnitude exponents.
479 {float(ldexp(1.f, 126)), "8.50706e+37"},
480 {float(ldexp(-1.f, -126)), "-1.17549e-38"},
481
482 // denormalized values are printed as hex floats.
483 {float(ldexp(1.0f, -127)), "0x1p-127"},
484 {float(ldexp(1.5f, -128)), "0x1.8p-128"},
485 {float(ldexp(1.25, -129)), "0x1.4p-129"},
486 {float(ldexp(1.125, -130)), "0x1.2p-130"},
487 {float(ldexp(-1.0f, -127)), "-0x1p-127"},
488 {float(ldexp(-1.0f, -128)), "-0x1p-128"},
489 {float(ldexp(-1.0f, -129)), "-0x1p-129"},
490 {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
491
492 // NaNs
493 {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
494 {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
495
496 {std::numeric_limits<float>::infinity(), "0x1p+128"},
497 {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
498 })));
499
500 INSTANTIATE_TEST_SUITE_P(
501 Float64Tests, FloatProxyDoubleTest,
502 ::testing::ValuesIn(
503 std::vector<std::pair<FloatProxy<double>, std::string>>({
504 {0., "0"},
505 {1., "1"},
506 {-0.25, "-0.25"},
507 {1000.0, "1000"},
508
509 // Large outside the range of normal floats
510 {ldexp(1.0, 128), "3.40282366920938e+38"},
511 {ldexp(1.5, 129), "1.02084710076282e+39"},
512 {ldexp(-1.0, 128), "-3.40282366920938e+38"},
513 {ldexp(-1.5, 129), "-1.02084710076282e+39"},
514
515 // Small outside the range of normal floats
516 {ldexp(1.5, -129), "2.20405190779179e-39"},
517 {ldexp(-1.5, -129), "-2.20405190779179e-39"},
518
519 // lowest non-denorm
520 {ldexp(1.0, -1022), "2.2250738585072e-308"},
521 {ldexp(-1.0, -1022), "-2.2250738585072e-308"},
522
523 // Denormalized values
524 {ldexp(1.125, -1023), "0x1.2p-1023"},
525 {ldexp(-1.375, -1024), "-0x1.6p-1024"},
526
527 // NaNs
528 {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
529 {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
530
531 // Infinity
532 {std::numeric_limits<double>::infinity(), "0x1p+1024"},
533 {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
534
535 })));
536
537 // double is used so that unbiased_exponent can be used with the output
538 // of ldexp directly.
unbiased_exponent(double f)539 int32_t unbiased_exponent(double f) {
540 return spvutils::HexFloat<spvutils::FloatProxy<float>>(
541 static_cast<float>(f)).getUnbiasedNormalizedExponent();
542 }
543
unbiased_half_exponent(uint16_t f)544 int16_t unbiased_half_exponent(uint16_t f) {
545 return spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>(f)
546 .getUnbiasedNormalizedExponent();
547 }
548
// Checks getUnbiasedNormalizedExponent for 32-bit floats and for Float16.
TEST(HexFloatOperationTest, UnbiasedExponent) {
  // Float cases. For each of these powers of two the exponent comes back
  // unchanged. -127 is the first denorm, and -126 - 23 is the smallest
  // representable number.
  for (const int power :
       {0, -32, 42, 125, -100, -127, -128, -129, -140, -126 - 23}) {
    EXPECT_EQ(power, unbiased_exponent(ldexp(1.0f, power)));
  }
  // Saturates to 128
  EXPECT_EQ(128, unbiased_exponent(ldexp(1.0f, 256)));
  // Should get rounded to 0 first.
  EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));

  // Float16 cases
  // The exponent is represented in the bits 0x7C00
  // The offset is -15
  struct HalfCase {
    int expected_exponent;
    uint16_t bits;
  };
  const HalfCase half_cases[] = {
      {0, 0x3C00},  {3, 0x4800},  {-1, 0x3800}, {-14, 0x0400},
      {16, 0x7C00}, {10, 0x6400},
      // Smallest representable number
      {-24, 0x0001},
  };
  for (const HalfCase& half_case : half_cases) {
    EXPECT_EQ(half_case.expected_exponent,
              unbiased_half_exponent(half_case.bits));
  }
}
581
// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in fractions.
// An empty list gives 0.
float float_fractions(const std::vector<uint32_t>& fractions) {
  float sum = 0;
  for (const uint32_t power : fractions) {
    // Convert to a signed exponent explicitly before negating; negating the
    // unsigned value would wrap around.
    sum += std::ldexp(1.0f, -static_cast<int>(power));
  }
  return sum;
}
590
591 // Returns the normalized significand of a HexFloat<FloatProxy<float>>
592 // that was created by calling float_fractions with the input fractions,
593 // raised to the power of exp.
normalized_significand(const std::vector<uint32_t> & fractions,uint32_t exp)594 uint32_t normalized_significand(const std::vector<uint32_t>& fractions, uint32_t exp) {
595 return spvutils::HexFloat<spvutils::FloatProxy<float>>(
596 static_cast<float>(ldexp(float_fractions(fractions), exp)))
597 .getNormalizedSignificand();
598 }
599
// Builds a 32-bit float significand mask from bit positions counted from the
// most significant significand bit downwards. Position 0 sets bit 22
// (zero-based from the LSB), position 1 sets bit 21, and so on.
// Positions past the low end of the significand contribute nothing.
uint32_t bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t top_bit = 1u << 22u;
  uint32_t mask = 0;
  for (const uint32_t position : bits) {
    // Shifting a 32-bit value by 32 or more is undefined behavior, so skip
    // such positions; they would be below bit 0 anyway.
    if (position < 32u) {
      mask |= top_bit >> position;
    }
  }
  return mask;
}
611
// Builds a Float16 significand mask from bit positions counted from the most
// significant significand bit downwards. Position 0 sets bit 9 (zero-based
// from the LSB), position 9 sets bit 0.
// Positions past the low end of the significand contribute nothing.
uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t top_bit = 1u << 9u;
  uint32_t mask = 0;
  for (const uint32_t position : bits) {
    // Shifting a 32-bit value by 32 or more is undefined behavior, so skip
    // such positions; they would be below bit 0 anyway.
    if (position < 32u) {
      mask |= top_bit >> position;
    }
  }
  return static_cast<uint16_t>(mask);
}
622
// Checks getNormalizedSignificand for normalized and denormalized floats.
TEST(HexFloatOperationTest, NormalizedSignificand) {
  struct Case {
    std::vector<uint32_t> expected_bits;  // Positions given to bits_set.
    std::vector<uint32_t> fractions;      // Input to float_fractions.
    int exponent;                         // Power-of-two scale factor.
  };
  const std::vector<Case> cases = {
      // For normalized numbers (the following) it should be a simple matter
      // of getting rid of the top implicit bit
      {{}, {0}, 0},
      {{0}, {0, 1}, 0},
      {{0, 1}, {0, 1, 2}, 0},
      {{1}, {0, 2}, 0},
      {{1}, {0, 2}, 32},
      {{1}, {0, 2}, 126},

      // For denormalized numbers we expect the normalized significand to
      // shift as if it were normalized. This means, in practice, that the
      // top-most set bit will be cut off. These look very similar to the
      // rows above on purpose.
      {{}, {0}, -127},
      {{3}, {0, 4}, -128},
      {{3}, {0, 4}, -127},
      {{}, {22}, -127},
      {{0}, {21, 22}, -127},
  };
  for (const Case& test_case : cases) {
    EXPECT_EQ(bits_set(test_case.expected_bits),
              normalized_significand(test_case.fractions, test_case.exponent));
  }
}
642
643 // Returns the 32-bit floating point value created by
644 // calling setFromSignUnbiasedExponentAndNormalizedSignificand
645 // on a HexFloat<FloatProxy<float>>
set_from_sign(bool negative,int32_t unbiased_exponent,uint32_t significand,bool round_denorm_up)646 float set_from_sign(bool negative, int32_t unbiased_exponent,
647 uint32_t significand, bool round_denorm_up) {
648 spvutils::HexFloat<spvutils::FloatProxy<float>> f(0.f);
649 f.setFromSignUnbiasedExponentAndNormalizedSignificand(
650 negative, unbiased_exponent, significand, round_denorm_up);
651 return f.value().getAsFloat();
652 }
653
// Builds floats from sign, exponent and significand and compares them with
// values computed through ldexp and float_fractions.
TEST(HexFloatOperationTests,
     SetFromSignUnbiasedExponentAndNormalizedSignificand) {
  const float smallest_denorm = static_cast<float>(ldexp(1.f, -149));
  const float mantissa = float_fractions({0, 1, 2, 5});
  const uint32_t mantissa_bits = bits_set({0, 1, 4});

  EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));

  // Tests insertion of various denormalized numbers with and without round up.
  EXPECT_EQ(smallest_denorm, set_from_sign(false, -149, 0, false));
  EXPECT_EQ(smallest_denorm, set_from_sign(false, -149, 0, true));
  EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
  EXPECT_EQ(smallest_denorm, set_from_sign(false, -150, 1, true));

  EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
  EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
  EXPECT_EQ(mantissa, set_from_sign(false, 0, mantissa_bits, false));
  EXPECT_EQ(ldexp(mantissa, -32),
            set_from_sign(false, -32, mantissa_bits, false));
  EXPECT_EQ(ldexp(mantissa, -128),
            set_from_sign(false, -128, mantissa_bits, false));

  // The negative cases from above.
  EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
  EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
  EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
  EXPECT_EQ(-mantissa, set_from_sign(true, 0, mantissa_bits, false));
  EXPECT_EQ(-ldexp(mantissa, -32),
            set_from_sign(true, -32, mantissa_bits, false));
  EXPECT_EQ(-ldexp(mantissa, -128),
            set_from_sign(true, -128, mantissa_bits, false));
}
685
// Rounding from 32-bit hex-float to 32-bit hex-float should be trivial,
// except in the denorm case which is a bit more complex.
TEST(HexFloatOperationTests, NonRounding) {
  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  bool carry_bit = false;

  const spvutils::round_direction modes[] = {
      spvutils::kRoundToZero,
      spvutils::kRoundToNearestEven,
      spvutils::kRoundToPositiveInfinity,
      spvutils::kRoundToNegativeInfinity};

  // Everything fits, so every rounding mode should give the same significand
  // and never report a carry.
  for (const spvutils::round_direction mode : modes) {
    // Rounds |source| to the HF format itself using the current mode.
    const auto rounded = [&carry_bit, mode](float source) {
      return HF(source).getRoundedNormalizedSignificand<HF>(mode, &carry_bit);
    };

    EXPECT_EQ(bits_set({}), rounded(0.f));
    EXPECT_FALSE(carry_bit);

    EXPECT_EQ(bits_set({0}), rounded(float_fractions({0, 1})));
    EXPECT_FALSE(carry_bit);

    EXPECT_EQ(bits_set({1, 3}), rounded(float_fractions({0, 2, 4})));
    EXPECT_FALSE(carry_bit);

    // A negative denormalized input.
    EXPECT_EQ(bits_set({0, 1, 4}),
              rounded(static_cast<float>(
                  -ldexp(float_fractions({0, 1, 2, 5}), -128))));
    EXPECT_FALSE(carry_bit);

    // An input that also uses the lowest significand bit.
    EXPECT_EQ(bits_set({0, 1, 4, 22}),
              rounded(static_cast<float>(float_fractions({0, 1, 2, 5, 23}))));
    EXPECT_FALSE(carry_bit);
  }
}
727
728 struct RoundSignificandCase {
729 float source_float;
730 std::pair<int16_t, bool> expected_results;
731 spvutils::round_direction round;
732 };
733
734 using HexFloatRoundTest =
735 ::testing::TestWithParam<RoundSignificandCase>;
736
// Rounds the 32-bit source significand to Float16 width and checks both the
// rounded significand and the carry flag against the test case.
TEST_P(HexFloatRoundTest, RoundDownToFP16) {
  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;

  const RoundSignificandCase& test_case = GetParam();
  HF source(test_case.source_float);
  bool carry_bit = false;
  const auto significand = source.getRoundedNormalizedSignificand<HF16>(
      test_case.round, &carry_bit);
  EXPECT_EQ(test_case.expected_results.first, significand);
  EXPECT_EQ(carry_bit, test_case.expected_results.second);
}
748
749 // clang-format off
750 INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest,
751 ::testing::ValuesIn(std::vector<RoundSignificandCase>(
752 {
753 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToZero},
754 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNearestEven},
755 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToPositiveInfinity},
756 {float_fractions({0}), std::make_pair(half_bits_set({}), false), spvutils::kRoundToNegativeInfinity},
757 {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
758
759 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
760 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
761 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
762 {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNearestEven},
763
764 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToZero},
765 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToPositiveInfinity},
766 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity},
767 {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), spvutils::kRoundToNearestEven},
768
769 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
770 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
771 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
772 {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
773
774 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
775 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToPositiveInfinity},
776 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNegativeInfinity},
777 {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
778
779 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
780 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
781 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
782 {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
783
784 // Carries
785 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToZero},
786 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToPositiveInfinity},
787 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), spvutils::kRoundToNegativeInfinity},
788 {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), spvutils::kRoundToNearestEven},
789
790 // Cases where original number was denorm. Note: this should have no effect
791 // the number is pre-normalized.
792 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToZero},
793 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToPositiveInfinity},
794 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), spvutils::kRoundToNegativeInfinity},
795 {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), spvutils::kRoundToNearestEven},
796 })));
797 // clang-format on
798
// One significand-widening case: the raw bits of a 16-bit float and the
// 32-bit float significand expected after widening.
struct UpCastSignificandCase {
  // Raw bits of the half-precision source value.
  uint16_t source_half;
  // Expected widened significand.
  uint32_t expected_result;
};
803
804 using HexFloatRoundUpSignificandTest =
805 ::testing::TestWithParam<UpCastSignificandCase>;
TEST_P(HexFloatRoundUpSignificandTest,Widening)806 TEST_P(HexFloatRoundUpSignificandTest, Widening) {
807 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
808 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
809 bool carry_bit = false;
810
811 spvutils::round_direction rounding[] = {
812 spvutils::kRoundToZero,
813 spvutils::kRoundToNearestEven,
814 spvutils::kRoundToPositiveInfinity,
815 spvutils::kRoundToNegativeInfinity};
816
817 // Everything fits, so everything should just be bit-shifts.
818 for (spvutils::round_direction round : rounding) {
819 carry_bit = false;
820 HF16 input_value(GetParam().source_half);
821 EXPECT_EQ(
822 GetParam().expected_result,
823 input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
824 << std::hex << "0x"
825 << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
826 << " 0x" << GetParam().expected_result;
827 EXPECT_FALSE(carry_bit);
828 }
829 }
830
831 INSTANTIATE_TEST_SUITE_P(F16toF32, HexFloatRoundUpSignificandTest,
832 // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
833 // They are ignored for this test.
834 ::testing::ValuesIn(std::vector<UpCastSignificandCase>(
835 {
836 {0x3F00, 0x600000},
837 {0x0F00, 0x600000},
838 {0x0F01, 0x602000},
839 {0x0FFF, 0x7FE000},
840 })));
841
842 struct DownCastTest {
843 float source_float;
844 uint16_t expected_half;
845 std::vector<spvutils::round_direction> directions;
846 };
847
get_round_text(spvutils::round_direction direction)848 std::string get_round_text(spvutils::round_direction direction) {
849 #define CASE(round_direction) \
850 case round_direction: \
851 return #round_direction
852
853 switch (direction) {
854 CASE(spvutils::kRoundToZero);
855 CASE(spvutils::kRoundToPositiveInfinity);
856 CASE(spvutils::kRoundToNegativeInfinity);
857 CASE(spvutils::kRoundToNearestEven);
858 }
859 #undef CASE
860 return "";
861 }
862
863 using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
864
TEST_P(HexFloatFP32To16Tests,NarrowingCasts)865 TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
866 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
867 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
868 HF f(GetParam().source_float);
869 for (auto round : GetParam().directions) {
870 HF16 half(0);
871 f.castTo(half, round);
872 EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
873 << get_round_text(round) << " " << std::hex
874 << spvutils::BitwiseCast<uint32_t>(GetParam().source_float)
875 << " cast to: " << half.value().getAsFloat().get_value();
876 }
877 }
878
// Raw bit patterns used as the expected half-precision results for positive
// and negative infinity in the narrowing-cast cases.
constexpr uint16_t positive_infinity = 0x7C00;
constexpr uint16_t negative_infinity = 0xFC00;
881
882 INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatFP32To16Tests,
883 ::testing::ValuesIn(std::vector<DownCastTest>(
884 {
885 // Exactly representable as half.
886 {0.f, 0x0, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
887 {-0.f, 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
888 {1.0f, 0x3C00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
889 {-1.0f, 0xBC00, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
890
891 {float_fractions({0, 1, 10}) , 0x3E01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
892 {-float_fractions({0, 1, 10}) , 0xBE01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
893 {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)), 0x4A01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
894 {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)), 0xCA01, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
895
896
897 // Underflow
898 {static_cast<float>(ldexp(1.0f, -25)), 0x0, {spvutils::kRoundToZero, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
899 {static_cast<float>(ldexp(1.0f, -25)), 0x1, {spvutils::kRoundToPositiveInfinity}},
900 {static_cast<float>(-ldexp(1.0f, -25)), 0x8000, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNearestEven}},
901 {static_cast<float>(-ldexp(1.0f, -25)), 0x8001, {spvutils::kRoundToNegativeInfinity}},
902 {static_cast<float>(ldexp(1.0f, -24)), 0x1, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
903
904 // Overflow
905 {static_cast<float>(ldexp(1.0f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
906 {static_cast<float>(ldexp(1.0f, 18)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
907 {static_cast<float>(ldexp(1.3f, 16)), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
908 {static_cast<float>(-ldexp(1.0f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
909 {static_cast<float>(-ldexp(1.0f, 18)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
910 {static_cast<float>(-ldexp(1.3f, 16)), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
911
912 // Transfer of Infinities
913 {std::numeric_limits<float>::infinity(), positive_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
914 {-std::numeric_limits<float>::infinity(), negative_infinity, {spvutils::kRoundToZero, spvutils::kRoundToPositiveInfinity, spvutils::kRoundToNegativeInfinity, spvutils::kRoundToNearestEven}},
915
916 // Nans are below because we cannot test for equality.
917 })));
918
// One widening-cast case: the raw bits of a 16-bit float and the 32-bit float
// value expected after the cast.
struct UpCastCase {
  // Raw bits of the half-precision source value.
  uint16_t source_half;
  // Expected single-precision result.
  float expected_float;
};
923
924 using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
TEST_P(HexFloatFP16To32Tests,WideningCasts)925 TEST_P(HexFloatFP16To32Tests, WideningCasts) {
926 using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
927 using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
928 HF16 f(GetParam().source_half);
929
930 spvutils::round_direction rounding[] = {
931 spvutils::kRoundToZero,
932 spvutils::kRoundToNearestEven,
933 spvutils::kRoundToPositiveInfinity,
934 spvutils::kRoundToNegativeInfinity};
935
936 // Everything fits, so everything should just be bit-shifts.
937 for (spvutils::round_direction round : rounding) {
938 HF flt(0.f);
939 f.castTo(flt, round);
940 EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
941 << get_round_text(round) << " " << std::hex
942 << spvutils::BitwiseCast<uint16_t>(GetParam().source_half)
943 << " cast to: " << flt.value().getAsFloat();
944 }
945 }
946
947 INSTANTIATE_TEST_SUITE_P(F16ToF32, HexFloatFP16To32Tests,
948 ::testing::ValuesIn(std::vector<UpCastCase>(
949 {
950 {0x0000, 0.f},
951 {0x8000, -0.f},
952 {0x3C00, 1.0f},
953 {0xBC00, -1.0f},
954 {0x3F00, float_fractions({0, 1, 2})},
955 {0xBF00, -float_fractions({0, 1, 2})},
956 {0x3F01, float_fractions({0, 1, 2, 10})},
957 {0xBF01, -float_fractions({0, 1, 2, 10})},
958
959 // denorm
960 {0x0001, static_cast<float>(ldexp(1.0, -24))},
961 {0x0002, static_cast<float>(ldexp(1.0, -23))},
962 {0x8001, static_cast<float>(-ldexp(1.0, -24))},
963 {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},
964
965 // inf
966 {0x7C00, std::numeric_limits<float>::infinity()},
967 {0xFC00, -std::numeric_limits<float>::infinity()},
968 })));
969
// Checks that a NaN stays a NaN when cast between 32-bit and 16-bit floats,
// in both directions and under every rounding direction. NaNs are tested here
// rather than in the parameterized cast tests because they cannot be compared
// for equality.
TEST(HexFloatOperationTests, NanTests) {
  using HF = spvutils::HexFloat<spvutils::FloatProxy<float>>;
  using HF16 = spvutils::HexFloat<spvutils::FloatProxy<spvutils::Float16>>;
  const spvutils::round_direction rounding[] = {
      spvutils::kRoundToZero,
      spvutils::kRoundToNearestEven,
      spvutils::kRoundToPositiveInfinity,
      spvutils::kRoundToNegativeInfinity};

  // Raw half-precision bit patterns that are cast up to 32-bit floats below.
  const uint16_t half_nans[] = {0x7C01, 0x7C11, 0xFC01, 0x7C10, 0xFF00};

  for (spvutils::round_direction round : rounding) {
    // Every cast writes into a freshly zeroed destination. Reusing one
    // destination would let a NaN left over from an earlier cast hide a cast
    // that failed to store its result.
    {
      HF16 f16(0);
      HF(std::numeric_limits<float>::quiet_NaN()).castTo(f16, round);
      EXPECT_TRUE(f16.value().isNan())
          << get_round_text(round) << " quiet NaN";
    }
    {
      HF16 f16(0);
      HF(std::numeric_limits<float>::signaling_NaN()).castTo(f16, round);
      EXPECT_TRUE(f16.value().isNan())
          << get_round_text(round) << " signaling NaN";
    }

    for (const uint16_t bits : half_nans) {
      HF f(0.f);
      HF16(bits).castTo(f, round);
      EXPECT_TRUE(f.value().isNan())
          << get_round_text(round) << " 0x" << std::hex << bits;
    }
  }
}
1000
1001 // A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
1002 template <typename T>
1003 struct FloatParseCase {
1004 std::string literal;
1005 bool negate_value;
1006 bool expect_success;
1007 HexFloat<FloatProxy<T>> expected_value;
1008 };
1009
1010 using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
1011
TEST_P(ParseNormalFloatTest,Samples)1012 TEST_P(ParseNormalFloatTest, Samples) {
1013 std::stringstream input(GetParam().literal);
1014 HexFloat<FloatProxy<float>> parsed_value(0.0f);
1015 ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1016 EXPECT_NE(GetParam().expect_success, input.fail())
1017 << " literal: " << GetParam().literal
1018 << " negate: " << GetParam().negate_value;
1019 if (GetParam().expect_success) {
1020 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1021 << " literal: " << GetParam().literal
1022 << " negate: " << GetParam().negate_value;
1023 }
1024 }
1025
1026 // Returns a FloatParseCase with expected failure.
1027 template <typename T>
BadFloatParseCase(std::string literal,bool negate_value,T expected_value)1028 FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
1029 T expected_value) {
1030 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1031 return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
1032 }
1033
1034 // Returns a FloatParseCase that should successfully parse to a given value.
1035 template <typename T>
GoodFloatParseCase(std::string literal,bool negate_value,T expected_value)1036 FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
1037 T expected_value) {
1038 HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1039 return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
1040 }
1041
1042 INSTANTIATE_TEST_SUITE_P(
1043 FloatParse, ParseNormalFloatTest,
1044 ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
1045 // Failing cases due to trivially incorrect syntax.
1046 BadFloatParseCase("abc", false, 0.0f),
1047 BadFloatParseCase("abc", true, 0.0f),
1048
1049 // Valid cases.
1050 GoodFloatParseCase("0", false, 0.0f),
1051 GoodFloatParseCase("0.0", false, 0.0f),
1052 GoodFloatParseCase("-0.0", false, -0.0f),
1053 GoodFloatParseCase("2.0", false, 2.0f),
1054 GoodFloatParseCase("-2.0", false, -2.0f),
1055 GoodFloatParseCase("+2.0", false, 2.0f),
1056 // Cases with negate_value being true.
1057 GoodFloatParseCase("0.0", true, -0.0f),
1058 GoodFloatParseCase("2.0", true, -2.0f),
1059
1060 // When negate_value is true, we should not accept a
1061 // leading minus or plus.
1062 BadFloatParseCase("-0.0", true, 0.0f),
1063 BadFloatParseCase("-2.0", true, 0.0f),
1064 BadFloatParseCase("+0.0", true, 0.0f),
1065 BadFloatParseCase("+2.0", true, 0.0f),
1066
1067 // Overflow is an error for 32-bit float parsing.
1068 BadFloatParseCase("1e40", false, FLT_MAX),
1069 BadFloatParseCase("1e40", true, -FLT_MAX),
1070 BadFloatParseCase("-1e40", false, -FLT_MAX),
1071 // We can't have -1e40 and negate_value == true since
1072 // that represents an original case of "--1e40" which
1073 // is invalid.
1074 }));
1075
1076 using ParseNormalFloat16Test =
1077 ::testing::TestWithParam<FloatParseCase<Float16>>;
1078
TEST_P(ParseNormalFloat16Test,Samples)1079 TEST_P(ParseNormalFloat16Test, Samples) {
1080 std::stringstream input(GetParam().literal);
1081 HexFloat<FloatProxy<Float16>> parsed_value(0);
1082 ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1083 EXPECT_NE(GetParam().expect_success, input.fail())
1084 << " literal: " << GetParam().literal
1085 << " negate: " << GetParam().negate_value;
1086 if (GetParam().expect_success) {
1087 EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1088 << " literal: " << GetParam().literal
1089 << " negate: " << GetParam().negate_value;
1090 }
1091 }
1092
1093 INSTANTIATE_TEST_SUITE_P(
1094 Float16Parse, ParseNormalFloat16Test,
1095 ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
1096 // Failing cases due to trivially incorrect syntax.
1097 BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
1098 BadFloatParseCase<Float16>("abc", true, uint16_t{0}),
1099
1100 // Valid cases.
1101 GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
1102 GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
1103 GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
1104 GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
1105 GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
1106 GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
1107 // Cases with negate_value being true.
1108 GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
1109 GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),
1110
1111 // When negate_value is true, we should not accept a leading minus or
1112 // plus.
1113 BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
1114 BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
1115 BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
1116 BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
1117 }));
1118
// Describes one stream-extraction attempt used by the overflow parsing tests.
template <typename T>
struct OverflowParseCase {
  // Text to parse.
  std::string input;
  // True when extraction is expected to leave the stream in a good state.
  bool expect_success;
  // Value compared against the parse result; checked only on success.
  T expected_value;
};
1126
1127 using FloatProxyParseOverflowFloatTest =
1128 ::testing::TestWithParam<OverflowParseCase<float>>;
1129
TEST_P(FloatProxyParseOverflowFloatTest,Sample)1130 TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
1131 std::istringstream input(GetParam().input);
1132 HexFloat<FloatProxy<float>> value(0.0f);
1133 input >> value;
1134 EXPECT_NE(GetParam().expect_success, input.fail());
1135 if (GetParam().expect_success) {
1136 EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
1137 }
1138 }
1139
1140 INSTANTIATE_TEST_SUITE_P(
1141 FloatOverflow, FloatProxyParseOverflowFloatTest,
1142 ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
1143 {"0", true, 0.0f},
1144 {"0.0", true, 0.0f},
1145 {"1.0", true, 1.0f},
1146 {"1e38", true, 1e38f},
1147 {"-1e38", true, -1e38f},
1148 {"1e40", false, FLT_MAX},
1149 {"-1e40", false, -FLT_MAX},
1150 {"1e400", false, FLT_MAX},
1151 {"-1e400", false, -FLT_MAX},
1152 })));
1153
1154 using FloatProxyParseOverflowDoubleTest =
1155 ::testing::TestWithParam<OverflowParseCase<double>>;
1156
TEST_P(FloatProxyParseOverflowDoubleTest,Sample)1157 TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
1158 std::istringstream input(GetParam().input);
1159 HexFloat<FloatProxy<double>> value(0.0);
1160 input >> value;
1161 EXPECT_NE(GetParam().expect_success, input.fail());
1162 if (GetParam().expect_success) {
1163 EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
1164 }
1165 }
1166
1167 INSTANTIATE_TEST_SUITE_P(
1168 DoubleOverflow, FloatProxyParseOverflowDoubleTest,
1169 ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
1170 {"0", true, 0.0},
1171 {"0.0", true, 0.0},
1172 {"1.0", true, 1.0},
1173 {"1e38", true, 1e38},
1174 {"-1e38", true, -1e38},
1175 {"1e40", true, 1e40},
1176 {"-1e40", true, -1e40},
1177 {"1e400", false, DBL_MAX},
1178 {"-1e400", false, -DBL_MAX},
1179 })));
1180
1181 using FloatProxyParseOverflowFloat16Test =
1182 ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
1183
TEST_P(FloatProxyParseOverflowFloat16Test,Sample)1184 TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
1185 std::istringstream input(GetParam().input);
1186 HexFloat<FloatProxy<Float16>> value(0);
1187 input >> value;
1188 EXPECT_NE(GetParam().expect_success, input.fail()) << " literal: "
1189 << GetParam().input;
1190 if (GetParam().expect_success) {
1191 EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
1192 << " literal: " << GetParam().input;
1193 }
1194 }
1195
1196 INSTANTIATE_TEST_SUITE_P(
1197 Float16Overflow, FloatProxyParseOverflowFloat16Test,
1198 ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
1199 {"0", true, uint16_t{0}},
1200 {"0.0", true, uint16_t{0}},
1201 {"1.0", true, uint16_t{0x3c00}},
1202 // Overflow for 16-bit float is an error, and returns max or
1203 // lowest value.
1204 {"1e38", false, uint16_t{0x7bff}},
1205 {"1e40", false, uint16_t{0x7bff}},
1206 {"1e400", false, uint16_t{0x7bff}},
1207 {"-1e38", false, uint16_t{0xfbff}},
1208 {"-1e40", false, uint16_t{0xfbff}},
1209 {"-1e400", false, uint16_t{0xfbff}},
1210 })));
1211
// FloatProxy<T>::max() must agree with the largest finite value of each
// supported width: bit pattern 0x7bff for Float16, and numeric_limits::max()
// for float and double.
TEST(FloatProxy, Max) {
  const auto half_max = FloatProxy<Float16>::max().getAsFloat().get_value();
  EXPECT_THAT(half_max, Eq(uint16_t{0x7bff}));

  const auto float_max = FloatProxy<float>::max().getAsFloat();
  EXPECT_THAT(float_max, Eq(std::numeric_limits<float>::max()));

  const auto double_max = FloatProxy<double>::max().getAsFloat();
  EXPECT_THAT(double_max, Eq(std::numeric_limits<double>::max()));
}
1220
// FloatProxy<T>::lowest() must agree with the most negative finite value of
// each supported width: bit pattern 0xfbff for Float16, and
// numeric_limits::lowest() for float and double.
TEST(FloatProxy, Lowest) {
  const auto half_lowest =
      FloatProxy<Float16>::lowest().getAsFloat().get_value();
  EXPECT_THAT(half_lowest, Eq(uint16_t{0xfbff}));

  const auto float_lowest = FloatProxy<float>::lowest().getAsFloat();
  EXPECT_THAT(float_lowest, Eq(std::numeric_limits<float>::lowest()));

  const auto double_lowest = FloatProxy<double>::lowest().getAsFloat();
  EXPECT_THAT(double_lowest, Eq(std::numeric_limits<double>::lowest()));
}
1229
1230 // TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
1231 } // anonymous namespace
1232