1 #ifndef _TCUFLOAT_HPP
2 #define _TCUFLOAT_HPP
3 /*-------------------------------------------------------------------------
4 * drawElements Quality Program Tester Core
5 * ----------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Reconfigurable floating-point value template.
24 *//*--------------------------------------------------------------------*/
25
26 #include "tcuDefs.hpp"
27
28 // For memcpy().
29 #include <string.h>
30
31 namespace tcu
32 {
33
//! Capability bits for the Flags template argument of tcu::Float (may be OR'ed together).
enum FloatFlags
{
    FLOAT_HAS_SIGN          = 0x1,  //!< Format has a sign bit, so negative values are representable.
    FLOAT_SUPPORT_DENORM    = 0x2   //!< Conversions into the format may produce denormalized values.
};
39
//! Rounding direction applied when a conversion cannot represent the source value exactly.
enum RoundingDirection
{
    ROUND_TO_EVEN   = 0,    //!< Round to nearest, ties to even.
    ROUND_DOWNWARD  = 1,    //!< Round towards -Inf.
    ROUND_UPWARD    = 2,    //!< Round towards +Inf.
    ROUND_TO_ZERO   = 3     //!< Truncate towards zero.
};
47
48 /*--------------------------------------------------------------------*//*!
49 * \brief Floating-point format template
50 *
51 * This template implements arbitrary floating-point handling. Template
52 * can be used for conversion between different formats and checking
53 * various properties of floating-point values.
54 *//*--------------------------------------------------------------------*/
55 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
56 class Float
57 {
58 public:
59 typedef StorageType_ StorageType;
60
61 enum
62 {
63 EXPONENT_BITS = ExponentBits,
64 MANTISSA_BITS = MantissaBits,
65 EXPONENT_BIAS = ExponentBias,
66 FLAGS = Flags,
67 };
68
69 Float (void);
70 explicit Float (StorageType value);
71 explicit Float (float v, RoundingDirection rd = ROUND_TO_EVEN);
72 explicit Float (double v, RoundingDirection rd = ROUND_TO_EVEN);
73
74 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
75 static Float convert (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src, RoundingDirection rd = ROUND_TO_EVEN);
76
convert(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & src,RoundingDirection=ROUND_TO_EVEN)77 static inline Float convert (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src, RoundingDirection = ROUND_TO_EVEN) { return src; }
78
79 /*--------------------------------------------------------------------*//*!
80 * \brief Construct floating point value
81 * \param sign Sign. Must be +1/-1
82 * \param exponent Exponent in range [1-ExponentBias, ExponentBias+1]
83 * \param mantissa Mantissa bits with implicit leading bit explicitly set
84 * \return The specified float
85 *
86 * This function constructs a floating point value from its inputs.
87 * The normally implicit leading bit of the mantissa must be explicitly set.
88 * The exponent normally used for zero/subnormals is an invalid input. Such
89 * values are specified with the leading mantissa bit of zero and the lowest
90 * normal exponent (1-ExponentBias). Additionally having both exponent and
91 * mantissa set to zero is a shorthand notation for the correctly signed
92 * floating point zero. Inf and NaN must be specified directly with an
93 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
94 * bit set)
95 *//*--------------------------------------------------------------------*/
96 static inline Float construct (int sign, int exponent, StorageType mantissa);
97
98 /*--------------------------------------------------------------------*//*!
99 * \brief Construct floating point value. Explicit version
100 * \param sign Sign. Must be +1/-1
101 * \param exponent Exponent in range [-ExponentBias, ExponentBias+1]
102 * \param mantissa Mantissa bits
103 * \return The specified float
104 *
105 * This function constructs a floating point value from its inputs with
106 * minimal intervention.
107 * The sign is turned into a sign bit and the exponent bias is added.
108 * See IEEE-754 for additional information on the inputs and
109 * the encoding of special values.
110 *//*--------------------------------------------------------------------*/
111 static Float constructBits (int sign, int exponent, StorageType mantissaBits);
112
bits(void) const113 StorageType bits (void) const { return m_value; }
114 float asFloat (void) const;
115 double asDouble (void) const;
116
signBit(void) const117 inline int signBit (void) const { return (int)(m_value >> (ExponentBits+MantissaBits)) & 1; }
exponentBits(void) const118 inline StorageType exponentBits (void) const { return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1); }
mantissaBits(void) const119 inline StorageType mantissaBits (void) const { return m_value & ((StorageType(1)<<MantissaBits)-1); }
120
sign(void) const121 inline int sign (void) const { return signBit() ? -1 : 1; }
exponent(void) const122 inline int exponent (void) const { return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias; }
mantissa(void) const123 inline StorageType mantissa (void) const { return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits)); }
124
isInf(void) const125 inline bool isInf (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() == 0; }
isNaN(void) const126 inline bool isNaN (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() != 0; }
isZero(void) const127 inline bool isZero (void) const { return exponentBits() == 0 && mantissaBits() == 0; }
isDenorm(void) const128 inline bool isDenorm (void) const { return exponentBits() == 0 && mantissaBits() != 0; }
129
operator <(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & other) const130 inline bool operator< (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& other) const { return this->asDouble() < other.asDouble(); }
131
132 static Float zero (int sign);
133 static Float inf (int sign);
134 static Float nan (void);
135
136 static Float largestNormal (int sign);
137 static Float smallestNormal (int sign);
138
139 private:
140 StorageType m_value;
141 } DE_WARN_UNUSED_TYPE;
142
143 // Common floating-point types.
144 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float16; //!< IEEE 754-2008 16-bit floating-point value
145 typedef Float<deUint32, 8, 23, 127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float32; //!< IEEE 754 32-bit floating-point value
146 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float64; //!< IEEE 754 64-bit floating-point value
147
148 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN> Float16Denormless; //!< IEEE 754-2008 16-bit floating-point value without denormalized support
149
150 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(void)151 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
152 : m_value(0)
153 {
154 }
155
156 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(StorageType value)157 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
158 : m_value(value)
159 {
160 }
161
162 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(float value,RoundingDirection rd)163 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value, RoundingDirection rd)
164 : m_value(0)
165 {
166 deUint32 u32;
167 memcpy(&u32, &value, sizeof(deUint32));
168 *this = convert(Float32(u32), rd);
169 }
170
171 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(double value,RoundingDirection rd)172 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value, RoundingDirection rd)
173 : m_value(0)
174 {
175 deUint64 u64;
176 memcpy(&u64, &value, sizeof(deUint64));
177 *this = convert(Float64(u64), rd);
178 }
179
180 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asFloat(void) const181 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
182 {
183 float v;
184 deUint32 u32 = Float32::convert(*this).bits();
185 memcpy(&v, &u32, sizeof(deUint32));
186 return v;
187 }
188
189 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asDouble(void) const190 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
191 {
192 double v;
193 deUint64 u64 = Float64::convert(*this).bits();
194 memcpy(&v, &u64, sizeof(deUint64));
195 return v;
196 }
197
198 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
zero(int sign)199 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
200 {
201 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
202 return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
203 }
204
205 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inf(int sign)206 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
207 {
208 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
209 return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
210 }
211
212 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
nan(void)213 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
214 {
215 return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
216 }
217
218 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
largestNormal(int sign)219 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::largestNormal (int sign)
220 {
221 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
222 return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(sign, ExponentBias, (static_cast<StorageType>(1) << (MantissaBits + 1)) - 1);
223 }
224
225 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
smallestNormal(int sign)226 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::smallestNormal (int sign)
227 {
228 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
229 return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(sign, 1 - ExponentBias, (static_cast<StorageType>(1) << MantissaBits));
230 }
231
232 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
233 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
construct(int sign,int exponent,StorageType mantissa)234 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
235 (int sign, int exponent, StorageType mantissa)
236 {
237 // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
238 const bool isShorthandZero = exponent == 0 && mantissa == 0;
239
240 // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
241 // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
242 const bool isDenormOrZero = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
243 const StorageType s = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
244 const StorageType exp = (isShorthandZero || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
245
246 DE_ASSERT(sign == +1 || sign == -1);
247 DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
248 DE_ASSERT(exp >> ExponentBits == 0);
249
250 return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
251 }
252
253 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
254 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
constructBits(int sign,int exponent,StorageType mantissaBits)255 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
256 (int sign, int exponent, StorageType mantissaBits)
257 {
258 const StorageType signBit = static_cast<StorageType>(sign < 0 ? 1 : 0);
259 const StorageType exponentBits = static_cast<StorageType>(exponent + ExponentBias);
260
261 DE_ASSERT(sign == +1 || sign == -1 );
262 DE_ASSERT(exponentBits >> ExponentBits == 0);
263 DE_ASSERT(mantissaBits >> MantissaBits == 0);
264
265 return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
266 }
267
268 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
269 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
270 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
convert(const Float<OtherStorageType,OtherExponentBits,OtherMantissaBits,OtherExponentBias,OtherFlags> & other,RoundingDirection rd)271 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
272 (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other, RoundingDirection rd)
273 {
274 if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
275 {
276 // Negative number, truncate to zero.
277 return zero(+1);
278 }
279
280 if (other.isInf())
281 {
282 return inf(other.sign());
283 }
284
285 if (other.isNaN())
286 {
287 return nan();
288 }
289
290 if (other.isZero())
291 {
292 return zero(other.sign());
293 }
294
295 const int eMin = 1 - ExponentBias;
296 const int eMax = ((1<<ExponentBits)-2) - ExponentBias;
297
298 const StorageType s = StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
299 int e = other.exponent();
300 deUint64 m = other.mantissa();
301
302 // Normalize denormalized values prior to conversion.
303 while (!(m & (1ull<<OtherMantissaBits)))
304 {
305 m <<= 1;
306 e -= 1;
307 }
308
309 if (e < eMin)
310 {
311 // Underflow.
312 if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
313 {
314 // Shift and round.
315 int bitDiff = (OtherMantissaBits-MantissaBits) + (eMin-e);
316 deUint64 lastBitsMask = (1ull << bitDiff) - 1ull;
317 deUint64 lastBits = (static_cast<deUint64>(m) & lastBitsMask);
318 deUint64 half = (1ull << (bitDiff - 1)) - 1;
319 deUint64 bias = (m >> bitDiff) & 1;
320
321 switch (rd)
322 {
323 case ROUND_TO_EVEN:
324 return Float(StorageType(s | (m + half + bias) >> bitDiff));
325
326 case ROUND_DOWNWARD:
327 m = (m >> bitDiff);
328 if (lastBits != 0ull && other.sign() < 0)
329 {
330 m += 1;
331 }
332 return Float(StorageType(s | m));
333
334 case ROUND_UPWARD:
335 m = (m >> bitDiff);
336 if (lastBits != 0ull && other.sign() > 0)
337 {
338 m += 1;
339 }
340 return Float(StorageType(s | m));
341
342 case ROUND_TO_ZERO:
343 return Float(StorageType(s | (m >> bitDiff)));
344
345 default:
346 DE_ASSERT(false);
347 break;
348 }
349 }
350
351 return zero(other.sign());
352 }
353
354 // Remove leading 1.
355 m = m & ~(1ull<<OtherMantissaBits);
356
357 if (MantissaBits < OtherMantissaBits)
358 {
359 // Round mantissa.
360 int bitDiff = OtherMantissaBits-MantissaBits;
361 deUint64 lastBitsMask = (1ull << bitDiff) - 1ull;
362 deUint64 lastBits = (static_cast<deUint64>(m) & lastBitsMask);
363 deUint64 half = (1ull << (bitDiff - 1)) - 1;
364 deUint64 bias = (m >> bitDiff) & 1;
365
366 switch (rd)
367 {
368 case ROUND_TO_EVEN:
369 m = (m + half + bias) >> bitDiff;
370 break;
371
372 case ROUND_DOWNWARD:
373 m = (m >> bitDiff);
374 if (lastBits != 0ull && other.sign() < 0)
375 {
376 m += 1;
377 }
378 break;
379
380 case ROUND_UPWARD:
381 m = (m >> bitDiff);
382 if (lastBits != 0ull && other.sign() > 0)
383 {
384 m += 1;
385 }
386 break;
387
388 case ROUND_TO_ZERO:
389 m = (m >> bitDiff);
390 break;
391
392 default:
393 DE_ASSERT(false);
394 break;
395 }
396
397 if (m & (1ull<<MantissaBits))
398 {
399 // Overflow in mantissa.
400 m = 0;
401 e += 1;
402 }
403 }
404 else
405 {
406 int bitDiff = MantissaBits-OtherMantissaBits;
407 m = m << bitDiff;
408 }
409
410 if (e > eMax)
411 {
412 // Overflow.
413 return (((other.sign() < 0 && rd == ROUND_UPWARD) || (other.sign() > 0 && rd == ROUND_DOWNWARD)) ? largestNormal(other.sign()) : inf(other.sign()));
414 }
415
416 DE_ASSERT(de::inRange(e, eMin, eMax));
417 DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
418 DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
419
420 return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
421 }
422
423 } // tcu
424
425 #endif // _TCUFLOAT_HPP
426