#ifndef _TCUFLOAT_HPP
#define _TCUFLOAT_HPP
/*-------------------------------------------------------------------------
 * drawElements Quality Program Tester Core
 * ----------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Reconfigurable floating-point value template.
 *//*--------------------------------------------------------------------*/
25 
#include "tcuDefs.hpp"

// For std::numeric_limits (<limits>) and memcpy() (<string.h>).
#include <limits>
#include <string.h>
31 
namespace tcu
{
34 
/*--------------------------------------------------------------------*//*!
 * \brief Capability bits for the Flags template parameter of Float
 *
 * The values are distinct bits and may be OR'ed together.
 *//*--------------------------------------------------------------------*/
enum FloatFlags
{
    FLOAT_HAS_SIGN       = (1 << 0), //!< Format can represent negative values; without it convert() maps negative inputs to +0.
    FLOAT_SUPPORT_DENORM = (1 << 1)  //!< Format keeps denormalized values; without it convert() flushes underflowing inputs to zero.
};
40 
/*--------------------------------------------------------------------*//*!
 * \brief Rounding direction used when a conversion has to drop mantissa bits
 *
 * The numeric values are spelled out; they match the implicit numbering
 * the enumerators have always had.
 *//*--------------------------------------------------------------------*/
enum RoundingDirection
{
    ROUND_TO_EVEN  = 0, //!< Nearest value, ties resolved towards an even mantissa.
    ROUND_DOWNWARD = 1, //!< Towards -Inf.
    ROUND_UPWARD   = 2, //!< Towards +Inf.
    ROUND_TO_ZERO  = 3  //!< Truncate the dropped bits.
};
48 
49 /*--------------------------------------------------------------------*//*!
50  * \brief Floating-point format template
51  *
52  * This template implements arbitrary floating-point handling. Template
53  * can be used for conversion between different formats and checking
54  * various properties of floating-point values.
55  *//*--------------------------------------------------------------------*/
56 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
57 class Float
58 {
59 public:
60     typedef StorageType_ StorageType;
61 
62     enum
63     {
64         EXPONENT_BITS = ExponentBits,
65         MANTISSA_BITS = MantissaBits,
66         EXPONENT_BIAS = ExponentBias,
67         FLAGS         = Flags,
68     };
69 
70     Float(void);
71     explicit Float(StorageType value);
72     explicit Float(float v, RoundingDirection rd = ROUND_TO_EVEN);
73     explicit Float(double v, RoundingDirection rd = ROUND_TO_EVEN);
74 
75     template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias,
76               uint32_t OtherFlags>
77     static Float convert(
78         const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags> &src,
79         RoundingDirection rd = ROUND_TO_EVEN);
80 
convert(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & src,RoundingDirection=ROUND_TO_EVEN)81     static inline Float convert(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> &src,
82                                 RoundingDirection = ROUND_TO_EVEN)
83     {
84         return src;
85     }
86 
87     /*--------------------------------------------------------------------*//*!
88      * \brief Construct floating point value
89      * \param sign        Sign. Must be +1/-1
90      * \param exponent    Exponent in range [1-ExponentBias, ExponentBias+1]
91      * \param mantissa    Mantissa bits with implicit leading bit explicitly set
92      * \return The specified float
93      *
94      * This function constructs a floating point value from its inputs.
95      * The normally implicit leading bit of the mantissa must be explicitly set.
96      * The exponent normally used for zero/subnormals is an invalid input. Such
97      * values are specified with the leading mantissa bit of zero and the lowest
98      * normal exponent (1-ExponentBias). Additionally having both exponent and
99      * mantissa set to zero is a shorthand notation for the correctly signed
100      * floating point zero. Inf and NaN must be specified directly with an
101      * exponent of ExponentBias+1 and the appropriate mantissa (with leading
102      * bit set)
103      *//*--------------------------------------------------------------------*/
104     static inline Float construct(int sign, int exponent, StorageType mantissa);
105 
106     /*--------------------------------------------------------------------*//*!
107      * \brief Construct floating point value. Explicit version
108      * \param sign        Sign. Must be +1/-1
109      * \param exponent    Exponent in range [-ExponentBias, ExponentBias+1]
110      * \param mantissa    Mantissa bits
111      * \return The specified float
112      *
113      * This function constructs a floating point value from its inputs with
114      * minimal intervention.
115      * The sign is turned into a sign bit and the exponent bias is added.
116      * See IEEE-754 for additional information on the inputs and
117      * the encoding of special values.
118      *//*--------------------------------------------------------------------*/
119     static Float constructBits(int sign, int exponent, StorageType mantissaBits);
120 
bits(void) const121     StorageType bits(void) const
122     {
123         return m_value;
124     }
125     float asFloat(void) const;
126     double asDouble(void) const;
127 
signBit(void) const128     inline int signBit(void) const
129     {
130         return (int)(m_value >> (ExponentBits + MantissaBits)) & 1;
131     }
exponentBits(void) const132     inline StorageType exponentBits(void) const
133     {
134         return (m_value >> MantissaBits) & ((StorageType(1) << ExponentBits) - 1);
135     }
mantissaBits(void) const136     inline StorageType mantissaBits(void) const
137     {
138         return m_value & ((StorageType(1) << MantissaBits) - 1);
139     }
140 
sign(void) const141     inline int sign(void) const
142     {
143         return signBit() ? -1 : 1;
144     }
exponent(void) const145     inline int exponent(void) const
146     {
147         return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias;
148     }
mantissa(void) const149     inline StorageType mantissa(void) const
150     {
151         return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1) << MantissaBits));
152     }
153 
isInf(void) const154     inline bool isInf(void) const
155     {
156         return exponentBits() == ((1 << ExponentBits) - 1) && mantissaBits() == 0;
157     }
isNaN(void) const158     inline bool isNaN(void) const
159     {
160         return exponentBits() == ((1 << ExponentBits) - 1) && mantissaBits() != 0;
161     }
isZero(void) const162     inline bool isZero(void) const
163     {
164         return exponentBits() == 0 && mantissaBits() == 0;
165     }
isDenorm(void) const166     inline bool isDenorm(void) const
167     {
168         return exponentBits() == 0 && mantissaBits() != 0;
169     }
170 
operator <(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & other) const171     inline bool operator<(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> &other) const
172     {
173         return this->asDouble() < other.asDouble();
174     }
175 
176     static Float zero(int sign);
177     static Float inf(int sign);
178     static Float nan(void);
179 
180     static Float largestNormal(int sign);
181     static Float smallestNormal(int sign);
182 
183 private:
184     StorageType m_value;
185 } DE_WARN_UNUSED_TYPE;
186 
187 // Common floating-point types.
188 typedef Float<uint16_t, 5, 10, 15, FLOAT_HAS_SIGN | FLOAT_SUPPORT_DENORM>
189     Float16; //!< IEEE 754-2008 16-bit floating-point value
190 typedef Float<uint32_t, 8, 23, 127, FLOAT_HAS_SIGN | FLOAT_SUPPORT_DENORM>
191     Float32; //!< IEEE 754 32-bit floating-point value
192 typedef Float<uint64_t, 11, 52, 1023, FLOAT_HAS_SIGN | FLOAT_SUPPORT_DENORM>
193     Float64; //!< IEEE 754 64-bit floating-point value
194 
195 typedef Float<uint16_t, 5, 10, 15, FLOAT_HAS_SIGN>
196     Float16Denormless; //!< IEEE 754-2008 16-bit floating-point value without denormalized support
197 
198 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
Float(void)199 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float(void) : m_value(0)
200 {
201 }
202 
203 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
Float(StorageType value)204 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float(StorageType value) : m_value(value)
205 {
206 }
207 
208 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
Float(float value,RoundingDirection rd)209 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float(float value, RoundingDirection rd)
210     : m_value(0)
211 {
212     uint32_t u32;
213     memcpy(&u32, &value, sizeof(uint32_t));
214     *this = convert(Float32(u32), rd);
215 }
216 
217 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
Float(double value,RoundingDirection rd)218 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float(double value, RoundingDirection rd)
219     : m_value(0)
220 {
221     uint64_t u64;
222     memcpy(&u64, &value, sizeof(uint64_t));
223     *this = convert(Float64(u64), rd);
224 }
225 
226 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
asFloat(void) const227 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat(void) const
228 {
229     float v;
230     uint32_t u32 = Float32::convert(*this).bits();
231     memcpy(&v, &u32, sizeof(uint32_t));
232     return v;
233 }
234 
235 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
asDouble(void) const236 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble(void) const
237 {
238     double v;
239     uint64_t u64 = Float64::convert(*this).bits();
240     memcpy(&v, &u64, sizeof(uint64_t));
241     return v;
242 }
243 
244 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
245 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
zero(int sign)246     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero(int sign)
247 {
248     DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
249     return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits + MantissaBits)));
250 }
251 
252 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
253 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
inf(int sign)254     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf(int sign)
255 {
256     DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
257     return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits + MantissaBits)) |
258                              (((1ull << ExponentBits) - 1) << MantissaBits)));
259 }
260 
261 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
262 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
nan(void)263     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan(void)
264 {
265     return Float(StorageType((1ull << (ExponentBits + MantissaBits)) - 1));
266 }
267 
268 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
269 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
largestNormal(int sign)270     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::largestNormal(int sign)
271 {
272     DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
273     return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(
274         sign, ExponentBias, (static_cast<StorageType>(1) << (MantissaBits + 1)) - 1);
275 }
276 
277 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
278 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
smallestNormal(int sign)279     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::smallestNormal(int sign)
280 {
281     DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
282     return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(
283         sign, 1 - ExponentBias, (static_cast<StorageType>(1) << MantissaBits));
284 }
285 
286 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
287 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
construct(int sign,int exponent,StorageType mantissa)288     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(int sign, int exponent,
289                                                                              StorageType mantissa)
290 {
291     // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
292     const bool isShorthandZero = exponent == 0 && mantissa == 0;
293 
294     // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
295     // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
296     const bool isDenormOrZero = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
297     const StorageType s   = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits + MantissaBits)));
298     const StorageType exp = (isShorthandZero || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
299 
300     DE_ASSERT(sign == +1 || sign == -1);
301     DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
302     DE_ASSERT(exp >> ExponentBits == 0);
303 
304     return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1) << MantissaBits) - 1))));
305 }
306 
307 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
308 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<
constructBits(int sign,int exponent,StorageType mantissaBits)309     StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits(int sign, int exponent,
310                                                                                  StorageType mantissaBits)
311 {
312     const StorageType signBit      = static_cast<StorageType>(sign < 0 ? 1 : 0);
313     const StorageType exponentBits = static_cast<StorageType>(exponent + ExponentBias);
314 
315     DE_ASSERT(sign == +1 || sign == -1);
316     DE_ASSERT(exponentBits >> ExponentBits == 0);
317     DE_ASSERT(mantissaBits >> MantissaBits == 0);
318 
319     return Float(
320         StorageType((signBit << (ExponentBits + MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
321 }
322 
323 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, uint32_t Flags>
324 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias,
325           uint32_t OtherFlags>
326 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits,
327                                                                           ExponentBias, Flags>::
convert(const Float<OtherStorageType,OtherExponentBits,OtherMantissaBits,OtherExponentBias,OtherFlags> & other,RoundingDirection rd)328     convert(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags> &other,
329             RoundingDirection rd)
330 {
331     if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
332     {
333         // Negative number, truncate to zero.
334         return zero(+1);
335     }
336 
337     if (other.isInf())
338     {
339         return inf(other.sign());
340     }
341 
342     if (other.isNaN())
343     {
344         return nan();
345     }
346 
347     if (other.isZero())
348     {
349         return zero(other.sign());
350     }
351 
352     const int eMin = 1 - ExponentBias;
353     const int eMax = ((1 << ExponentBits) - 2) - ExponentBias;
354 
355     const StorageType s = StorageType((StorageType(other.signBit()))
356                                       << (StorageType(ExponentBits + MantissaBits))); // \note Not sign, but sign bit.
357     int e               = other.exponent();
358     uint64_t m          = other.mantissa();
359 
360     // Normalize denormalized values prior to conversion.
361     while (!(m & (1ull << OtherMantissaBits)))
362     {
363         m <<= 1;
364         e -= 1;
365     }
366 
367     if (e < eMin)
368     {
369         // Underflow.
370         if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin - e - 1 <= MantissaBits))
371         {
372             // Shift and round.
373             int bitDiff           = (OtherMantissaBits - MantissaBits) + (eMin - e);
374             uint64_t lastBitsMask = (1ull << bitDiff) - 1ull;
375             uint64_t lastBits     = (static_cast<uint64_t>(m) & lastBitsMask);
376             uint64_t half         = (1ull << (bitDiff - 1)) - 1;
377             uint64_t bias         = (m >> bitDiff) & 1;
378 
379             switch (rd)
380             {
381             case ROUND_TO_EVEN:
382                 return Float(StorageType(s | (m + half + bias) >> bitDiff));
383 
384             case ROUND_DOWNWARD:
385                 m = (m >> bitDiff);
386                 if (lastBits != 0ull && other.sign() < 0)
387                 {
388                     m += 1;
389                 }
390                 return Float(StorageType(s | m));
391 
392             case ROUND_UPWARD:
393                 m = (m >> bitDiff);
394                 if (lastBits != 0ull && other.sign() > 0)
395                 {
396                     m += 1;
397                 }
398                 return Float(StorageType(s | m));
399 
400             case ROUND_TO_ZERO:
401                 return Float(StorageType(s | (m >> bitDiff)));
402 
403             default:
404                 DE_ASSERT(false);
405                 break;
406             }
407         }
408 
409         return zero(other.sign());
410     }
411 
412     // Remove leading 1.
413     m = m & ~(1ull << OtherMantissaBits);
414 
415     if (MantissaBits < OtherMantissaBits)
416     {
417         // Round mantissa.
418         int bitDiff           = OtherMantissaBits - MantissaBits;
419         uint64_t lastBitsMask = (1ull << bitDiff) - 1ull;
420         uint64_t lastBits     = (static_cast<uint64_t>(m) & lastBitsMask);
421         uint64_t half         = (1ull << (bitDiff - 1)) - 1;
422         uint64_t bias         = (m >> bitDiff) & 1;
423 
424         switch (rd)
425         {
426         case ROUND_TO_EVEN:
427             m = (m + half + bias) >> bitDiff;
428             break;
429 
430         case ROUND_DOWNWARD:
431             m = (m >> bitDiff);
432             if (lastBits != 0ull && other.sign() < 0)
433             {
434                 m += 1;
435             }
436             break;
437 
438         case ROUND_UPWARD:
439             m = (m >> bitDiff);
440             if (lastBits != 0ull && other.sign() > 0)
441             {
442                 m += 1;
443             }
444             break;
445 
446         case ROUND_TO_ZERO:
447             m = (m >> bitDiff);
448             break;
449 
450         default:
451             DE_ASSERT(false);
452             break;
453         }
454 
455         if (m & (1ull << MantissaBits))
456         {
457             // Overflow in mantissa.
458             m = 0;
459             e += 1;
460         }
461     }
462     else
463     {
464         int bitDiff = MantissaBits - OtherMantissaBits;
465         m           = m << bitDiff;
466     }
467 
468     if (e > eMax)
469     {
470         // Overflow.
471         return (((other.sign() < 0 && rd == ROUND_UPWARD) || (other.sign() > 0 && rd == ROUND_DOWNWARD)) ?
472                     largestNormal(other.sign()) :
473                     inf(other.sign()));
474     }
475 
476     DE_ASSERT(de::inRange(e, eMin, eMax));
477     DE_ASSERT(((e + ExponentBias) & ~((1ull << ExponentBits) - 1)) == 0);
478     DE_ASSERT((m & ~((1ull << MantissaBits) - 1)) == 0);
479 
480     return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
481 }
482 
483 typedef typename Float16::StorageType float16_t;
484 template <class F>
485 inline constexpr F floatQuietNaN = std::numeric_limits<F>::quiet_NaN();
486 template <>
487 inline constexpr float16_t floatQuietNaN<float16_t> = 0x7e01;
488 template <class F>
489 inline constexpr F floatSignalingNaN = std::numeric_limits<F>::signaling_NaN();
490 template <>
491 inline constexpr float16_t floatSignalingNaN<float16_t> = 0x7c01;
492 
} // namespace tcu

#endif // _TCUFLOAT_HPP
496