1 #ifndef _TCUFLOAT_HPP
2 #define _TCUFLOAT_HPP
3 /*-------------------------------------------------------------------------
4 * drawElements Quality Program Tester Core
5 * ----------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Reconfigurable floating-point value template.
24 *//*--------------------------------------------------------------------*/
25
26 #include "tcuDefs.hpp"
27
28 // For memcpy().
29 #include <string.h>
30
31 namespace tcu
32 {
33
//! Bit flags describing optional capabilities of a Float format (combined with bitwise OR).
enum FloatFlags
{
	//! Format has a sign bit. convert() maps negative inputs to +0 for formats without it.
	FLOAT_HAS_SIGN			= 0x1,
	//! Format can hold denormalized values. convert() flushes underflowing values to zero without it.
	FLOAT_SUPPORT_DENORM	= 0x2
};
39
40 /*--------------------------------------------------------------------*//*!
41 * \brief Floating-point format template
42 *
43 * This template implements arbitrary floating-point handling. Template
44 * can be used for conversion between different formats and checking
45 * various properties of floating-point values.
46 *//*--------------------------------------------------------------------*/
47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
48 class Float
49 {
50 public:
51 typedef StorageType_ StorageType;
52
53 enum
54 {
55 EXPONENT_BITS = ExponentBits,
56 MANTISSA_BITS = MantissaBits,
57 EXPONENT_BIAS = ExponentBias,
58 FLAGS = Flags,
59 };
60
61 Float (void);
62 explicit Float (StorageType value);
63 explicit Float (float v);
64 explicit Float (double v);
65
66 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
67 static Float convert (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
68
convert(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & src)69 static inline Float convert (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
70
71 /*--------------------------------------------------------------------*//*!
72 * \brief Construct floating point value
73 * \param sign Sign. Must be +1/-1
74 * \param exponent Exponent in range [1-ExponentBias, ExponentBias+1]
75 * \param mantissa Mantissa bits with implicit leading bit explicitly set
76 * \return The specified float
77 *
78 * This function constructs a floating point value from its inputs.
79 * The normally implicit leading bit of the mantissa must be explicitly set.
80 * The exponent normally used for zero/subnormals is an invalid input. Such
81 * values are specified with the leading mantissa bit of zero and the lowest
82 * normal exponent (1-ExponentBias). Additionally having both exponent and
83 * mantissa set to zero is a shorthand notation for the correctly signed
84 * floating point zero. Inf and NaN must be specified directly with an
85 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
86 * bit set)
87 *//*--------------------------------------------------------------------*/
88 static inline Float construct (int sign, int exponent, StorageType mantissa);
89
90 /*--------------------------------------------------------------------*//*!
91 * \brief Construct floating point value. Explicit version
92 * \param sign Sign. Must be +1/-1
93 * \param exponent Exponent in range [-ExponentBias, ExponentBias+1]
94 * \param mantissa Mantissa bits
95 * \return The specified float
96 *
97 * This function constructs a floating point value from its inputs with
98 * minimal intervention.
99 * The sign is turned into a sign bit and the exponent bias is added.
100 * See IEEE-754 for additional information on the inputs and
101 * the encoding of special values.
102 *//*--------------------------------------------------------------------*/
103 static Float constructBits (int sign, int exponent, StorageType mantissaBits);
104
bits(void) const105 StorageType bits (void) const { return m_value; }
106 float asFloat (void) const;
107 double asDouble (void) const;
108
signBit(void) const109 inline int signBit (void) const { return (int)(m_value >> (ExponentBits+MantissaBits)) & 1; }
exponentBits(void) const110 inline StorageType exponentBits (void) const { return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1); }
mantissaBits(void) const111 inline StorageType mantissaBits (void) const { return m_value & ((StorageType(1)<<MantissaBits)-1); }
112
sign(void) const113 inline int sign (void) const { return signBit() ? -1 : 1; }
exponent(void) const114 inline int exponent (void) const { return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias; }
mantissa(void) const115 inline StorageType mantissa (void) const { return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits)); }
116
isInf(void) const117 inline bool isInf (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() == 0; }
isNaN(void) const118 inline bool isNaN (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() != 0; }
isZero(void) const119 inline bool isZero (void) const { return exponentBits() == 0 && mantissaBits() == 0; }
isDenorm(void) const120 inline bool isDenorm (void) const { return exponentBits() == 0 && mantissaBits() != 0; }
121
122 static Float zero (int sign);
123 static Float inf (int sign);
124 static Float nan (void);
125
126 private:
127 StorageType m_value;
128 } DE_WARN_UNUSED_TYPE;
129
130 // Common floating-point types.
131 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float16; //!< IEEE 754-2008 16-bit floating-point value
132 typedef Float<deUint32, 8, 23, 127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float32; //!< IEEE 754 32-bit floating-point value
133 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float64; //!< IEEE 754 64-bit floating-point value
134
135 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(void)136 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
137 : m_value(0)
138 {
139 }
140
141 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(StorageType value)142 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
143 : m_value(value)
144 {
145 }
146
147 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(float value)148 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
149 : m_value(0)
150 {
151 deUint32 u32;
152 memcpy(&u32, &value, sizeof(deUint32));
153 *this = convert(Float32(u32));
154 }
155
156 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(double value)157 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
158 : m_value(0)
159 {
160 deUint64 u64;
161 memcpy(&u64, &value, sizeof(deUint64));
162 *this = convert(Float64(u64));
163 }
164
165 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asFloat(void) const166 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
167 {
168 float v;
169 deUint32 u32 = Float32::convert(*this).bits();
170 memcpy(&v, &u32, sizeof(deUint32));
171 return v;
172 }
173
174 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asDouble(void) const175 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
176 {
177 double v;
178 deUint64 u64 = Float64::convert(*this).bits();
179 memcpy(&v, &u64, sizeof(deUint64));
180 return v;
181 }
182
183 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
zero(int sign)184 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
185 {
186 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
187 return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
188 }
189
190 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inf(int sign)191 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
192 {
193 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
194 return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
195 }
196
197 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
nan(void)198 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
199 {
200 return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
201 }
202
203 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
204 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
construct(int sign,int exponent,StorageType mantissa)205 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
206 (int sign, int exponent, StorageType mantissa)
207 {
208 // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
209 const bool isShorthandZero = exponent == 0 && mantissa == 0;
210
211 // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
212 // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
213 const bool isDenormOrZero = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
214 const StorageType s = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
215 const StorageType exp = (isShorthandZero || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
216
217 DE_ASSERT(sign == +1 || sign == -1);
218 DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
219 DE_ASSERT(exp >> ExponentBits == 0);
220
221 return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
222 }
223
224 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
225 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
constructBits(int sign,int exponent,StorageType mantissaBits)226 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
227 (int sign, int exponent, StorageType mantissaBits)
228 {
229 const StorageType signBit = sign < 0 ? 1 : 0;
230 const StorageType exponentBits = exponent + ExponentBias;
231
232 DE_ASSERT(sign == +1 || sign == -1 );
233 DE_ASSERT(exponentBits >> ExponentBits == 0);
234 DE_ASSERT(mantissaBits >> MantissaBits == 0);
235
236 return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
237 }
238
239 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
240 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
241 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
convert(const Float<OtherStorageType,OtherExponentBits,OtherMantissaBits,OtherExponentBias,OtherFlags> & other)242 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
243 (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
244 {
245 if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
246 {
247 // Negative number, truncate to zero.
248 return zero(+1);
249 }
250 else if (other.isInf())
251 {
252 return inf(other.sign());
253 }
254 else if (other.isNaN())
255 {
256 return nan();
257 }
258 else if (other.isZero())
259 {
260 return zero(other.sign());
261 }
262 else
263 {
264 const int eMin = 1 - ExponentBias;
265 const int eMax = ((1<<ExponentBits)-2) - ExponentBias;
266
267 const StorageType s = StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
268 int e = other.exponent();
269 deUint64 m = other.mantissa();
270
271 // Normalize denormalized values prior to conversion.
272 while (!(m & (1ull<<OtherMantissaBits)))
273 {
274 m <<= 1;
275 e -= 1;
276 }
277
278 if (e < eMin)
279 {
280 // Underflow.
281 if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
282 {
283 // Shift and round (RTE).
284 int bitDiff = (OtherMantissaBits-MantissaBits) + (eMin-e);
285 deUint64 half = (1ull << (bitDiff - 1)) - 1;
286 deUint64 bias = (m >> bitDiff) & 1;
287
288 return Float(StorageType(s | (m + half + bias) >> bitDiff));
289 }
290 else
291 return zero(other.sign());
292 }
293 else
294 {
295 // Remove leading 1.
296 m = m & ~(1ull<<OtherMantissaBits);
297
298 if (MantissaBits < OtherMantissaBits)
299 {
300 // Round mantissa (round to nearest even).
301 int bitDiff = OtherMantissaBits-MantissaBits;
302 deUint64 half = (1ull << (bitDiff - 1)) - 1;
303 deUint64 bias = (m >> bitDiff) & 1;
304
305 m = (m + half + bias) >> bitDiff;
306
307 if (m & (1ull<<MantissaBits))
308 {
309 // Overflow in mantissa.
310 m = 0;
311 e += 1;
312 }
313 }
314 else
315 {
316 int bitDiff = MantissaBits-OtherMantissaBits;
317 m = m << bitDiff;
318 }
319
320 if (e > eMax)
321 {
322 // Overflow.
323 return inf(other.sign());
324 }
325 else
326 {
327 DE_ASSERT(de::inRange(e, eMin, eMax));
328 DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
329 DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
330
331 return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
332 }
333 }
334 }
335 }
336
337 } // tcu
338
339 #endif // _TCUFLOAT_HPP
340