1 #ifndef _TCUFLOAT_HPP
2 #define _TCUFLOAT_HPP
3 /*-------------------------------------------------------------------------
4 * drawElements Quality Program Tester Core
5 * ----------------------------------------
6 *
7 * Copyright 2014 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Reconfigurable floating-point value template.
24 *//*--------------------------------------------------------------------*/
25
26 #include "tcuDefs.hpp"
27
28 // For memcpy().
29 #include <string.h>
30
31 namespace tcu
32 {
33
/*--------------------------------------------------------------------*//*!
 * \brief Capability flags of a floating-point format
 *
 * The values are single bits that are OR'ed together and passed as the
 * Flags template argument of Float.
 *//*--------------------------------------------------------------------*/
enum FloatFlags
{
	FLOAT_HAS_SIGN			= 0x1,	//!< Format stores a sign bit above the exponent field.
	FLOAT_SUPPORT_DENORM	= 0x2	//!< Conversions into the format may produce denormalized values.
};
39
40 /*--------------------------------------------------------------------*//*!
41 * \brief Floating-point format template
42 *
43 * This template implements arbitrary floating-point handling. Template
44 * can be used for conversion between different formats and checking
45 * various properties of floating-point values.
46 *//*--------------------------------------------------------------------*/
47 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
48 class Float
49 {
50 public:
51 typedef StorageType_ StorageType;
52
53 enum
54 {
55 EXPONENT_BITS = ExponentBits,
56 MANTISSA_BITS = MantissaBits,
57 EXPONENT_BIAS = ExponentBias,
58 FLAGS = Flags,
59 };
60
61 Float (void);
62 explicit Float (StorageType value);
63 explicit Float (float v);
64 explicit Float (double v);
65
66 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
67 static Float convert (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src);
68
convert(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & src)69 static inline Float convert (const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src) { return src; }
70
71 /*--------------------------------------------------------------------*//*!
72 * \brief Construct floating point value
73 * \param sign Sign. Must be +1/-1
74 * \param exponent Exponent in range [1-ExponentBias, ExponentBias+1]
75 * \param mantissa Mantissa bits with implicit leading bit explicitly set
76 * \return The specified float
77 *
78 * This function constructs a floating point value from its inputs.
79 * The normally implicit leading bit of the mantissa must be explicitly set.
80 * The exponent normally used for zero/subnormals is an invalid input. Such
81 * values are specified with the leading mantissa bit of zero and the lowest
82 * normal exponent (1-ExponentBias). Additionally having both exponent and
83 * mantissa set to zero is a shorthand notation for the correctly signed
84 * floating point zero. Inf and NaN must be specified directly with an
85 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
86 * bit set)
87 *//*--------------------------------------------------------------------*/
88 static inline Float construct (int sign, int exponent, StorageType mantissa);
89
90 /*--------------------------------------------------------------------*//*!
91 * \brief Construct floating point value. Explicit version
92 * \param sign Sign. Must be +1/-1
93 * \param exponent Exponent in range [-ExponentBias, ExponentBias+1]
94 * \param mantissa Mantissa bits
95 * \return The specified float
96 *
97 * This function constructs a floating point value from its inputs with
98 * minimal intervention.
99 * The sign is turned into a sign bit and the exponent bias is added.
100 * See IEEE-754 for additional information on the inputs and
101 * the encoding of special values.
102 *//*--------------------------------------------------------------------*/
103 static Float constructBits (int sign, int exponent, StorageType mantissaBits);
104
bits(void) const105 StorageType bits (void) const { return m_value; }
106 float asFloat (void) const;
107 double asDouble (void) const;
108
signBit(void) const109 inline int signBit (void) const { return (int)(m_value >> (ExponentBits+MantissaBits)) & 1; }
exponentBits(void) const110 inline StorageType exponentBits (void) const { return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1); }
mantissaBits(void) const111 inline StorageType mantissaBits (void) const { return m_value & ((StorageType(1)<<MantissaBits)-1); }
112
sign(void) const113 inline int sign (void) const { return signBit() ? -1 : 1; }
exponent(void) const114 inline int exponent (void) const { return isDenorm() ? 1 - ExponentBias : (int)exponentBits() - ExponentBias; }
mantissa(void) const115 inline StorageType mantissa (void) const { return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits)); }
116
isInf(void) const117 inline bool isInf (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() == 0; }
isNaN(void) const118 inline bool isNaN (void) const { return exponentBits() == ((1<<ExponentBits)-1) && mantissaBits() != 0; }
isZero(void) const119 inline bool isZero (void) const { return exponentBits() == 0 && mantissaBits() == 0; }
isDenorm(void) const120 inline bool isDenorm (void) const { return exponentBits() == 0 && mantissaBits() != 0; }
121
122 static Float zero (int sign);
123 static Float inf (int sign);
124 static Float nan (void);
125
126 private:
127 StorageType m_value;
128 } DE_WARN_UNUSED_TYPE;
129
130 // Common floating-point types.
131 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float16; //!< IEEE 754-2008 16-bit floating-point value
132 typedef Float<deUint32, 8, 23, 127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float32; //!< IEEE 754 32-bit floating-point value
133 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM> Float64; //!< IEEE 754 64-bit floating-point value
134
135 typedef Float<deUint16, 5, 10, 15, FLOAT_HAS_SIGN> Float16Denormless; //!< IEEE 754-2008 16-bit floating-point value without denormalized support
136
137 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(void)138 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
139 : m_value(0)
140 {
141 }
142
143 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(StorageType value)144 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
145 : m_value(value)
146 {
147 }
148
149 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(float value)150 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value)
151 : m_value(0)
152 {
153 deUint32 u32;
154 memcpy(&u32, &value, sizeof(deUint32));
155 *this = convert(Float32(u32));
156 }
157
158 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(double value)159 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value)
160 : m_value(0)
161 {
162 deUint64 u64;
163 memcpy(&u64, &value, sizeof(deUint64));
164 *this = convert(Float64(u64));
165 }
166
167 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asFloat(void) const168 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
169 {
170 float v;
171 deUint32 u32 = Float32::convert(*this).bits();
172 memcpy(&v, &u32, sizeof(deUint32));
173 return v;
174 }
175
176 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asDouble(void) const177 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
178 {
179 double v;
180 deUint64 u64 = Float64::convert(*this).bits();
181 memcpy(&v, &u64, sizeof(deUint64));
182 return v;
183 }
184
185 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
zero(int sign)186 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
187 {
188 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
189 return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
190 }
191
192 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inf(int sign)193 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
194 {
195 DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
196 return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
197 }
198
199 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
nan(void)200 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
201 {
202 return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
203 }
204
205 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
206 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
construct(int sign,int exponent,StorageType mantissa)207 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
208 (int sign, int exponent, StorageType mantissa)
209 {
210 // Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
211 const bool isShorthandZero = exponent == 0 && mantissa == 0;
212
213 // Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
214 // Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
215 const bool isDenormOrZero = (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
216 const StorageType s = StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
217 const StorageType exp = (isShorthandZero || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
218
219 DE_ASSERT(sign == +1 || sign == -1);
220 DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
221 DE_ASSERT(exp >> ExponentBits == 0);
222
223 return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
224 }
225
226 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
227 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
constructBits(int sign,int exponent,StorageType mantissaBits)228 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
229 (int sign, int exponent, StorageType mantissaBits)
230 {
231 const StorageType signBit = static_cast<StorageType>(sign < 0 ? 1 : 0);
232 const StorageType exponentBits = static_cast<StorageType>(exponent + ExponentBias);
233
234 DE_ASSERT(sign == +1 || sign == -1 );
235 DE_ASSERT(exponentBits >> ExponentBits == 0);
236 DE_ASSERT(mantissaBits >> MantissaBits == 0);
237
238 return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
239 }
240
241 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
242 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
243 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
convert(const Float<OtherStorageType,OtherExponentBits,OtherMantissaBits,OtherExponentBias,OtherFlags> & other)244 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
245 (const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other)
246 {
247 if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
248 {
249 // Negative number, truncate to zero.
250 return zero(+1);
251 }
252 else if (other.isInf())
253 {
254 return inf(other.sign());
255 }
256 else if (other.isNaN())
257 {
258 return nan();
259 }
260 else if (other.isZero())
261 {
262 return zero(other.sign());
263 }
264 else
265 {
266 const int eMin = 1 - ExponentBias;
267 const int eMax = ((1<<ExponentBits)-2) - ExponentBias;
268
269 const StorageType s = StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
270 int e = other.exponent();
271 deUint64 m = other.mantissa();
272
273 // Normalize denormalized values prior to conversion.
274 while (!(m & (1ull<<OtherMantissaBits)))
275 {
276 m <<= 1;
277 e -= 1;
278 }
279
280 if (e < eMin)
281 {
282 // Underflow.
283 if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
284 {
285 // Shift and round (RTE).
286 int bitDiff = (OtherMantissaBits-MantissaBits) + (eMin-e);
287 deUint64 half = (1ull << (bitDiff - 1)) - 1;
288 deUint64 bias = (m >> bitDiff) & 1;
289
290 return Float(StorageType(s | (m + half + bias) >> bitDiff));
291 }
292 else
293 return zero(other.sign());
294 }
295 else
296 {
297 // Remove leading 1.
298 m = m & ~(1ull<<OtherMantissaBits);
299
300 if (MantissaBits < OtherMantissaBits)
301 {
302 // Round mantissa (round to nearest even).
303 int bitDiff = OtherMantissaBits-MantissaBits;
304 deUint64 half = (1ull << (bitDiff - 1)) - 1;
305 deUint64 bias = (m >> bitDiff) & 1;
306
307 m = (m + half + bias) >> bitDiff;
308
309 if (m & (1ull<<MantissaBits))
310 {
311 // Overflow in mantissa.
312 m = 0;
313 e += 1;
314 }
315 }
316 else
317 {
318 int bitDiff = MantissaBits-OtherMantissaBits;
319 m = m << bitDiff;
320 }
321
322 if (e > eMax)
323 {
324 // Overflow.
325 return inf(other.sign());
326 }
327 else
328 {
329 DE_ASSERT(de::inRange(e, eMin, eMax));
330 DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
331 DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
332
333 return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
334 }
335 }
336 }
337 }
338
339 } // tcu
340
341 #endif // _TCUFLOAT_HPP
342