• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef _TCUFLOAT_HPP
2 #define _TCUFLOAT_HPP
3 /*-------------------------------------------------------------------------
4  * drawElements Quality Program Tester Core
5  * ----------------------------------------
6  *
7  * Copyright 2014 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Reconfigurable floating-point value template.
24  *//*--------------------------------------------------------------------*/
25 
26 #include "tcuDefs.hpp"
27 
28 // For memcpy().
29 #include <string.h>
30 
31 namespace tcu
32 {
33 
//! Capability bits combined into the Flags template argument of Float.
enum FloatFlags
{
	FLOAT_HAS_SIGN			= 0x1,	//!< Format has a sign bit; without it convert() maps negative inputs to +0.
	FLOAT_SUPPORT_DENORM	= 0x2	//!< convert() may produce denormalized results; without it underflow gives zero.
};
39 
//! Rounding applied by Float::convert() when bits of the source mantissa must be dropped.
enum RoundingDirection
{
	ROUND_TO_EVEN	= 0,	//!< To nearest, ties go to the even mantissa.
	ROUND_DOWNWARD	= 1,	//!< Towards -Inf.
	ROUND_UPWARD	= 2,	//!< Towards +Inf.
	ROUND_TO_ZERO	= 3		//!< Discard the dropped bits.
};
47 
48 /*--------------------------------------------------------------------*//*!
49  * \brief Floating-point format template
50  *
51  * This template implements arbitrary floating-point handling. Template
52  * can be used for conversion between different formats and checking
53  * various properties of floating-point values.
54  *//*--------------------------------------------------------------------*/
55 template <typename StorageType_, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
56 class Float
57 {
58 public:
59 	typedef StorageType_ StorageType;
60 
61 	enum
62 	{
63 		EXPONENT_BITS	= ExponentBits,
64 		MANTISSA_BITS	= MantissaBits,
65 		EXPONENT_BIAS	= ExponentBias,
66 		FLAGS			= Flags,
67 	};
68 
69 							Float			(void);
70 	explicit				Float			(StorageType value);
71 	explicit				Float			(float v, RoundingDirection rd = ROUND_TO_EVEN);
72 	explicit				Float			(double v, RoundingDirection rd = ROUND_TO_EVEN);
73 
74 	template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
75 	static Float			convert			(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& src, RoundingDirection rd = ROUND_TO_EVEN);
76 
convert(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & src,RoundingDirection=ROUND_TO_EVEN)77 	static inline Float		convert			(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& src, RoundingDirection = ROUND_TO_EVEN) { return src; }
78 
79 	/*--------------------------------------------------------------------*//*!
80 	 * \brief Construct floating point value
81 	 * \param sign		Sign. Must be +1/-1
82 	 * \param exponent	Exponent in range [1-ExponentBias, ExponentBias+1]
83 	 * \param mantissa	Mantissa bits with implicit leading bit explicitly set
84 	 * \return The specified float
85 	 *
86 	 * This function constructs a floating point value from its inputs.
87 	 * The normally implicit leading bit of the mantissa must be explicitly set.
88 	 * The exponent normally used for zero/subnormals is an invalid input. Such
89 	 * values are specified with the leading mantissa bit of zero and the lowest
90 	 * normal exponent (1-ExponentBias). Additionally having both exponent and
91 	 * mantissa set to zero is a shorthand notation for the correctly signed
92 	 * floating point zero. Inf and NaN must be specified directly with an
93 	 * exponent of ExponentBias+1 and the appropriate mantissa (with leading
94 	 * bit set)
95 	 *//*--------------------------------------------------------------------*/
96 	static inline Float		construct		(int sign, int exponent, StorageType mantissa);
97 
98 	/*--------------------------------------------------------------------*//*!
99 	 * \brief Construct floating point value. Explicit version
100 	 * \param sign		Sign. Must be +1/-1
101 	 * \param exponent	Exponent in range [-ExponentBias, ExponentBias+1]
102 	 * \param mantissa	Mantissa bits
103 	 * \return The specified float
104 	 *
105 	 * This function constructs a floating point value from its inputs with
106 	 * minimal intervention.
107 	 * The sign is turned into a sign bit and the exponent bias is added.
108 	 * See IEEE-754 for additional information on the inputs and
109 	 * the encoding of special values.
110 	 *//*--------------------------------------------------------------------*/
111 	static Float			constructBits	(int sign, int exponent, StorageType mantissaBits);
112 
bits(void) const113 	StorageType				bits			(void) const	{ return m_value;															}
114 	float					asFloat			(void) const;
115 	double					asDouble		(void) const;
116 
signBit(void) const117 	inline int				signBit			(void) const	{ return (int)(m_value >> (ExponentBits+MantissaBits)) & 1;					}
exponentBits(void) const118 	inline StorageType		exponentBits	(void) const	{ return (m_value >> MantissaBits) & ((StorageType(1)<<ExponentBits)-1);	}
mantissaBits(void) const119 	inline StorageType		mantissaBits	(void) const	{ return m_value & ((StorageType(1)<<MantissaBits)-1);						}
120 
sign(void) const121 	inline int				sign			(void) const	{ return signBit() ? -1 : 1;																			}
exponent(void) const122 	inline int				exponent		(void) const	{ return isDenorm() ? 1	- ExponentBias : (int)exponentBits() - ExponentBias;							}
mantissa(void) const123 	inline StorageType		mantissa		(void) const	{ return isZero() || isDenorm() ? mantissaBits() : (mantissaBits() | (StorageType(1)<<MantissaBits));	}
124 
isInf(void) const125 	inline bool				isInf			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() == 0;	}
isNaN(void) const126 	inline bool				isNaN			(void) const	{ return exponentBits() == ((1<<ExponentBits)-1)	&& mantissaBits() != 0;	}
isZero(void) const127 	inline bool				isZero			(void) const	{ return exponentBits() == 0						&& mantissaBits() == 0;	}
isDenorm(void) const128 	inline bool				isDenorm		(void) const	{ return exponentBits() == 0						&& mantissaBits() != 0;	}
129 
operator <(const Float<StorageType,ExponentBits,MantissaBits,ExponentBias,Flags> & other) const130 	inline bool				operator<		(const Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>& other) const { return this->asDouble() < other.asDouble(); }
131 
132 	static Float			zero			(int sign);
133 	static Float			inf				(int sign);
134 	static Float			nan				(void);
135 
136 	static Float			largestNormal	(int sign);
137 	static Float			smallestNormal	(int sign);
138 
139 private:
140 	StorageType				m_value;
141 } DE_WARN_UNUSED_TYPE;
142 
143 // Common floating-point types.
144 typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float16;	//!< IEEE 754-2008 16-bit floating-point value
145 typedef Float<deUint32,  8, 23,  127, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float32;	//!< IEEE 754 32-bit floating-point value
146 typedef Float<deUint64, 11, 52, 1023, FLOAT_HAS_SIGN|FLOAT_SUPPORT_DENORM>	Float64;	//!< IEEE 754 64-bit floating-point value
147 
148 typedef Float<deUint16,  5, 10,   15, FLOAT_HAS_SIGN>	Float16Denormless;	//!< IEEE 754-2008 16-bit floating-point value without denormalized support
149 
150 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(void)151 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (void)
152 	: m_value(0)
153 {
154 }
155 
156 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(StorageType value)157 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (StorageType value)
158 	: m_value(value)
159 {
160 }
161 
162 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(float value,RoundingDirection rd)163 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (float value, RoundingDirection rd)
164 	: m_value(0)
165 {
166 	deUint32 u32;
167 	memcpy(&u32, &value, sizeof(deUint32));
168 	*this = convert(Float32(u32), rd);
169 }
170 
171 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
Float(double value,RoundingDirection rd)172 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::Float (double value, RoundingDirection rd)
173 	: m_value(0)
174 {
175 	deUint64 u64;
176 	memcpy(&u64, &value, sizeof(deUint64));
177 	*this = convert(Float64(u64), rd);
178 }
179 
180 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asFloat(void) const181 inline float Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asFloat (void) const
182 {
183 	float		v;
184 	deUint32	u32		= Float32::convert(*this).bits();
185 	memcpy(&v, &u32, sizeof(deUint32));
186 	return v;
187 }
188 
189 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
asDouble(void) const190 inline double Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::asDouble (void) const
191 {
192 	double		v;
193 	deUint64	u64		= Float64::convert(*this).bits();
194 	memcpy(&v, &u64, sizeof(deUint64));
195 	return v;
196 }
197 
198 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
zero(int sign)199 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::zero (int sign)
200 {
201 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
202 	return Float(StorageType((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)));
203 }
204 
205 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
inf(int sign)206 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::inf (int sign)
207 {
208 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
209 	return Float(StorageType(((sign > 0 ? 0ull : 1ull) << (ExponentBits+MantissaBits)) | (((1ull<<ExponentBits)-1) << MantissaBits)));
210 }
211 
212 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
nan(void)213 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::nan (void)
214 {
215 	return Float(StorageType((1ull<<(ExponentBits+MantissaBits))-1));
216 }
217 
218 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
largestNormal(int sign)219 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::largestNormal (int sign)
220 {
221 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
222 	return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(sign, ExponentBias, (static_cast<StorageType>(1) << (MantissaBits + 1)) - 1);
223 }
224 
225 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
smallestNormal(int sign)226 inline Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags> Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::smallestNormal (int sign)
227 {
228 	DE_ASSERT(sign == 1 || ((Flags & FLOAT_HAS_SIGN) && sign == -1));
229 	return Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct(sign, 1 - ExponentBias, (static_cast<StorageType>(1) << MantissaBits));
230 }
231 
232 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
233 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
construct(int sign,int exponent,StorageType mantissa)234 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::construct
235 	(int sign, int exponent, StorageType mantissa)
236 {
237 	// Repurpose this otherwise invalid input as a shorthand notation for zero (no need for caller to care about internal representation)
238 	const bool			isShorthandZero	= exponent == 0 && mantissa == 0;
239 
240 	// Handles the typical notation for zero (min exponent, mantissa 0). Note that the exponent usually used exponent (-ExponentBias) for zero/subnormals is not used.
241 	// Instead zero/subnormals have the (normally implicit) leading mantissa bit set to zero.
242 	const bool			isDenormOrZero	= (exponent == 1 - ExponentBias) && (mantissa >> MantissaBits == 0);
243 	const StorageType	s				= StorageType((StorageType(sign < 0 ? 1 : 0)) << (StorageType(ExponentBits+MantissaBits)));
244 	const StorageType	exp				= (isShorthandZero  || isDenormOrZero) ? StorageType(0) : StorageType(exponent + ExponentBias);
245 
246 	DE_ASSERT(sign == +1 || sign == -1);
247 	DE_ASSERT(isShorthandZero || isDenormOrZero || mantissa >> MantissaBits == 1);
248 	DE_ASSERT(exp >> ExponentBits == 0);
249 
250 	return Float(StorageType(s | (exp << MantissaBits) | (mantissa & ((StorageType(1)<<MantissaBits)-1))));
251 }
252 
253 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
254 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
constructBits(int sign,int exponent,StorageType mantissaBits)255 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::constructBits
256 	(int sign, int exponent, StorageType mantissaBits)
257 {
258 	const StorageType signBit		= static_cast<StorageType>(sign < 0 ? 1 : 0);
259 	const StorageType exponentBits	= static_cast<StorageType>(exponent + ExponentBias);
260 
261 	DE_ASSERT(sign == +1 || sign == -1 );
262 	DE_ASSERT(exponentBits >> ExponentBits == 0);
263 	DE_ASSERT(mantissaBits >> MantissaBits == 0);
264 
265 	return Float(StorageType((signBit << (ExponentBits+MantissaBits)) | (exponentBits << MantissaBits) | (mantissaBits)));
266 }
267 
268 template <typename StorageType, int ExponentBits, int MantissaBits, int ExponentBias, deUint32 Flags>
269 template <typename OtherStorageType, int OtherExponentBits, int OtherMantissaBits, int OtherExponentBias, deUint32 OtherFlags>
270 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>
convert(const Float<OtherStorageType,OtherExponentBits,OtherMantissaBits,OtherExponentBias,OtherFlags> & other,RoundingDirection rd)271 Float<StorageType, ExponentBits, MantissaBits, ExponentBias, Flags>::convert
272 	(const Float<OtherStorageType, OtherExponentBits, OtherMantissaBits, OtherExponentBias, OtherFlags>& other, RoundingDirection rd)
273 {
274 	if (!(Flags & FLOAT_HAS_SIGN) && other.sign() < 0)
275 	{
276 		// Negative number, truncate to zero.
277 		return zero(+1);
278 	}
279 
280 	if (other.isInf())
281 	{
282 		return inf(other.sign());
283 	}
284 
285 	if (other.isNaN())
286 	{
287 		return nan();
288 	}
289 
290 	if (other.isZero())
291 	{
292 		return zero(other.sign());
293 	}
294 
295 	const int			eMin	= 1 - ExponentBias;
296 	const int			eMax	= ((1<<ExponentBits)-2) - ExponentBias;
297 
298 	const StorageType	s		= StorageType((StorageType(other.signBit())) << (StorageType(ExponentBits+MantissaBits))); // \note Not sign, but sign bit.
299 	int					e		= other.exponent();
300 	deUint64			m		= other.mantissa();
301 
302 	// Normalize denormalized values prior to conversion.
303 	while (!(m & (1ull<<OtherMantissaBits)))
304 	{
305 		m <<= 1;
306 		e  -= 1;
307 	}
308 
309 	if (e < eMin)
310 	{
311 		// Underflow.
312 		if ((Flags & FLOAT_SUPPORT_DENORM) && (eMin-e-1 <= MantissaBits))
313 		{
314 			// Shift and round.
315 			int			bitDiff			= (OtherMantissaBits-MantissaBits) + (eMin-e);
316 			deUint64	lastBitsMask	= (1ull << bitDiff) - 1ull;
317 			deUint64	lastBits		= (static_cast<deUint64>(m) & lastBitsMask);
318 			deUint64	half			= (1ull << (bitDiff - 1)) - 1;
319 			deUint64	bias			= (m >> bitDiff) & 1;
320 
321 			switch (rd)
322 			{
323 			case ROUND_TO_EVEN:
324 				return Float(StorageType(s | (m + half + bias) >> bitDiff));
325 
326 			case ROUND_DOWNWARD:
327 				m = (m >> bitDiff);
328 				if (lastBits != 0ull && other.sign() < 0)
329 				{
330 					m += 1;
331 				}
332 				return Float(StorageType(s | m));
333 
334 			case ROUND_UPWARD:
335 				m = (m >> bitDiff);
336 				if (lastBits != 0ull && other.sign() > 0)
337 				{
338 					m += 1;
339 				}
340 				return Float(StorageType(s | m));
341 
342 			case ROUND_TO_ZERO:
343 				return Float(StorageType(s | (m >> bitDiff)));
344 
345 			default:
346 				DE_ASSERT(false);
347 				break;
348 			}
349 		}
350 
351 		return zero(other.sign());
352 	}
353 
354 	// Remove leading 1.
355 	m = m & ~(1ull<<OtherMantissaBits);
356 
357 	if (MantissaBits < OtherMantissaBits)
358 	{
359 		// Round mantissa.
360 		int			bitDiff			= OtherMantissaBits-MantissaBits;
361 		deUint64	lastBitsMask	= (1ull << bitDiff) - 1ull;
362 		deUint64	lastBits		= (static_cast<deUint64>(m) & lastBitsMask);
363 		deUint64	half			= (1ull << (bitDiff - 1)) - 1;
364 		deUint64	bias			= (m >> bitDiff) & 1;
365 
366 		switch (rd)
367 		{
368 		case ROUND_TO_EVEN:
369 			m = (m + half + bias) >> bitDiff;
370 			break;
371 
372 		case ROUND_DOWNWARD:
373 			m = (m >> bitDiff);
374 			if (lastBits != 0ull && other.sign() < 0)
375 			{
376 				m += 1;
377 			}
378 			break;
379 
380 		case ROUND_UPWARD:
381 			m = (m >> bitDiff);
382 			if (lastBits != 0ull && other.sign() > 0)
383 			{
384 				m += 1;
385 			}
386 			break;
387 
388 		case ROUND_TO_ZERO:
389 			m = (m >> bitDiff);
390 			break;
391 
392 		default:
393 			DE_ASSERT(false);
394 			break;
395 		}
396 
397 		if (m & (1ull<<MantissaBits))
398 		{
399 			// Overflow in mantissa.
400 			m  = 0;
401 			e += 1;
402 		}
403 	}
404 	else
405 	{
406 		int bitDiff = MantissaBits-OtherMantissaBits;
407 		m = m << bitDiff;
408 	}
409 
410 	if (e > eMax)
411 	{
412 		// Overflow.
413 		return (((other.sign() < 0 && rd == ROUND_UPWARD) || (other.sign() > 0 && rd == ROUND_DOWNWARD)) ? largestNormal(other.sign()) : inf(other.sign()));
414 	}
415 
416 	DE_ASSERT(de::inRange(e, eMin, eMax));
417 	DE_ASSERT(((e + ExponentBias) & ~((1ull<<ExponentBits)-1)) == 0);
418 	DE_ASSERT((m & ~((1ull<<MantissaBits)-1)) == 0);
419 
420 	return Float(StorageType(s | (StorageType(e + ExponentBias) << MantissaBits) | m));
421 }
422 
423 } // tcu
424 
425 #endif // _TCUFLOAT_HPP
426