• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * drawElements Base Portability Library
3  * -------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief 16-bit floating-point math.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "deFloat16.h"
25 
26 DE_BEGIN_EXTERN_C
27 
deFloat32To16(float val32)28 deFloat16 deFloat32To16 (float val32)
29 {
30 	deUint32	sign;
31 	int			expotent;
32 	deUint32	mantissa;
33 	union
34 	{
35 		float		f;
36 		deUint32	u;
37 	} x;
38 
39 	x.f			= val32;
40 	sign		= (x.u >> 16u) & 0x00008000u;
41 	expotent	= (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15);
42 	mantissa	= x.u & 0x007fffffu;
43 
44 	if (expotent <= 0)
45 	{
46 		if (expotent < -10)
47 		{
48 			/* Rounds to zero. */
49 			return (deFloat16) sign;
50 		}
51 
52 		/* Converted to denormalized half, add leading 1 to significand. */
53 		mantissa = mantissa | 0x00800000u;
54 
55 		/* Round mantissa to nearest (10+e) */
56 		{
57 			deUint32 t = 14u - expotent;
58 			deUint32 a = (1u << (t - 1u)) - 1u;
59 			deUint32 b = (mantissa >> t) & 1u;
60 
61 			mantissa = (mantissa + a + b) >> t;
62 		}
63 
64 		return (deFloat16) (sign | mantissa);
65 	}
66 	else if (expotent == 0xff - (127 - 15))
67 	{
68 		if (mantissa == 0u)
69 		{
70 			/* InF */
71 			return (deFloat16) (sign | 0x7c00u);
72 		}
73 		else
74 		{
75 			/* NaN */
76 			mantissa >>= 13u;
77 			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
78 		}
79 	}
80 	else
81 	{
82 		/* Normalized float. */
83 		mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u);
84 
85 		if (mantissa & 0x00800000u)
86 		{
87 			/* Overflow in mantissa. */
88 			mantissa  = 0u;
89 			expotent += 1;
90 		}
91 
92 		if (expotent > 30)
93 		{
94 			/* \todo [pyry] Cause hw fp overflow */
95 			return (deFloat16) (sign | 0x7c00u);
96 		}
97 
98 		return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u));
99 	}
100 }
101 
102 /*--------------------------------------------------------------------*//*!
103  * \brief Round the given number `val` to nearest even by discarding
104  *        the last `numBitsToDiscard` bits.
105  * \param val value to round
106  * \param numBitsToDiscard number of (least significant) bits to discard
107  * \return The rounded value with the last `numBitsToDiscard` removed
108  *//*--------------------------------------------------------------------*/
roundToNearestEven(deUint32 val,const deUint32 numBitsToDiscard)109 static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard)
110 {
111 	const deUint32	lastBits	= val & ((1 << numBitsToDiscard) - 1);
112 	const deUint32	headBit		= val & (1 << (numBitsToDiscard - 1));
113 
114 	DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32);	/* Make sure no overflow. */
115 	val >>= numBitsToDiscard;
116 
117 	if (headBit == 0)
118 	{
119 		return val;
120 	}
121 	else if (headBit == lastBits)
122 	{
123 		if ((val & 0x1) == 0x1)
124 		{
125 			return val + 1;
126 		}
127 		else
128 		{
129 			return val;
130 		}
131 	}
132 	else
133 	{
134 		return val + 1;
135 	}
136 }
137 
deFloat32To16Round(float val32,deRoundingMode mode)138 deFloat16 deFloat32To16Round (float val32, deRoundingMode mode)
139 {
140 	union
141 	{
142 		float		f;		/* Interpret as 32-bit float */
143 		deUint32	u;		/* Interpret as 32-bit unsigned integer */
144 	} x;
145 	deUint32	sign;		/* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
146 	deUint32	exp32;		/* exp32: biased exponent for 32-bit floats */
147 	int			exp16;		/* exp16: biased exponent for 16-bit floats */
148 	deUint32	mantissa;
149 
150 	/* We only support these two rounding modes for now */
151 	DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
152 
153 	x.f			= val32;
154 	sign		= (x.u >> 16u) & 0x00008000u;
155 	exp32		= (x.u >> 23u) & 0x000000ffu;
156 	exp16		= (int) (exp32) - 127 + 15;	/* 15/127: exponent bias for 16-bit/32-bit floats */
157 	mantissa	= x.u & 0x007fffffu;
158 
159 	/* Case: zero and denormalized floats */
160 	if (exp32 == 0)
161 	{
162 		/* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */
163 		return (deFloat16) sign;
164 	}
165 	/* Case: Inf and NaN */
166 	else if (exp32 == 0x000000ffu)
167 	{
168 		if (mantissa == 0u)
169 		{
170 			/* Inf */
171 			return (deFloat16) (sign | 0x7c00u);
172 		}
173 		else
174 		{
175 			/* NaN */
176 			mantissa >>= 13u;	/* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */
177 			/* Make sure we don't turn NaN into zero by | (mantissa == 0). */
178 			return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
179 		}
180 	}
181 	/* The following are cases for normalized floats.
182 	 *
183 	 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
184 	 *   we can only shift the mantissa further right.
185 	 *   The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
186 	 *   Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
187 	 *   So, we just need to right shift the mantissa -exp16 bits.
188 	 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
189 	 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
190 	 */
191 	/* Case: normalized floats -> zero */
192 	else if (exp16 < -10)
193 	{
194 		/* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
195 		/* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
196 		return (deFloat16) sign;
197 	}
198 	/* Case: normalized floats -> zero and denormalized halfs */
199 	else if (exp16 <= 0)
200 	{
201 		/* Add the implicit leading 1 in mormalized float to mantissa. */
202 		mantissa |= 0x00800000u;
203 		/* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
204 		 * Need to discard the last 14-bits considering rounding mode.
205 		 * We also need to shift right -exp16 bits to encode the underflowed exponent.
206 		 */
207 		if (mode == DE_ROUNDINGMODE_TO_ZERO)
208 		{
209 			mantissa >>= (14 - exp16);
210 		}
211 		else
212 		{
213 			/* mantissa in the above may exceed 10-bits, in which case overflow happens.
214 			 * The overflowed bit is automatically carried to exponent then.
215 			 */
216 			mantissa = roundToNearestEven(mantissa, 14 - exp16);
217 		}
218 		return (deFloat16) (sign | mantissa);
219 	}
220 	/* Case: normalized floats -> normalized floats */
221 	else if (exp16 <= 30)
222 	{
223 		if (mode == DE_ROUNDINGMODE_TO_ZERO)
224 		{
225 			return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u));
226 		}
227 		else
228 		{
229 			mantissa	= roundToNearestEven(mantissa, 13);
230 			/* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
231 			exp16		= (exp16 << 10u) + (mantissa & (1 << 10));
232 			mantissa	&= (1u << 10) - 1;
233 			return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
234 		}
235 	}
236 	/* Case: normalized floats (too large to be representable as 16-bit floats) */
237 	else
238 	{
239 		/* According to IEEE Std 754-2008 Section 7.4,
240 		 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
241 		 *   of the intermediate  result.
242 		 * * roundTowardZero carries all overflows to the format’s largest finite number
243 		 *   with the sign of the intermediate result.
244 		 */
245 		if (mode == DE_ROUNDINGMODE_TO_ZERO)
246 		{
247 			return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
248 		}
249 		else
250 		{
251 			return (deFloat16) (sign | (0x1f << 10));
252 		}
253 	}
254 
255 	/* Make compiler happy */
256 	return (deFloat16) 0;
257 }
258 
/*--------------------------------------------------------------------*//*!
 * \brief Convert a 16-bit float bit pattern to a 32-bit float.
 *
 * Zero, denormalized, normalized, Inf and NaN encodings are all handled;
 * the sign bit is carried over in every case.
 *
 * \param val16 16-bit float encoding to convert
 * \return the value as a 32-bit float
 *//*--------------------------------------------------------------------*/
float deFloat16To32 (deFloat16 val16)
{
	union
	{
		float		f;
		deUint32	u;
	} result;
	const deUint32	half		= (deUint32)val16;
	const deUint32	signBit		= ((half >> 15u) & 0x00000001u) << 31u;	/* Sign at bit 31. */
	const deUint32	halfExp		= (half >> 10u) & 0x0000001fu;
	deUint32		fraction	= half & 0x000003ffu;
	deUint32		floatExp;

	if (halfExp == 31u)
	{
		/* +/- Inf when the mantissa is zero, otherwise NaN with its payload shifted up. */
		result.u = signBit | 0x7f800000u | (fraction << 13u);
		return result.f;
	}

	if (halfExp == 0u)
	{
		if (fraction == 0u)
		{
			/* +/- 0 */
			result.u = signBit;
			return result.f;
		}

		/* Denormalized half: shift left until the leading 1 reaches bit 10,
		 * lowering the biased 32-bit exponent by one for every step.
		 */
		floatExp = (127u - 15u) + 1u;
		while ((fraction & 0x00000400u) == 0u)
		{
			fraction <<= 1u;
			floatExp -= 1u;
		}

		/* The leading 1 is implicit in a normalized 32-bit float. */
		fraction &= 0x000003ffu;
	}
	else
	{
		/* Normalized half: only the exponent bias changes. */
		floatExp = halfExp + (127u - 15u);
	}

	result.u = signBit | (floatExp << 23u) | (fraction << 13u);
	return result.f;
}
320 
321 DE_END_EXTERN_C
322