1 /*-------------------------------------------------------------------------
2 * drawElements Base Portability Library
3 * -------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief 16-bit floating-point math.
22 *//*--------------------------------------------------------------------*/
23
24 #include "deFloat16.h"
25
26 DE_BEGIN_EXTERN_C
27
deFloat32To16(float val32)28 deFloat16 deFloat32To16 (float val32)
29 {
30 deUint32 sign;
31 int expotent;
32 deUint32 mantissa;
33 union
34 {
35 float f;
36 deUint32 u;
37 } x;
38
39 x.f = val32;
40 sign = (x.u >> 16u) & 0x00008000u;
41 expotent = (int)((x.u >> 23u) & 0x000000ffu) - (127 - 15);
42 mantissa = x.u & 0x007fffffu;
43
44 if (expotent <= 0)
45 {
46 if (expotent < -10)
47 {
48 /* Rounds to zero. */
49 return (deFloat16) sign;
50 }
51
52 /* Converted to denormalized half, add leading 1 to significand. */
53 mantissa = mantissa | 0x00800000u;
54
55 /* Round mantissa to nearest (10+e) */
56 {
57 deUint32 t = 14u - expotent;
58 deUint32 a = (1u << (t - 1u)) - 1u;
59 deUint32 b = (mantissa >> t) & 1u;
60
61 mantissa = (mantissa + a + b) >> t;
62 }
63
64 return (deFloat16) (sign | mantissa);
65 }
66 else if (expotent == 0xff - (127 - 15))
67 {
68 if (mantissa == 0u)
69 {
70 /* InF */
71 return (deFloat16) (sign | 0x7c00u);
72 }
73 else
74 {
75 /* NaN */
76 mantissa >>= 13u;
77 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
78 }
79 }
80 else
81 {
82 /* Normalized float. */
83 mantissa = mantissa + 0x00000fffu + ((mantissa >> 13u) & 1u);
84
85 if (mantissa & 0x00800000u)
86 {
87 /* Overflow in mantissa. */
88 mantissa = 0u;
89 expotent += 1;
90 }
91
92 if (expotent > 30)
93 {
94 /* \todo [pyry] Cause hw fp overflow */
95 return (deFloat16) (sign | 0x7c00u);
96 }
97
98 return (deFloat16) (sign | ((deUint32)expotent << 10u) | (mantissa >> 13u));
99 }
100 }
101
102 /*--------------------------------------------------------------------*//*!
103 * \brief Round the given number `val` to nearest even by discarding
104 * the last `numBitsToDiscard` bits.
105 * \param val value to round
106 * \param numBitsToDiscard number of (least significant) bits to discard
107 * \return The rounded value with the last `numBitsToDiscard` removed
108 *//*--------------------------------------------------------------------*/
roundToNearestEven(deUint32 val,const deUint32 numBitsToDiscard)109 static deUint32 roundToNearestEven (deUint32 val, const deUint32 numBitsToDiscard)
110 {
111 const deUint32 lastBits = val & ((1 << numBitsToDiscard) - 1);
112 const deUint32 headBit = val & (1 << (numBitsToDiscard - 1));
113
114 DE_ASSERT(numBitsToDiscard > 0 && numBitsToDiscard < 32); /* Make sure no overflow. */
115 val >>= numBitsToDiscard;
116
117 if (headBit == 0)
118 {
119 return val;
120 }
121 else if (headBit == lastBits)
122 {
123 if ((val & 0x1) == 0x1)
124 {
125 return val + 1;
126 }
127 else
128 {
129 return val;
130 }
131 }
132 else
133 {
134 return val + 1;
135 }
136 }
137
deFloat32To16Round(float val32,deRoundingMode mode)138 deFloat16 deFloat32To16Round (float val32, deRoundingMode mode)
139 {
140 union
141 {
142 float f; /* Interpret as 32-bit float */
143 deUint32 u; /* Interpret as 32-bit unsigned integer */
144 } x;
145 deUint32 sign; /* sign : 0000 0000 0000 0000 X000 0000 0000 0000 */
146 deUint32 exp32; /* exp32: biased exponent for 32-bit floats */
147 int exp16; /* exp16: biased exponent for 16-bit floats */
148 deUint32 mantissa;
149
150 /* We only support these two rounding modes for now */
151 DE_ASSERT(mode == DE_ROUNDINGMODE_TO_ZERO || mode == DE_ROUNDINGMODE_TO_NEAREST_EVEN);
152
153 x.f = val32;
154 sign = (x.u >> 16u) & 0x00008000u;
155 exp32 = (x.u >> 23u) & 0x000000ffu;
156 exp16 = (int) (exp32) - 127 + 15; /* 15/127: exponent bias for 16-bit/32-bit floats */
157 mantissa = x.u & 0x007fffffu;
158
159 /* Case: zero and denormalized floats */
160 if (exp32 == 0)
161 {
162 /* Denormalized floats are < 2^(1-127), not representable in 16-bit floats, rounding to zero. */
163 return (deFloat16) sign;
164 }
165 /* Case: Inf and NaN */
166 else if (exp32 == 0x000000ffu)
167 {
168 if (mantissa == 0u)
169 {
170 /* Inf */
171 return (deFloat16) (sign | 0x7c00u);
172 }
173 else
174 {
175 /* NaN */
176 mantissa >>= 13u; /* 16-bit floats has 10-bit for mantissa, 13-bit less than 32-bit floats. */
177 /* Make sure we don't turn NaN into zero by | (mantissa == 0). */
178 return (deFloat16) (sign | 0x7c00u | mantissa | (mantissa == 0u));
179 }
180 }
181 /* The following are cases for normalized floats.
182 *
183 * * If exp16 is less than 0, we are experiencing underflow for the exponent. To encode this underflowed exponent,
184 * we can only shift the mantissa further right.
185 * The real exponent is exp16 - 15. A denormalized 16-bit float can represent -14 via its exponent.
186 * Note that the most significant bit in the mantissa of a denormalized float is already -1 as for exponent.
187 * So, we just need to right shift the mantissa -exp16 bits.
188 * * If exp16 is 0, mantissa shifting requirement is similar to the above.
189 * * If exp16 is greater than 30 (0b11110), we are experiencing overflow for the exponent of 16-bit normalized floats.
190 */
191 /* Case: normalized floats -> zero */
192 else if (exp16 < -10)
193 {
194 /* 16-bit floats have only 10 bits for mantissa. Minimal 16-bit denormalized float is (2^-10) * (2^-14). */
195 /* Expecting a number < (2^-10) * (2^-14) here, not representable, round to zero. */
196 return (deFloat16) sign;
197 }
198 /* Case: normalized floats -> zero and denormalized halfs */
199 else if (exp16 <= 0)
200 {
201 /* Add the implicit leading 1 in mormalized float to mantissa. */
202 mantissa |= 0x00800000u;
203 /* We have a (23 + 1)-bit mantissa, but 16-bit floats only expect 10-bit mantissa.
204 * Need to discard the last 14-bits considering rounding mode.
205 * We also need to shift right -exp16 bits to encode the underflowed exponent.
206 */
207 if (mode == DE_ROUNDINGMODE_TO_ZERO)
208 {
209 mantissa >>= (14 - exp16);
210 }
211 else
212 {
213 /* mantissa in the above may exceed 10-bits, in which case overflow happens.
214 * The overflowed bit is automatically carried to exponent then.
215 */
216 mantissa = roundToNearestEven(mantissa, 14 - exp16);
217 }
218 return (deFloat16) (sign | mantissa);
219 }
220 /* Case: normalized floats -> normalized floats */
221 else if (exp16 <= 30)
222 {
223 if (mode == DE_ROUNDINGMODE_TO_ZERO)
224 {
225 return (deFloat16) (sign | ((deUint32)exp16 << 10u) | (mantissa >> 13u));
226 }
227 else
228 {
229 mantissa = roundToNearestEven(mantissa, 13);
230 /* Handle overflow. exp16 may overflow (and become Inf) itself, but that's correct. */
231 exp16 = (exp16 << 10u) + (mantissa & (1 << 10));
232 mantissa &= (1u << 10) - 1;
233 return (deFloat16) (sign | ((deUint32) exp16) | mantissa);
234 }
235 }
236 /* Case: normalized floats (too large to be representable as 16-bit floats) */
237 else
238 {
239 /* According to IEEE Std 754-2008 Section 7.4,
240 * * roundTiesToEven and roundTiesToAway carry all overflows to Inf with the sign
241 * of the intermediate result.
242 * * roundTowardZero carries all overflows to the format’s largest finite number
243 * with the sign of the intermediate result.
244 */
245 if (mode == DE_ROUNDINGMODE_TO_ZERO)
246 {
247 return (deFloat16) (sign | 0x7bffu); /* 111 1011 1111 1111 */
248 }
249 else
250 {
251 return (deFloat16) (sign | (0x1f << 10));
252 }
253 }
254
255 /* Make compiler happy */
256 return (deFloat16) 0;
257 }
258
/*--------------------------------------------------------------------*//*!
 * \brief Convert a 16-bit float bit pattern to a 32-bit float.
 *
 * Zeros, Inf and NaN keep their sign; NaN mantissa bits are moved to the
 * top of the 32-bit mantissa. Denormalized halfs are re-normalized before
 * being re-encoded. The result is produced by writing the assembled bits
 * through a union (this assumes `float` uses the IEEE 754 binary32 layout).
 *
 * \param val16 16-bit float bit pattern
 * \return The value as a 32-bit float
 *//*--------------------------------------------------------------------*/
float deFloat16To32 (deFloat16 val16)
{
	union
	{
		float		f;
		deUint32	u;
	} result;
	deUint32	signBit;
	deUint32	exponent;
	deUint32	frac;

	/* Bit 15 of the half becomes bit 31 of the float. */
	signBit		= ((deUint32)val16 & 0x00008000u) << 16u;
	exponent	= ((deUint32)val16 >> 10u) & 0x0000001fu;
	frac		= (deUint32)val16 & 0x000003ffu;

	if (exponent == 31u)
	{
		/* +/- Inf when the mantissa is zero, +/- NaN otherwise. */
		result.u = signBit | 0x7f800000u | (frac << 13u);
		return result.f;
	}

	if (exponent == 0u)
	{
		if (frac == 0u)
		{
			/* +/- 0 */
			result.u = signBit;
			return result.f;
		}

		/* Denormalized half: shift the mantissa up until its leading 1 reaches bit 10,
		 * lowering the exponent once per shift. The unsigned exponent may wrap below
		 * zero here; adding the bias difference below brings it back into range. */
		for (exponent = 1u; !(frac & 0x00000400u); exponent -= 1u)
		{
			frac <<= 1u;
		}

		/* The leading 1 is implicit in a normalized float. */
		frac &= ~0x00000400u;
	}

	/* Re-bias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits). */
	result.u = signBit | ((exponent + (127u - 15u)) << 23u) | (frac << 13u);
	return result.f;
}
320
321 DE_END_EXTERN_C
322