/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package libcore.util;

/**
 * <p>The {@code FP16} class is a wrapper and a utility class to manipulate half-precision 16-bit
 * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
 * floating point data types (also called fp16 or binary16). A half-precision float can be
 * created from or converted to single-precision floats, and is stored in a short data type.
 *
 * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
 * <ul>
 * <li>Sign bit: 1 bit</li>
 * <li>Exponent width: 5 bits</li>
 * <li>Significand: 10 bits</li>
 * </ul>
 *
 * <p>The format is laid out as follows:</p>
 * <pre>
 * 1   11111   1111111111
 * ^   --^--   -----^----
 * sign  |          |_______ significand
 *       |
 *       -- exponent
 * </pre>
 *
 * <p>Half-precision floating points can be useful to save memory and/or
 * bandwidth at the expense of range and precision when compared to single-precision
 * floating points (fp32).</p>
 * <p>To help you decide whether fp16 is the right storage type for your needs, please
 * refer to the table below that shows the available precision throughout the range of
 * possible values. The <em>precision</em> column indicates the step size between two
 * consecutive numbers in a specific part of the range.</p>
 *
 * <table summary="Precision of fp16 across the range">
 *     <tr><th>Range start</th><th>Precision</th></tr>
 *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
 *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
 *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
 *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
 *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
 *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
 *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
 *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
 *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
 *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
 *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
 *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
 *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
 *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
 *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
 *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
 *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
 *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
 *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
 *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
 *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
 *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
 *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
 *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
 *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
 *     <tr><td>1,024</td><td>1</td></tr>
 *     <tr><td>2,048</td><td>2</td></tr>
 *     <tr><td>4,096</td><td>4</td></tr>
 *     <tr><td>8,192</td><td>8</td></tr>
 *     <tr><td>16,384</td><td>16</td></tr>
 *     <tr><td>32,768</td><td>32</td></tr>
 * </table>
 *
 * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
 *
 * @hide
 */

public final class FP16 {
    /**
     * The number of bits used to represent a half-precision float value.
     *
     * @hide
     */
    public static final int SIZE = 16;

    /**
     * Epsilon is the difference between 1.0 and the next value representable
     * by a half-precision floating-point.
     *
     * @hide
     */
    public static final short EPSILON = (short) 0x1400;

    /**
     * Maximum exponent a finite half-precision float may have.
     *
     * @hide
     */
    public static final int MAX_EXPONENT = 15;
    /**
     * Minimum exponent a normalized half-precision float may have.
     *
     * @hide
     */
    public static final int MIN_EXPONENT = -14;

    /**
     * Smallest negative value a half-precision float may have.
     *
     * @hide
     */
    public static final short LOWEST_VALUE = (short) 0xfbff;
    /**
     * Maximum positive finite value a half-precision float may have.
     *
     * @hide
     */
    public static final short MAX_VALUE = (short) 0x7bff;
    /**
     * Smallest positive normal value a half-precision float may have.
     *
     * @hide
     */
    public static final short MIN_NORMAL = (short) 0x0400;
    /**
     * Smallest positive non-zero value a half-precision float may have.
     *
     * @hide
     */
    public static final short MIN_VALUE = (short) 0x0001;
    /**
     * A Not-a-Number representation of a half-precision float.
     *
     * @hide
     */
    public static final short NaN = (short) 0x7e00;
    /**
     * Negative infinity of type half-precision float.
     *
     * @hide
     */
    public static final short NEGATIVE_INFINITY = (short) 0xfc00;
    /**
     * Negative 0 of type half-precision float.
     *
     * @hide
     */
    public static final short NEGATIVE_ZERO = (short) 0x8000;
    /**
     * Positive infinity of type half-precision float.
     *
     * @hide
     */
    public static final short POSITIVE_INFINITY = (short) 0x7c00;
    /**
     * Positive 0 of type half-precision float.
     *
     * @hide
     */
    public static final short POSITIVE_ZERO = (short) 0x0000;

    /**
     * The offset to shift by to obtain the sign bit.
     *
     * @hide
     */
    public static final int SIGN_SHIFT                = 15;

    /**
     * The offset to shift by to obtain the exponent bits.
     *
     * @hide
     */
    public static final int EXPONENT_SHIFT            = 10;

    /**
     * The bitmask to AND a number with to obtain the sign bit.
     *
     * @hide
     */
    public static final int SIGN_MASK                 = 0x8000;

    /**
     * The bitmask to AND a number shifted by {@link #EXPONENT_SHIFT} right, to obtain exponent bits.
     *
     * @hide
     */
    public static final int SHIFTED_EXPONENT_MASK     = 0x1f;

    /**
     * The bitmask to AND a number with to obtain significand bits.
     *
     * @hide
     */
    public static final int SIGNIFICAND_MASK          = 0x3ff;

    /**
     * The bitmask to AND with to obtain exponent and significand bits.
     *
     * @hide
     */
    public static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff;

    /**
     * The offset of the exponent from the actual value.
     *
     * @hide
     */
    public static final int EXPONENT_BIAS             = 15;

    // Single-precision (fp32) layout constants used by toFloat() and toHalf().
    private static final int FP32_SIGN_SHIFT            = 31;
    private static final int FP32_EXPONENT_SHIFT        = 23;
    private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff;
    private static final int FP32_SIGNIFICAND_MASK      = 0x7fffff;
    private static final int FP32_EXPONENT_BIAS         = 127;
    private static final int FP32_QNAN_MASK             = 0x400000;
    // Bit pattern of 0.5f; adding an fp16 subnormal significand to it and then
    // subtracting 0.5f yields the exact fp32 value of that subnormal.
    private static final int FP32_DENORMAL_MAGIC = 126 << 23;
    private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);

    /** Hidden constructor to prevent instantiation. */
    private FP16() {}

    /**
     * Maps half-precision bits to an int whose natural ordering matches the
     * numeric ordering of the encoded values. Negative values map to the
     * negation of their magnitude bits, so both zeros map to {@code 0}.
     * Callers must filter out NaNs first.
     */
    private static int toCompareValue(short h) {
        return (h & SIGN_MASK) != 0 ? 0x8000 - (h & 0xffff) : h & 0xffff;
    }

    /**
     * Removes a trailing run of two or more {@code '0'} characters from the
     * given string; a single trailing zero is left in place.
     */
    private static String stripTrailingZeroRun(String digits) {
        int end = digits.length();
        while (end > 0 && digits.charAt(end - 1) == '0') {
            end--;
        }
        return digits.length() - end >= 2 ? digits.substring(0, end) : digits;
    }

    /**
     * <p>Compares the two specified half-precision float values. The following
     * conditions apply during the comparison:</p>
     *
     * <ul>
     * <li>{@link #NaN} is considered by this method to be equal to itself and greater
     * than all other half-precision float values (including {@link #POSITIVE_INFINITY})</li>
     * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
     * {@link #NEGATIVE_ZERO}.</li>
     * </ul>
     *
     * @param x The first half-precision float value to compare.
     * @param y The second half-precision float value to compare.
     *
     * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
     *          value less than {@code 0} if {@code x} is numerically less than {@code y},
     *          and a value greater than {@code 0} if {@code x} is numerically greater
     *          than {@code y}
     *
     * @hide
     */
    public static int compare(short x, short y) {
        if (less(x, y)) return -1;
        if (greater(x, y)) return 1;

        // Collapse NaNs, akin to halfToIntBits(), but we want to keep
        // (signed) short value types to preserve the ordering of -0.0
        // and +0.0
        short xBits = isNaN(x) ? NaN : x;
        short yBits = isNaN(y) ? NaN : y;

        return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
    }

    /**
     * Returns the closest integral half-precision float value to the specified
     * half-precision float value. Values exactly halfway between two integers
     * are rounded to the even one. Special values are handled in the
     * following ways:
     * <ul>
     * <li>If the specified half-precision float is NaN, the result is NaN</li>
     * <li>If the specified half-precision float is infinity (negative or positive),
     * the result is infinity (with the same sign)</li>
     * <li>If the specified half-precision float is zero (negative or positive),
     * the result is zero (with the same sign)</li>
     * </ul>
     *
     * @param h A half-precision float value
     * @return The value of the specified half-precision float rounded to the nearest
     *         integral half-precision float value
     *
     * @hide
     */
    public static short rint(short h) {
        int bits = h & 0xffff;
        int abs = bits & EXPONENT_SIGNIFICAND_MASK;
        int result = bits;

        if (abs < 0x3c00) {
            // |h| < 1.0: the result is a signed zero, or +/-1.0 when |h| > 0.5.
            result &= SIGN_MASK;
            if (abs > 0x3800) {
                result |= 0x3c00;
            }
        } else if (abs < 0x6400) {
            // 1.0 <= |h| < 1024: 'exp' is the number of fractional significand bits.
            int exp = 25 - (abs >> 10);
            int mask = (1 << exp) - 1;
            // Add just under one half when the integer part is even, exactly one
            // half when it is odd, so that ties round to even once truncated.
            result += ((1 << (exp - 1)) - (~(abs >> exp) & 1));
            result &= ~mask;
        }
        // Values with |h| >= 1024 are already integral (or infinite/NaN).
        if (isNaN((short) result)) {
            // if result is NaN mask with qNaN
            // (i.e. mask the most significant mantissa bit with 1)
            // to comply with hardware implementations (ARM64, Intel, etc).
            result |= NaN;
        }

        return (short) result;
    }

    /**
     * Returns the smallest integral half-precision float value that is greater
     * than or equal to the specified half-precision float value (rounding
     * toward positive infinity).
     * Special values are handled in the following ways:
     * <ul>
     * <li>If the specified half-precision float is NaN, the result is NaN</li>
     * <li>If the specified half-precision float is infinity (negative or positive),
     * the result is infinity (with the same sign)</li>
     * <li>If the specified half-precision float is zero (negative or positive),
     * the result is zero (with the same sign)</li>
     * </ul>
     *
     * @param h A half-precision float value
     * @return The smallest integral half-precision float value greater than or
     *         equal to the specified half-precision float value
     *
     * @hide
     */
    public static short ceil(short h) {
        int bits = h & 0xffff;
        int abs = bits & EXPONENT_SIGNIFICAND_MASK;
        int result = bits;

        if (abs < 0x3c00) {
            // |h| < 1.0: positive non-zero values become 1.0, everything else
            // becomes a zero carrying the original sign.
            result &= SIGN_MASK;
            result |= 0x3c00 & -(~(bits >> 15) & (abs != 0 ? 1 : 0));
        } else if (abs < 0x6400) {
            // 1.0 <= |h| < 1024: 'abs' is reused as the fractional bit count.
            abs = 25 - (abs >> 10);
            int mask = (1 << abs) - 1;
            // Positive values are bumped past the next integer before truncation;
            // negative values are simply truncated.
            result += mask & ((bits >> 15) - 1);
            result &= ~mask;
        }
        if (isNaN((short) result)) {
            // if result is NaN mask with qNaN
            // (i.e. mask the most significant mantissa bit with 1)
            // to comply with hardware implementations (ARM64, Intel, etc).
            result |= NaN;
        }

        return (short) result;
    }

    /**
     * Returns the largest integral half-precision float value that is less
     * than or equal to the specified half-precision float value (rounding
     * toward negative infinity).
     * Special values are handled in the following ways:
     * <ul>
     * <li>If the specified half-precision float is NaN, the result is NaN</li>
     * <li>If the specified half-precision float is infinity (negative or positive),
     * the result is infinity (with the same sign)</li>
     * <li>If the specified half-precision float is zero (negative or positive),
     * the result is zero (with the same sign)</li>
     * </ul>
     *
     * @param h A half-precision float value
     * @return The largest integral half-precision float value less than or
     *         equal to the specified half-precision float value
     *
     * @hide
     */
    public static short floor(short h) {
        int bits = h & 0xffff;
        int abs = bits & EXPONENT_SIGNIFICAND_MASK;
        int result = bits;

        if (abs < 0x3c00) {
            // |h| < 1.0: negative non-zero values become -1.0, everything else
            // becomes a zero carrying the original sign.
            result &= SIGN_MASK;
            result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
        } else if (abs < 0x6400) {
            // 1.0 <= |h| < 1024: 'abs' is reused as the fractional bit count.
            abs = 25 - (abs >> 10);
            int mask = (1 << abs) - 1;
            // Negative values are bumped past the next integer (in magnitude)
            // before truncation; positive values are simply truncated.
            result += mask & -(bits >> 15);
            result &= ~mask;
        }
        if (isNaN((short) result)) {
            // if result is NaN mask with qNaN
            // (i.e. mask the most significant mantissa bit with 1)
            result |= NaN;
        }

        return (short) result;
    }

    /**
     * Returns the truncated half-precision float value of the specified
     * half-precision float value (rounding toward zero). Special values are
     * handled in the following ways:
     * <ul>
     * <li>If the specified half-precision float is NaN, the result is NaN
     * (the input bits are returned unchanged)</li>
     * <li>If the specified half-precision float is infinity (negative or positive),
     * the result is infinity (with the same sign)</li>
     * <li>If the specified half-precision float is zero (negative or positive),
     * the result is zero (with the same sign)</li>
     * </ul>
     *
     * @param h A half-precision float value
     * @return The truncated half-precision float value of the specified
     *         half-precision float value
     *
     * @hide
     */
    public static short trunc(short h) {
        int bits = h & 0xffff;
        int abs = bits & EXPONENT_SIGNIFICAND_MASK;
        int result = bits;

        if (abs < 0x3c00) {
            // |h| < 1.0 truncates to a zero carrying the original sign.
            result &= SIGN_MASK;
        } else if (abs < 0x6400) {
            // 1.0 <= |h| < 1024: clear the fractional significand bits.
            abs = 25 - (abs >> 10);
            int mask = (1 << abs) - 1;
            result &= ~mask;
        }

        return (short) result;
    }

    /**
     * Returns the smaller of two half-precision float values (the value closest
     * to negative infinity). Special values are handled in the following ways:
     * <ul>
     * <li>If either value is NaN, the result is NaN</li>
     * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
     * </ul>
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     * @return The smaller of the two specified half-precision values
     *
     * @hide
     */
    public static short min(short x, short y) {
        if (isNaN(x)) return NaN;
        if (isNaN(y)) return NaN;

        // Both operands are zeros: prefer the negative one.
        if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) {
            return (x & SIGN_MASK) != 0 ? x : y;
        }

        return toCompareValue(x) < toCompareValue(y) ? x : y;
    }

    /**
     * Returns the larger of two half-precision float values (the value closest
     * to positive infinity). Special values are handled in the following ways:
     * <ul>
     * <li>If either value is NaN, the result is NaN</li>
     * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
     * </ul>
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     *
     * @return The larger of the two specified half-precision values
     *
     * @hide
     */
    public static short max(short x, short y) {
        if (isNaN(x)) return NaN;
        if (isNaN(y)) return NaN;

        // Both operands are zeros: prefer the positive one.
        if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) {
            return (x & SIGN_MASK) != 0 ? y : x;
        }

        return toCompareValue(x) > toCompareValue(y) ? x : y;
    }

    /**
     * Returns true if the first half-precision float value is less (smaller
     * toward negative infinity) than the second half-precision float value.
     * If either of the values is NaN, the result is false.
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     *
     * @return True if x is less than y, false otherwise
     *
     * @hide
     */
    public static boolean less(short x, short y) {
        if (isNaN(x)) return false;
        if (isNaN(y)) return false;

        return toCompareValue(x) < toCompareValue(y);
    }

    /**
     * Returns true if the first half-precision float value is less (smaller
     * toward negative infinity) than or equal to the second half-precision
     * float value. If either of the values is NaN, the result is false.
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     *
     * @return True if x is less than or equal to y, false otherwise
     *
     * @hide
     */
    public static boolean lessEquals(short x, short y) {
        if (isNaN(x)) return false;
        if (isNaN(y)) return false;

        return toCompareValue(x) <= toCompareValue(y);
    }

    /**
     * Returns true if the first half-precision float value is greater (larger
     * toward positive infinity) than the second half-precision float value.
     * If either of the values is NaN, the result is false.
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     *
     * @return True if x is greater than y, false otherwise
     *
     * @hide
     */
    public static boolean greater(short x, short y) {
        if (isNaN(x)) return false;
        if (isNaN(y)) return false;

        return toCompareValue(x) > toCompareValue(y);
    }

    /**
     * Returns true if the first half-precision float value is greater (larger
     * toward positive infinity) than or equal to the second half-precision float
     * value. If either of the values is NaN, the result is false.
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     *
     * @return True if x is greater than or equal to y, false otherwise
     *
     * @hide
     */
    public static boolean greaterEquals(short x, short y) {
        if (isNaN(x)) return false;
        if (isNaN(y)) return false;

        return toCompareValue(x) >= toCompareValue(y);
    }

    /**
     * Returns true if the two half-precision float values are equal.
     * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
     * and {@link #NEGATIVE_ZERO} are considered equal.
     *
     * @param x The first half-precision value
     * @param y The second half-precision value
     *
     * @return True if x is equal to y, false otherwise
     *
     * @hide
     */
    public static boolean equals(short x, short y) {
        if (isNaN(x)) return false;
        if (isNaN(y)) return false;

        // Identical bits, or both values are zeros (of either sign).
        return x == y || ((x | y) & EXPONENT_SIGNIFICAND_MASK) == 0;
    }

    /**
     * Returns true if the specified half-precision float value represents
     * infinity, false otherwise.
     *
     * @param h A half-precision float value
     * @return True if the value is positive infinity or negative infinity,
     *         false otherwise
     *
     * @hide
     */
    public static boolean isInfinite(short h) {
        return (h & EXPONENT_SIGNIFICAND_MASK) == POSITIVE_INFINITY;
    }

    /**
     * Returns true if the specified half-precision float value represents
     * a Not-a-Number, false otherwise.
     *
     * @param h A half-precision float value
     * @return True if the value is a NaN, false otherwise
     *
     * @hide
     */
    public static boolean isNaN(short h) {
        return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY;
    }

    /**
     * Returns true if the specified half-precision float value is normalized
     * (does not have a subnormal representation). If the specified value is
     * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
     * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
     * number, this method returns false.
     *
     * @param h A half-precision float value
     * @return True if the value is normalized, false otherwise
     *
     * @hide
     */
    public static boolean isNormalized(short h) {
        // POSITIVE_INFINITY doubles as the unshifted exponent mask: the exponent
        // field must be neither all zeros nor all ones.
        int exponentBits = h & POSITIVE_INFINITY;
        return exponentBits != 0 && exponentBits != POSITIVE_INFINITY;
    }

    /**
     * <p>Converts the specified half-precision float value into a
     * single-precision float value. The following special cases are handled:</p>
     * <ul>
     * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
     * <li>If the input is {@link #POSITIVE_INFINITY} or
     * {@link #NEGATIVE_INFINITY}, the returned value is respectively
     * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
     * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
     * <li>Otherwise, the returned value is a normalized single-precision float value</li>
     * </ul>
     *
     * @param h The half-precision float value to convert to single-precision
     * @return A normalized single-precision float value
     *
     * @hide
     */
    public static float toFloat(short h) {
        int bits = h & 0xffff;
        int s = bits & SIGN_MASK;
        int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
        int m = bits & SIGNIFICAND_MASK;

        int outE = 0;
        int outM = 0;

        if (e == 0) { // Denormal or 0
            if (m != 0) {
                // Convert denorm fp16 into normalized fp32
                float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
                o -= FP32_DENORMAL_FLOAT;
                return s == 0 ? o : -o;
            }
        } else {
            outM = m << 13;
            if (e == 0x1f) { // Infinite or NaN
                outE = 0xff;
                if (outM != 0) { // SNaNs are quieted
                    outM |= FP32_QNAN_MASK;
                }
            } else {
                outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS;
            }
        }

        // 's' still sits at bit 15; shifting by 16 moves it to the fp32 sign bit.
        int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
        return Float.intBitsToFloat(out);
    }

    /**
     * <p>Converts the specified single-precision float value into a
     * half-precision float value. The following special cases are handled:</p>
     * <ul>
     * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
     * value is {@link #NaN} (carrying the sign bit of the input)</li>
     * <li>If the input is {@link Float#POSITIVE_INFINITY} or
     * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
     * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
     * <li>If the input is 0 (positive or negative), the returned value is
     * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
     * <li>If the magnitude of the input is too large to be represented, the
     * returned value is an infinity with the same sign</li>
     * <li>If the magnitude of the input is too small to round to
     * {@link #MIN_VALUE}, the returned value is flushed to
     * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
     * <li>If the magnitude of the input is less than {@link #MIN_NORMAL}, the
     * returned value is a denorm half-precision float</li>
     * <li>Otherwise, the returned value is rounded to the nearest
     * representable half-precision float value (ties to even)</li>
     * </ul>
     *
     * @param f The single-precision float value to convert to half-precision
     * @return A half-precision float value
     *
     * @hide
     */
    public static short toHalf(float f) {
        int bits = Float.floatToRawIntBits(f);
        int s = bits >>> FP32_SIGN_SHIFT;
        int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK;
        int m = bits & FP32_SIGNIFICAND_MASK;

        int outE = 0;
        int outM = 0;

        if (e == 0xff) { // Infinite or NaN
            outE = 0x1f;
            outM = m != 0 ? 0x200 : 0;
        } else {
            // Re-bias the exponent from fp32 to fp16.
            e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS;
            if (e >= 0x1f) { // Overflow
                outE = 0x1f;
            } else if (e <= 0) { // Underflow
                if (e < -10) {
                    // The absolute fp32 value is too small to round up to
                    // MIN_VALUE, flush to +/-0
                } else {
                    // The fp32 value is a normalized float less than MIN_NORMAL,
                    // we convert to a denorm fp16
                    m = m | 0x800000;
                    int shift = 14 - e;
                    outM = m >> shift;

                    int lowm = m & ((1 << shift) - 1);
                    int hway = 1 << (shift - 1);
                    // if above halfway or exactly halfway and outM is odd
                    if (lowm + (outM & 1) > hway) {
                        // Round to nearest even
                        // Can overflow into exponent bit, which surprisingly is OK.
                        // This increment relies on the +outM in the return statement below
                        outM++;
                    }
                }
            } else {
                outE = e;
                outM = m >> 13;
                // if above halfway or exactly halfway and outM is odd
                if ((m & 0x1fff) + (outM & 0x1) > 0x1000) {
                    // Round to nearest even
                    // Can overflow into exponent bit, which surprisingly is OK.
                    // This increment relies on the +outM in the return statement below
                    outM++;
                }
            }
        }
        // The outM is added here as the +1 increments for outM above can
        // cause an overflow in the exponent bit which is OK.
        return (short) ((s << SIGN_SHIFT) | ((outE << EXPONENT_SHIFT) + outM));
    }

    /**
     * <p>Returns a hexadecimal string representation of the specified half-precision
     * float value. If the value is a NaN, the result is <code>"NaN"</code>,
     * otherwise the result follows this format:</p>
     * <ul>
     * <li>If the sign is positive, no sign character appears in the result</li>
     * <li>If the sign is negative, the first character is <code>'-'</code></li>
     * <li>If the value is infinity, the string is <code>"Infinity"</code></li>
     * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
     * <li>If the value has a normalized representation, the exponent and
     * significand are represented in the string in two fields. The significand
     * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
     * representation. A trailing run of two or more zeroes is removed; if all
     * digits are 0, a single zero is used. The significand representation is
     * followed by the exponent, represented by <code>"p"</code>, itself followed
     * by a decimal string of the unbiased exponent</li>
     * <li>If the value has a subnormal representation, the significand starts
     * with <code>"0x0."</code> followed by its lowercase hexadecimal
     * representation. A trailing run of two or more zeroes is removed. The
     * significand representation is followed by the exponent, represented by
     * <code>"p-14"</code></li>
     * </ul>
     *
     * <p>Note: the digits after the point are the raw 10-bit significand field
     * printed as an integer without zero padding, not a positional hexadecimal
     * fraction. As a consequence distinct values can share a string (for example
     * the bit patterns {@code 0x3c01} and {@code 0x3d00} both produce
     * <code>"0x1.1p0"</code>). This format is kept as is so that existing
     * callers see unchanged output.</p>
     *
     * @param h A half-precision float value
     * @return A hexadecimal string representation of the specified value
     *
     * @hide
     */
    public static String toHexString(short h) {
        StringBuilder o = new StringBuilder();

        int bits = h & 0xffff;
        int s = bits >>> SIGN_SHIFT;
        int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
        int m = bits & SIGNIFICAND_MASK;

        if (e == 0x1f) { // Infinite or NaN
            if (m == 0) {
                if (s != 0) o.append('-');
                o.append("Infinity");
            } else {
                o.append("NaN");
            }
        } else {
            if (s != 0) o.append('-');
            if (e == 0) {
                if (m == 0) {
                    o.append("0x0.0p0");
                } else {
                    // Subnormal: implicit leading 0 and a fixed exponent.
                    o.append("0x0.");
                    o.append(stripTrailingZeroRun(Integer.toHexString(m)));
                    o.append("p-14");
                }
            } else {
                // Normal: implicit leading 1 and the unbiased exponent.
                o.append("0x1.");
                o.append(stripTrailingZeroRun(Integer.toHexString(m)));
                o.append('p');
                o.append(Integer.toString(e - EXPONENT_BIAS));
            }
        }

        return o.toString();
    }
}