• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.util;
18 
19 import android.annotation.HalfFloat;
20 import android.annotation.NonNull;
21 import android.annotation.Nullable;
22 
23 import sun.misc.FloatingDecimal;
24 
25 /**
26  * <p>The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit
27  * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
28  * floating point data types (also called fp16 or binary16). A half-precision float can be
29  * created from or converted to single-precision floats, and is stored in a short data type.
30  * To distinguish short values holding half-precision floats from regular short values,
31  * it is recommended to use the <code>@HalfFloat</code> annotation.</p>
32  *
33  * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
34  * <ul>
35  * <li>Sign bit: 1 bit</li>
36  * <li>Exponent width: 5 bits</li>
37  * <li>Significand: 10 bits</li>
38  * </ul>
39  *
40  * <p>The format is laid out as follows:</p>
41  * <pre>
42  * 1   11111   1111111111
43  * ^   --^--   -----^----
44  * sign  |          |_______ significand
45  *       |
46  *       -- exponent
47  * </pre>
48  *
49  * <p>Half-precision floating points can be useful to save memory and/or
50  * bandwidth at the expense of range and precision when compared to single-precision
51  * floating points (fp32).</p>
52  * <p>To help you decide whether fp16 is the right storage type for your needs, please
53  * refer to the table below that shows the available precision throughout the range of
54  * possible values. The <em>precision</em> column indicates the step size between two
55  * consecutive numbers in a specific part of the range.</p>
56  *
57  * <table summary="Precision of fp16 across the range">
58  *     <tr><th>Range start</th><th>Precision</th></tr>
59  *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
60  *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
61  *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
62  *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
63  *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
64  *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
65  *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
66  *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
67  *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
68  *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
69  *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
70  *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
71  *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
72  *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
73  *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
74  *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
75  *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
76  *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
77  *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
78  *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
79  *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
80  *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
81  *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
82  *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
83  *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
84  *     <tr><td>1,024</td><td>1</td></tr>
85  *     <tr><td>2,048</td><td>2</td></tr>
86  *     <tr><td>4,096</td><td>4</td></tr>
87  *     <tr><td>8,192</td><td>8</td></tr>
88  *     <tr><td>16,384</td><td>16</td></tr>
89  *     <tr><td>32,768</td><td>32</td></tr>
90  * </table>
91  *
92  * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
93  */
94 @SuppressWarnings("SimplifiableIfStatement")
95 public final class Half extends Number implements Comparable<Half> {
96     /**
97      * The number of bits used to represent a half-precision float value.
98      */
99     public static final int SIZE = 16;
100 
101     /**
102      * Epsilon is the difference between 1.0 and the next value representable
103      * by a half-precision floating-point.
104      */
105     public static final @HalfFloat short EPSILON = (short) 0x1400;
106 
107     /**
108      * Maximum exponent a finite half-precision float may have.
109      */
110     public static final int MAX_EXPONENT = 15;
111     /**
112      * Minimum exponent a normalized half-precision float may have.
113      */
114     public static final int MIN_EXPONENT = -14;
115 
116     /**
117      * Smallest negative value a half-precision float may have.
118      */
119     public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff;
120     /**
121      * Maximum positive finite value a half-precision float may have.
122      */
123     public static final @HalfFloat short MAX_VALUE = (short) 0x7bff;
124     /**
125      * Smallest positive normal value a half-precision float may have.
126      */
127     public static final @HalfFloat short MIN_NORMAL = (short) 0x0400;
128     /**
129      * Smallest positive non-zero value a half-precision float may have.
130      */
131     public static final @HalfFloat short MIN_VALUE = (short) 0x0001;
132     /**
133      * A Not-a-Number representation of a half-precision float.
134      */
135     public static final @HalfFloat short NaN = (short) 0x7e00;
136     /**
137      * Negative infinity of type half-precision float.
138      */
139     public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00;
140     /**
141      * Negative 0 of type half-precision float.
142      */
143     public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000;
144     /**
145      * Positive infinity of type half-precision float.
146      */
147     public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00;
148     /**
149      * Positive 0 of type half-precision float.
150      */
151     public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000;
152 
153     private static final int FP16_SIGN_SHIFT        = 15;
154     private static final int FP16_SIGN_MASK         = 0x8000;
155     private static final int FP16_EXPONENT_SHIFT    = 10;
156     private static final int FP16_EXPONENT_MASK     = 0x1f;
157     private static final int FP16_SIGNIFICAND_MASK  = 0x3ff;
158     private static final int FP16_EXPONENT_BIAS     = 15;
159     private static final int FP16_COMBINED          = 0x7fff;
160     private static final int FP16_EXPONENT_MAX      = 0x7c00;
161 
162     private static final int FP32_SIGN_SHIFT        = 31;
163     private static final int FP32_EXPONENT_SHIFT    = 23;
164     private static final int FP32_EXPONENT_MASK     = 0xff;
165     private static final int FP32_SIGNIFICAND_MASK  = 0x7fffff;
166     private static final int FP32_EXPONENT_BIAS     = 127;
167 
168     private static final int FP32_DENORMAL_MAGIC = 126 << 23;
169     private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
170 
171     private final @HalfFloat short mValue;
172 
173     /**
174      * Constructs a newly allocated {@code Half} object that represents the
175      * half-precision float type argument.
176      *
177      * @param value The value to be represented by the {@code Half}
178      */
Half(@alfFloat short value)179     public Half(@HalfFloat short value) {
180         mValue = value;
181     }
182 
183     /**
184      * Constructs a newly allocated {@code Half} object that represents the
185      * argument converted to a half-precision float.
186      *
187      * @param value The value to be represented by the {@code Half}
188      *
189      * @see #toHalf(float)
190      */
Half(float value)191     public Half(float value) {
192         mValue = toHalf(value);
193     }
194 
195     /**
196      * Constructs a newly allocated {@code Half} object that
197      * represents the argument converted to a half-precision float.
198      *
199      * @param value The value to be represented by the {@code Half}
200      *
201      * @see #toHalf(float)
202      */
Half(double value)203     public Half(double value) {
204         mValue = toHalf((float) value);
205     }
206 
207     /**
208      * <p>Constructs a newly allocated {@code Half} object that represents the
209      * half-precision float value represented by the string.
210      * The string is converted to a half-precision float value as if by the
211      * {@link #valueOf(String)} method.</p>
212      *
213      * <p>Calling this constructor is equivalent to calling:</p>
214      * <pre>
215      *     new Half(Float.parseFloat(value))
216      * </pre>
217      *
218      * @param value A string to be converted to a {@code Half}
219      * @throws NumberFormatException if the string does not contain a parsable number
220      *
221      * @see Float#valueOf(java.lang.String)
222      * @see #toHalf(float)
223      */
Half(@onNull String value)224     public Half(@NonNull String value) throws NumberFormatException {
225         mValue = toHalf(Float.parseFloat(value));
226     }
227 
228     /**
229      * Returns the half-precision value of this {@code Half} as a {@code short}
230      * containing the bit representation described in {@link Half}.
231      *
232      * @return The half-precision float value represented by this object
233      */
halfValue()234     public @HalfFloat short halfValue() {
235         return mValue;
236     }
237 
238     /**
239      * Returns the value of this {@code Half} as a {@code byte} after
240      * a narrowing primitive conversion.
241      *
242      * @return The half-precision float value represented by this object
243      *         converted to type {@code byte}
244      */
245     @Override
byteValue()246     public byte byteValue() {
247         return (byte) toFloat(mValue);
248     }
249 
250     /**
251      * Returns the value of this {@code Half} as a {@code short} after
252      * a narrowing primitive conversion.
253      *
254      * @return The half-precision float value represented by this object
255      *         converted to type {@code short}
256      */
257     @Override
shortValue()258     public short shortValue() {
259         return (short) toFloat(mValue);
260     }
261 
262     /**
263      * Returns the value of this {@code Half} as a {@code int} after
264      * a narrowing primitive conversion.
265      *
266      * @return The half-precision float value represented by this object
267      *         converted to type {@code int}
268      */
269     @Override
intValue()270     public int intValue() {
271         return (int) toFloat(mValue);
272     }
273 
274     /**
275      * Returns the value of this {@code Half} as a {@code long} after
276      * a narrowing primitive conversion.
277      *
278      * @return The half-precision float value represented by this object
279      *         converted to type {@code long}
280      */
281     @Override
longValue()282     public long longValue() {
283         return (long) toFloat(mValue);
284     }
285 
286     /**
287      * Returns the value of this {@code Half} as a {@code float} after
288      * a widening primitive conversion.
289      *
290      * @return The half-precision float value represented by this object
291      *         converted to type {@code float}
292      */
293     @Override
floatValue()294     public float floatValue() {
295         return toFloat(mValue);
296     }
297 
298     /**
299      * Returns the value of this {@code Half} as a {@code double} after
300      * a widening primitive conversion.
301      *
302      * @return The half-precision float value represented by this object
303      *         converted to type {@code double}
304      */
305     @Override
doubleValue()306     public double doubleValue() {
307         return toFloat(mValue);
308     }
309 
310     /**
311      * Returns true if this {@code Half} value represents a Not-a-Number,
312      * false otherwise.
313      *
314      * @return True if the value is a NaN, false otherwise
315      */
isNaN()316     public boolean isNaN() {
317         return isNaN(mValue);
318     }
319 
320     /**
321      * Compares this object against the specified object. The result is {@code true}
322      * if and only if the argument is not {@code null} and is a {@code Half} object
323      * that represents the same half-precision value as the this object. Two
324      * half-precision values are considered to be the same if and only if the method
325      * {@link #halfToIntBits(short)} returns an identical {@code int} value for both.
326      *
327      * @param o The object to compare
328      * @return True if the objects are the same, false otherwise
329      *
330      * @see #halfToIntBits(short)
331      */
332     @Override
equals(@ullable Object o)333     public boolean equals(@Nullable Object o) {
334         return (o instanceof Half) &&
335                 (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue));
336     }
337 
338     /**
339      * Returns a hash code for this {@code Half} object. The result is the
340      * integer bit representation, exactly as produced by the method
341      * {@link #halfToIntBits(short)}, of the primitive half-precision float
342      * value represented by this {@code Half} object.
343      *
344      * @return A hash code value for this object
345      */
346     @Override
hashCode()347     public int hashCode() {
348         return hashCode(mValue);
349     }
350 
351     /**
352      * Returns a string representation of the specified half-precision
353      * float value. See {@link #toString(short)} for more information.
354      *
355      * @return A string representation of this {@code Half} object
356      */
357     @NonNull
358     @Override
toString()359     public String toString() {
360         return toString(mValue);
361     }
362 
363     /**
364      * <p>Compares the two specified half-precision float values. The following
365      * conditions apply during the comparison:</p>
366      *
367      * <ul>
368      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
369      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
370      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
371      * {@link #NEGATIVE_ZERO}.</li>
372      * </ul>
373      *
374      * @param h The half-precision float value to compare to the half-precision value
375      *          represented by this {@code Half} object
376      *
377      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}; a
378      *          value less than {@code 0} if {@code x} is numerically less than {@code y};
379      *          and a value greater than {@code 0} if {@code x} is numerically greater
380      *          than {@code y}
381      */
382     @Override
compareTo(@onNull Half h)383     public int compareTo(@NonNull Half h) {
384         return compare(mValue, h.mValue);
385     }
386 
387     /**
388      * Returns a hash code for a half-precision float value.
389      *
390      * @param h The value to hash
391      *
392      * @return A hash code value for a half-precision float value
393      */
hashCode(@alfFloat short h)394     public static int hashCode(@HalfFloat short h) {
395         return halfToIntBits(h);
396     }
397 
398     /**
399      * <p>Compares the two specified half-precision float values. The following
400      * conditions apply during the comparison:</p>
401      *
402      * <ul>
403      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
404      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
405      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
406      * {@link #NEGATIVE_ZERO}.</li>
407      * </ul>
408      *
409      * @param x The first half-precision float value to compare.
410      * @param y The second half-precision float value to compare
411      *
412      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
413      *          value less than {@code 0} if {@code x} is numerically less than {@code y},
414      *          and a value greater than {@code 0} if {@code x} is numerically greater
415      *          than {@code y}
416      */
compare(@alfFloat short x, @HalfFloat short y)417     public static int compare(@HalfFloat short x, @HalfFloat short y) {
418         if (less(x, y)) return -1;
419         if (greater(x, y)) return 1;
420 
421         // Collapse NaNs, akin to halfToIntBits(), but we want to keep
422         // (signed) short value types to preserve the ordering of -0.0
423         // and +0.0
424         short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x;
425         short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y;
426 
427         return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
428     }
429 
430     /**
431      * <p>Returns a representation of the specified half-precision float value
432      * according to the bit layout described in {@link Half}.</p>
433      *
434      * <p>Similar to {@link #halfToIntBits(short)}, this method collapses all
435      * possible Not-a-Number values to a single canonical Not-a-Number value
436      * defined by {@link #NaN}.</p>
437      *
438      * @param h A half-precision float value
439      * @return The bits that represent the half-precision float value
440      *
441      * @see #halfToIntBits(short)
442      */
halfToShortBits(@alfFloat short h)443     public static @HalfFloat short halfToShortBits(@HalfFloat short h) {
444         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h;
445     }
446 
447     /**
448      * <p>Returns a representation of the specified half-precision float value
449      * according to the bit layout described in {@link Half}.</p>
450      *
451      * <p>Unlike {@link #halfToRawIntBits(short)}, this method collapses all
452      * possible Not-a-Number values to a single canonical Not-a-Number value
453      * defined by {@link #NaN}.</p>
454      *
455      * @param h A half-precision float value
456      * @return The bits that represent the half-precision float value
457      *
458      * @see #halfToRawIntBits(short)
459      * @see #halfToShortBits(short)
460      * @see #intBitsToHalf(int)
461      */
halfToIntBits(@alfFloat short h)462     public static int halfToIntBits(@HalfFloat short h) {
463         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff;
464     }
465 
466     /**
467      * <p>Returns a representation of the specified half-precision float value
468      * according to the bit layout described in {@link Half}.</p>
469      *
470      * <p>The argument is considered to be a representation of a half-precision
471      * float value according to the bit layout described in {@link Half}. The 16
472      * most significant bits of the returned value are set to 0.</p>
473      *
474      * @param h A half-precision float value
475      * @return The bits that represent the half-precision float value
476      *
477      * @see #halfToIntBits(short)
478      * @see #intBitsToHalf(int)
479      */
halfToRawIntBits(@alfFloat short h)480     public static int halfToRawIntBits(@HalfFloat short h) {
481         return h & 0xffff;
482     }
483 
484     /**
485      * <p>Returns the half-precision float value corresponding to a given
486      * bit representation.</p>
487      *
488      * <p>The argument is considered to be a representation of a half-precision
489      * float value according to the bit layout described in {@link Half}. The 16
490      * most significant bits of the argument are ignored.</p>
491      *
492      * @param bits An integer
493      * @return The half-precision float value with the same bit pattern
494      */
intBitsToHalf(int bits)495     public static @HalfFloat short intBitsToHalf(int bits) {
496         return (short) (bits & 0xffff);
497     }
498 
499     /**
500      * Returns the first parameter with the sign of the second parameter.
501      * This method treats NaNs as having a sign.
502      *
503      * @param magnitude A half-precision float value providing the magnitude of the result
504      * @param sign  A half-precision float value providing the sign of the result
505      * @return A value with the magnitude of the first parameter and the sign
506      *         of the second parameter
507      */
copySign(@alfFloat short magnitude, @HalfFloat short sign)508     public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) {
509         return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED));
510     }
511 
512     /**
513      * Returns the absolute value of the specified half-precision float.
514      * Special values are handled in the following ways:
515      * <ul>
516      * <li>If the specified half-precision float is NaN, the result is NaN</li>
517      * <li>If the specified half-precision float is zero (negative or positive),
518      * the result is positive zero (see {@link #POSITIVE_ZERO})</li>
519      * <li>If the specified half-precision float is infinity (negative or positive),
520      * the result is positive infinity (see {@link #POSITIVE_INFINITY})</li>
521      * </ul>
522      *
523      * @param h A half-precision float value
524      * @return The absolute value of the specified half-precision float
525      */
abs(@alfFloat short h)526     public static @HalfFloat short abs(@HalfFloat short h) {
527         return (short) (h & FP16_COMBINED);
528     }
529 
530     /**
531      * Returns the closest integral half-precision float value to the specified
532      * half-precision float value. Special values are handled in the
533      * following ways:
534      * <ul>
535      * <li>If the specified half-precision float is NaN, the result is NaN</li>
536      * <li>If the specified half-precision float is infinity (negative or positive),
537      * the result is infinity (with the same sign)</li>
538      * <li>If the specified half-precision float is zero (negative or positive),
539      * the result is zero (with the same sign)</li>
540      * </ul>
541      *
542      * @param h A half-precision float value
543      * @return The value of the specified half-precision float rounded to the nearest
544      *         half-precision float value
545      */
round(@alfFloat short h)546     public static @HalfFloat short round(@HalfFloat short h) {
547         int bits = h & 0xffff;
548         int e = bits & 0x7fff;
549         int result = bits;
550 
551         if (e < 0x3c00) {
552             result &= FP16_SIGN_MASK;
553             result |= (0x3c00 & (e >= 0x3800 ? 0xffff : 0x0));
554         } else if (e < 0x6400) {
555             e = 25 - (e >> 10);
556             int mask = (1 << e) - 1;
557             result += (1 << (e - 1));
558             result &= ~mask;
559         }
560 
561         return (short) result;
562     }
563 
564     /**
565      * Returns the smallest half-precision float value toward negative infinity
566      * greater than or equal to the specified half-precision float value.
567      * Special values are handled in the following ways:
568      * <ul>
569      * <li>If the specified half-precision float is NaN, the result is NaN</li>
570      * <li>If the specified half-precision float is infinity (negative or positive),
571      * the result is infinity (with the same sign)</li>
572      * <li>If the specified half-precision float is zero (negative or positive),
573      * the result is zero (with the same sign)</li>
574      * </ul>
575      *
576      * @param h A half-precision float value
577      * @return The smallest half-precision float value toward negative infinity
578      *         greater than or equal to the specified half-precision float value
579      */
ceil(@alfFloat short h)580     public static @HalfFloat short ceil(@HalfFloat short h) {
581         int bits = h & 0xffff;
582         int e = bits & 0x7fff;
583         int result = bits;
584 
585         if (e < 0x3c00) {
586             result &= FP16_SIGN_MASK;
587             result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0));
588         } else if (e < 0x6400) {
589             e = 25 - (e >> 10);
590             int mask = (1 << e) - 1;
591             result += mask & ((bits >> 15) - 1);
592             result &= ~mask;
593         }
594 
595         return (short) result;
596     }
597 
598     /**
599      * Returns the largest half-precision float value toward positive infinity
600      * less than or equal to the specified half-precision float value.
601      * Special values are handled in the following ways:
602      * <ul>
603      * <li>If the specified half-precision float is NaN, the result is NaN</li>
604      * <li>If the specified half-precision float is infinity (negative or positive),
605      * the result is infinity (with the same sign)</li>
606      * <li>If the specified half-precision float is zero (negative or positive),
607      * the result is zero (with the same sign)</li>
608      * </ul>
609      *
610      * @param h A half-precision float value
611      * @return The largest half-precision float value toward positive infinity
612      *         less than or equal to the specified half-precision float value
613      */
floor(@alfFloat short h)614     public static @HalfFloat short floor(@HalfFloat short h) {
615         int bits = h & 0xffff;
616         int e = bits & 0x7fff;
617         int result = bits;
618 
619         if (e < 0x3c00) {
620             result &= FP16_SIGN_MASK;
621             result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
622         } else if (e < 0x6400) {
623             e = 25 - (e >> 10);
624             int mask = (1 << e) - 1;
625             result += mask & -(bits >> 15);
626             result &= ~mask;
627         }
628 
629         return (short) result;
630     }
631 
632     /**
633      * Returns the truncated half-precision float value of the specified
634      * half-precision float value. Special values are handled in the following ways:
635      * <ul>
636      * <li>If the specified half-precision float is NaN, the result is NaN</li>
637      * <li>If the specified half-precision float is infinity (negative or positive),
638      * the result is infinity (with the same sign)</li>
639      * <li>If the specified half-precision float is zero (negative or positive),
640      * the result is zero (with the same sign)</li>
641      * </ul>
642      *
643      * @param h A half-precision float value
644      * @return The truncated half-precision float value of the specified
645      *         half-precision float value
646      */
trunc(@alfFloat short h)647     public static @HalfFloat short trunc(@HalfFloat short h) {
648         int bits = h & 0xffff;
649         int e = bits & 0x7fff;
650         int result = bits;
651 
652         if (e < 0x3c00) {
653             result &= FP16_SIGN_MASK;
654         } else if (e < 0x6400) {
655             e = 25 - (e >> 10);
656             int mask = (1 << e) - 1;
657             result &= ~mask;
658         }
659 
660         return (short) result;
661     }
662 
663     /**
664      * Returns the smaller of two half-precision float values (the value closest
665      * to negative infinity). Special values are handled in the following ways:
666      * <ul>
667      * <li>If either value is NaN, the result is NaN</li>
668      * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
669      * </ul>
670      *
671      * @param x The first half-precision value
672      * @param y The second half-precision value
673      * @return The smaller of the two specified half-precision values
674      */
min(@alfFloat short x, @HalfFloat short y)675     public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) {
676         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
677         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
678 
679         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
680             return (x & FP16_SIGN_MASK) != 0 ? x : y;
681         }
682 
683         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
684                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
685     }
686 
687     /**
688      * Returns the larger of two half-precision float values (the value closest
689      * to positive infinity). Special values are handled in the following ways:
690      * <ul>
691      * <li>If either value is NaN, the result is NaN</li>
692      * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
693      * </ul>
694      *
695      * @param x The first half-precision value
696      * @param y The second half-precision value
697      *
698      * @return The larger of the two specified half-precision values
699      */
max(@alfFloat short x, @HalfFloat short y)700     public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) {
701         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
702         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
703 
704         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
705             return (x & FP16_SIGN_MASK) != 0 ? y : x;
706         }
707 
708         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
709                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
710     }
711 
712     /**
713      * Returns true if the first half-precision float value is less (smaller
714      * toward negative infinity) than the second half-precision float value.
715      * If either of the values is NaN, the result is false.
716      *
717      * @param x The first half-precision value
718      * @param y The second half-precision value
719      *
720      * @return True if x is less than y, false otherwise
721      */
less(@alfFloat short x, @HalfFloat short y)722     public static boolean less(@HalfFloat short x, @HalfFloat short y) {
723         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
724         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
725 
726         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
727                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
728     }
729 
730     /**
731      * Returns true if the first half-precision float value is less (smaller
732      * toward negative infinity) than or equal to the second half-precision
733      * float value. If either of the values is NaN, the result is false.
734      *
735      * @param x The first half-precision value
736      * @param y The second half-precision value
737      *
738      * @return True if x is less than or equal to y, false otherwise
739      */
lessEquals(@alfFloat short x, @HalfFloat short y)740     public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) {
741         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
742         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
743 
744         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <=
745                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
746     }
747 
748     /**
749      * Returns true if the first half-precision float value is greater (larger
750      * toward positive infinity) than the second half-precision float value.
751      * If either of the values is NaN, the result is false.
752      *
753      * @param x The first half-precision value
754      * @param y The second half-precision value
755      *
756      * @return True if x is greater than y, false otherwise
757      */
greater(@alfFloat short x, @HalfFloat short y)758     public static boolean greater(@HalfFloat short x, @HalfFloat short y) {
759         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
760         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
761 
762         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
763                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
764     }
765 
766     /**
767      * Returns true if the first half-precision float value is greater (larger
768      * toward positive infinity) than or equal to the second half-precision float
769      * value. If either of the values is NaN, the result is false.
770      *
771      * @param x The first half-precision value
772      * @param y The second half-precision value
773      *
     * @return True if x is greater than or equal to y, false otherwise
775      */
greaterEquals(@alfFloat short x, @HalfFloat short y)776     public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) {
777         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
778         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
779 
780         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >=
781                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
782     }
783 
784     /**
785      * Returns true if the two half-precision float values are equal.
786      * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
787      * and {@link #NEGATIVE_ZERO} are considered equal.
788      *
789      * @param x The first half-precision value
790      * @param y The second half-precision value
791      *
792      * @return True if x is equal to y, false otherwise
793      */
equals(@alfFloat short x, @HalfFloat short y)794     public static boolean equals(@HalfFloat short x, @HalfFloat short y) {
795         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
796         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
797 
798         return x == y || ((x | y) & FP16_COMBINED) == 0;
799     }
800 
801     /**
802      * Returns the sign of the specified half-precision float.
803      *
804      * @param h A half-precision float value
805      * @return 1 if the value is positive, -1 if the value is negative
806      */
getSign(@alfFloat short h)807     public static int getSign(@HalfFloat short h) {
808         return (h & FP16_SIGN_MASK) == 0 ? 1 : -1;
809     }
810 
811     /**
812      * Returns the unbiased exponent used in the representation of
     * the specified half-precision float value. If the value is NaN
     * or infinite, this method returns {@link #MAX_EXPONENT} + 1.
815      * If the argument is 0 or a subnormal representation, this method
816      * returns {@link #MIN_EXPONENT} - 1.
817      *
818      * @param h A half-precision float value
819      * @return The unbiased exponent of the specified value
820      */
getExponent(@alfFloat short h)821     public static int getExponent(@HalfFloat short h) {
822         return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS;
823     }
824 
825     /**
826      * Returns the significand, or mantissa, used in the representation
827      * of the specified half-precision float value.
828      *
829      * @param h A half-precision float value
     * @return The significand, or mantissa, of the specified value
831      */
getSignificand(@alfFloat short h)832     public static int getSignificand(@HalfFloat short h) {
833         return h & FP16_SIGNIFICAND_MASK;
834     }
835 
836     /**
837      * Returns true if the specified half-precision float value represents
838      * infinity, false otherwise.
839      *
840      * @param h A half-precision float value
841      * @return True if the value is positive infinity or negative infinity,
842      *         false otherwise
843      */
isInfinite(@alfFloat short h)844     public static boolean isInfinite(@HalfFloat short h) {
845         return (h & FP16_COMBINED) == FP16_EXPONENT_MAX;
846     }
847 
848     /**
849      * Returns true if the specified half-precision float value represents
850      * a Not-a-Number, false otherwise.
851      *
852      * @param h A half-precision float value
853      * @return True if the value is a NaN, false otherwise
854      */
isNaN(@alfFloat short h)855     public static boolean isNaN(@HalfFloat short h) {
856         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX;
857     }
858 
859     /**
860      * Returns true if the specified half-precision float value is normalized
861      * (does not have a subnormal representation). If the specified value is
862      * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
863      * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
864      * number, this method returns false.
865      *
866      * @param h A half-precision float value
867      * @return True if the value is normalized, false otherwise
868      */
isNormalized(@alfFloat short h)869     public static boolean isNormalized(@HalfFloat short h) {
870         return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX;
871     }
872 
873     /**
874      * <p>Converts the specified half-precision float value into a
875      * single-precision float value. The following special cases are handled:</p>
876      * <ul>
877      * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
878      * <li>If the input is {@link #POSITIVE_INFINITY} or
879      * {@link #NEGATIVE_INFINITY}, the returned value is respectively
880      * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
881      * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
882      * <li>Otherwise, the returned value is a normalized single-precision float value</li>
883      * </ul>
884      *
885      * @param h The half-precision float value to convert to single-precision
886      * @return A normalized single-precision float value
887      */
toFloat(@alfFloat short h)888     public static float toFloat(@HalfFloat short h) {
889         int bits = h & 0xffff;
890         int s = bits & FP16_SIGN_MASK;
891         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
892         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
893 
894         int outE = 0;
895         int outM = 0;
896 
897         if (e == 0) { // Denormal or 0
898             if (m != 0) {
899                 // Convert denorm fp16 into normalized fp32
900                 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
901                 o -= FP32_DENORMAL_FLOAT;
902                 return s == 0 ? o : -o;
903             }
904         } else {
905             outM = m << 13;
906             if (e == 0x1f) { // Infinite or NaN
907                 outE = 0xff;
908             } else {
909                 outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS;
910             }
911         }
912 
913         int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
914         return Float.intBitsToFloat(out);
915     }
916 
917     /**
918      * <p>Converts the specified single-precision float value into a
919      * half-precision float value. The following special cases are handled:</p>
920      * <ul>
921      * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
922      * value is {@link #NaN}</li>
923      * <li>If the input is {@link Float#POSITIVE_INFINITY} or
924      * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
925      * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
926      * <li>If the input is 0 (positive or negative), the returned value is
927      * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
     * <li>If the input is less than {@link #MIN_VALUE}, the returned value
     * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
     * <li>If the input is less than {@link #MIN_NORMAL}, the returned value
     * is a denorm half-precision float</li>
932      * <li>Otherwise, the returned value is rounded to the nearest
933      * representable half-precision float value</li>
934      * </ul>
935      *
936      * @param f The single-precision float value to convert to half-precision
937      * @return A half-precision float value
938      */
939     @SuppressWarnings("StatementWithEmptyBody")
toHalf(float f)940     public static @HalfFloat short toHalf(float f) {
941         int bits = Float.floatToRawIntBits(f);
942         int s = (bits >>> FP32_SIGN_SHIFT    );
943         int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK;
944         int m = (bits                        ) & FP32_SIGNIFICAND_MASK;
945 
946         int outE = 0;
947         int outM = 0;
948 
949         if (e == 0xff) { // Infinite or NaN
950             outE = 0x1f;
951             outM = m != 0 ? 0x200 : 0;
952         } else {
953             e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS;
954             if (e >= 0x1f) { // Overflow
955                 outE = 0x31;
956             } else if (e <= 0) { // Underflow
957                 if (e < -10) {
958                     // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
959                 } else {
960                     // The fp32 value is a normalized float less than MIN_NORMAL,
961                     // we convert to a denorm fp16
962                     m = (m | 0x800000) >> (1 - e);
963                     if ((m & 0x1000) != 0) m += 0x2000;
964                     outM = m >> 13;
965                 }
966             } else {
967                 outE = e;
968                 outM = m >> 13;
969                 if ((m & 0x1000) != 0) {
970                     // Round to nearest "0.5" up
971                     int out = (outE << FP16_EXPONENT_SHIFT) | outM;
972                     out++;
973                     return (short) (out | (s << FP16_SIGN_SHIFT));
974                 }
975             }
976         }
977 
978         return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM);
979     }
980 
981     /**
982      * Returns a {@code Half} instance representing the specified
983      * half-precision float value.
984      *
985      * @param h A half-precision float value
986      * @return a {@code Half} instance representing {@code h}
987      */
valueOf(@alfFloat short h)988     public static @NonNull Half valueOf(@HalfFloat short h) {
989         return new Half(h);
990     }
991 
992     /**
993      * Returns a {@code Half} instance representing the specified float value.
994      *
995      * @param f A float value
996      * @return a {@code Half} instance representing {@code f}
997      */
valueOf(float f)998     public static @NonNull Half valueOf(float f) {
999         return new Half(f);
1000     }
1001 
1002     /**
1003      * Returns a {@code Half} instance representing the specified string value.
1004      * Calling this method is equivalent to calling
     * <code>toHalf(Float.parseFloat(s))</code>. See {@link Float#valueOf(String)}
1006      * for more information on the format of the string representation.
1007      *
1008      * @param s The string to be parsed
     * @return a {@code Half} instance representing {@code s}
1010      * @throws NumberFormatException if the string does not contain a parsable
1011      *         half-precision float value
1012      */
valueOf(@onNull String s)1013     public static @NonNull Half valueOf(@NonNull String s) {
1014         return new Half(s);
1015     }
1016 
1017     /**
1018      * Returns the half-precision float value represented by the specified string.
1019      * Calling this method is equivalent to calling
     * <code>toHalf(Float.parseFloat(s))</code>. See {@link Float#valueOf(String)}
1021      * for more information on the format of the string representation.
1022      *
1023      * @param s The string to be parsed
1024      * @return A half-precision float value represented by the string
1025      * @throws NumberFormatException if the string does not contain a parsable
1026      *         half-precision float value
1027      */
parseHalf(@onNull String s)1028     public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException {
1029         return toHalf(FloatingDecimal.parseFloat(s));
1030     }
1031 
1032     /**
1033      * Returns a string representation of the specified half-precision
1034      * float value. Calling this method is equivalent to calling
1035      * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)}
1036      * for more information on the format of the string representation.
1037      *
1038      * @param h A half-precision float value
1039      * @return A string representation of the specified value
1040      */
1041     @NonNull
toString(@alfFloat short h)1042     public static String toString(@HalfFloat short h) {
1043         return Float.toString(toFloat(h));
1044     }
1045 
1046     /**
1047      * <p>Returns a hexadecimal string representation of the specified half-precision
1048      * float value. If the value is a NaN, the result is <code>"NaN"</code>,
1049      * otherwise the result follows this format:</p>
1050      * <ul>
1051      * <li>If the sign is positive, no sign character appears in the result</li>
1052      * <li>If the sign is negative, the first character is <code>'-'</code></li>
     * <li>If the value is infinity, the string is <code>"Infinity"</code></li>
1054      * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
1055      * <li>If the value has a normalized representation, the exponent and
1056      * significand are represented in the string in two fields. The significand
1057      * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
1058      * representation. Trailing zeroes are removed unless all digits are 0, then
1059      * a single zero is used. The significand representation is followed by the
1060      * exponent, represented by <code>"p"</code>, itself followed by a decimal
1061      * string of the unbiased exponent</li>
1062      * <li>If the value has a subnormal representation, the significand starts
1063      * with <code>"0x0."</code> followed by its lowercase hexadecimal
1064      * representation. Trailing zeroes are removed unless all digits are 0, then
1065      * a single zero is used. The significand representation is followed by the
1066      * exponent, represented by <code>"p-14"</code></li>
1067      * </ul>
1068      *
1069      * @param h A half-precision float value
1070      * @return A hexadecimal string representation of the specified value
1071      */
1072     @NonNull
toHexString(@alfFloat short h)1073     public static String toHexString(@HalfFloat short h) {
1074         StringBuilder o = new StringBuilder();
1075 
1076         int bits = h & 0xffff;
1077         int s = (bits >>> FP16_SIGN_SHIFT    );
1078         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
1079         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
1080 
1081         if (e == 0x1f) { // Infinite or NaN
1082             if (m == 0) {
1083                 if (s != 0) o.append('-');
1084                 o.append("Infinity");
1085             } else {
1086                 o.append("NaN");
1087             }
1088         } else {
1089             if (s == 1) o.append('-');
1090             if (e == 0) {
1091                 if (m == 0) {
1092                     o.append("0x0.0p0");
1093                 } else {
1094                     o.append("0x0.");
1095                     String significand = Integer.toHexString(m);
1096                     o.append(significand.replaceFirst("0{2,}$", ""));
1097                     o.append("p-14");
1098                 }
1099             } else {
1100                 o.append("0x1.");
1101                 String significand = Integer.toHexString(m);
1102                 o.append(significand.replaceFirst("0{2,}$", ""));
1103                 o.append('p');
1104                 o.append(Integer.toString(e - FP16_EXPONENT_BIAS));
1105             }
1106         }
1107 
1108         return o.toString();
1109     }
1110 }
1111