• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.util;
18 
19 import android.annotation.HalfFloat;
20 import android.annotation.NonNull;
21 import android.annotation.Nullable;
22 
23 /**
24  * <p>The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit
25  * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
26  * floating point data types (also called fp16 or binary16). A half-precision float can be
27  * created from or converted to single-precision floats, and is stored in a short data type.
28  * To distinguish short values holding half-precision floats from regular short values,
29  * it is recommended to use the <code>@HalfFloat</code> annotation.</p>
30  *
31  * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
32  * <ul>
33  * <li>Sign bit: 1 bit</li>
34  * <li>Exponent width: 5 bits</li>
35  * <li>Significand: 10 bits</li>
36  * </ul>
37  *
38  * <p>The format is laid out as follows:</p>
39  * <pre>
40  * 1   11111   1111111111
41  * ^   --^--   -----^----
42  * sign  |          |_______ significand
43  *       |
44  *       -- exponent
45  * </pre>
46  *
47  * <p>Half-precision floating points can be useful to save memory and/or
48  * bandwidth at the expense of range and precision when compared to single-precision
49  * floating points (fp32).</p>
 * <p>To help you decide whether fp16 is the right storage type for your needs, please
51  * refer to the table below that shows the available precision throughout the range of
52  * possible values. The <em>precision</em> column indicates the step size between two
53  * consecutive numbers in a specific part of the range.</p>
54  *
55  * <table summary="Precision of fp16 across the range">
56  *     <tr><th>Range start</th><th>Precision</th></tr>
57  *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
58  *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
59  *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
60  *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
61  *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
62  *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
63  *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
64  *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
65  *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
66  *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
67  *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
68  *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
69  *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
70  *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
71  *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
72  *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
73  *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
74  *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
75  *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
76  *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
77  *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
78  *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
79  *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
80  *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
81  *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
82  *     <tr><td>1,024</td><td>1</td></tr>
83  *     <tr><td>2,048</td><td>2</td></tr>
84  *     <tr><td>4,096</td><td>4</td></tr>
85  *     <tr><td>8,192</td><td>8</td></tr>
86  *     <tr><td>16,384</td><td>16</td></tr>
87  *     <tr><td>32,768</td><td>32</td></tr>
88  * </table>
89  *
90  * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
91  */
92 @SuppressWarnings("SimplifiableIfStatement")
93 public final class Half extends Number implements Comparable<Half> {
94     /**
95      * The number of bits used to represent a half-precision float value.
96      */
97     public static final int SIZE = 16;
98 
99     /**
100      * Epsilon is the difference between 1.0 and the next value representable
101      * by a half-precision floating-point.
102      */
103     public static final @HalfFloat short EPSILON = (short) 0x1400;
104 
105     /**
106      * Maximum exponent a finite half-precision float may have.
107      */
108     public static final int MAX_EXPONENT = 15;
109     /**
110      * Minimum exponent a normalized half-precision float may have.
111      */
112     public static final int MIN_EXPONENT = -14;
113 
114     /**
115      * Smallest negative value a half-precision float may have.
116      */
117     public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff;
118     /**
119      * Maximum positive finite value a half-precision float may have.
120      */
121     public static final @HalfFloat short MAX_VALUE = (short) 0x7bff;
122     /**
123      * Smallest positive normal value a half-precision float may have.
124      */
125     public static final @HalfFloat short MIN_NORMAL = (short) 0x0400;
126     /**
127      * Smallest positive non-zero value a half-precision float may have.
128      */
129     public static final @HalfFloat short MIN_VALUE = (short) 0x0001;
130     /**
131      * A Not-a-Number representation of a half-precision float.
132      */
133     public static final @HalfFloat short NaN = (short) 0x7e00;
134     /**
135      * Negative infinity of type half-precision float.
136      */
137     public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00;
138     /**
139      * Negative 0 of type half-precision float.
140      */
141     public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000;
142     /**
143      * Positive infinity of type half-precision float.
144      */
145     public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00;
146     /**
147      * Positive 0 of type half-precision float.
148      */
149     public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000;
150 
151     private static final int FP16_SIGN_SHIFT        = 15;
152     private static final int FP16_SIGN_MASK         = 0x8000;
153     private static final int FP16_EXPONENT_SHIFT    = 10;
154     private static final int FP16_EXPONENT_MASK     = 0x1f;
155     private static final int FP16_SIGNIFICAND_MASK  = 0x3ff;
156     private static final int FP16_EXPONENT_BIAS     = 15;
157     private static final int FP16_COMBINED          = 0x7fff;
158     private static final int FP16_EXPONENT_MAX      = 0x7c00;
159 
160     private static final int FP32_SIGN_SHIFT        = 31;
161     private static final int FP32_EXPONENT_SHIFT    = 23;
162     private static final int FP32_EXPONENT_MASK     = 0xff;
163     private static final int FP32_SIGNIFICAND_MASK  = 0x7fffff;
164     private static final int FP32_EXPONENT_BIAS     = 127;
165     private static final int FP32_QNAN_MASK         = 0x400000;
166 
167     private static final int FP32_DENORMAL_MAGIC = 126 << 23;
168     private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
169 
170     private final @HalfFloat short mValue;
171 
172     /**
173      * Constructs a newly allocated {@code Half} object that represents the
174      * half-precision float type argument.
175      *
176      * @param value The value to be represented by the {@code Half}
177      */
Half(@alfFloat short value)178     public Half(@HalfFloat short value) {
179         mValue = value;
180     }
181 
182     /**
183      * Constructs a newly allocated {@code Half} object that represents the
184      * argument converted to a half-precision float.
185      *
186      * @param value The value to be represented by the {@code Half}
187      *
188      * @see #toHalf(float)
189      */
Half(float value)190     public Half(float value) {
191         mValue = toHalf(value);
192     }
193 
194     /**
195      * Constructs a newly allocated {@code Half} object that
196      * represents the argument converted to a half-precision float.
197      *
198      * @param value The value to be represented by the {@code Half}
199      *
200      * @see #toHalf(float)
201      */
Half(double value)202     public Half(double value) {
203         mValue = toHalf((float) value);
204     }
205 
206     /**
207      * <p>Constructs a newly allocated {@code Half} object that represents the
208      * half-precision float value represented by the string.
209      * The string is converted to a half-precision float value as if by the
210      * {@link #valueOf(String)} method.</p>
211      *
212      * <p>Calling this constructor is equivalent to calling:</p>
213      * <pre>
214      *     new Half(Float.parseFloat(value))
215      * </pre>
216      *
217      * @param value A string to be converted to a {@code Half}
218      * @throws NumberFormatException if the string does not contain a parsable number
219      *
220      * @see Float#valueOf(java.lang.String)
221      * @see #toHalf(float)
222      */
Half(@onNull String value)223     public Half(@NonNull String value) throws NumberFormatException {
224         mValue = toHalf(Float.parseFloat(value));
225     }
226 
227     /**
228      * Returns the half-precision value of this {@code Half} as a {@code short}
229      * containing the bit representation described in {@link Half}.
230      *
231      * @return The half-precision float value represented by this object
232      */
halfValue()233     public @HalfFloat short halfValue() {
234         return mValue;
235     }
236 
237     /**
238      * Returns the value of this {@code Half} as a {@code byte} after
239      * a narrowing primitive conversion.
240      *
241      * @return The half-precision float value represented by this object
242      *         converted to type {@code byte}
243      */
244     @Override
byteValue()245     public byte byteValue() {
246         return (byte) toFloat(mValue);
247     }
248 
249     /**
250      * Returns the value of this {@code Half} as a {@code short} after
251      * a narrowing primitive conversion.
252      *
253      * @return The half-precision float value represented by this object
254      *         converted to type {@code short}
255      */
256     @Override
shortValue()257     public short shortValue() {
258         return (short) toFloat(mValue);
259     }
260 
261     /**
262      * Returns the value of this {@code Half} as a {@code int} after
263      * a narrowing primitive conversion.
264      *
265      * @return The half-precision float value represented by this object
266      *         converted to type {@code int}
267      */
268     @Override
intValue()269     public int intValue() {
270         return (int) toFloat(mValue);
271     }
272 
273     /**
274      * Returns the value of this {@code Half} as a {@code long} after
275      * a narrowing primitive conversion.
276      *
277      * @return The half-precision float value represented by this object
278      *         converted to type {@code long}
279      */
280     @Override
longValue()281     public long longValue() {
282         return (long) toFloat(mValue);
283     }
284 
285     /**
286      * Returns the value of this {@code Half} as a {@code float} after
287      * a widening primitive conversion.
288      *
289      * @return The half-precision float value represented by this object
290      *         converted to type {@code float}
291      */
292     @Override
floatValue()293     public float floatValue() {
294         return toFloat(mValue);
295     }
296 
297     /**
298      * Returns the value of this {@code Half} as a {@code double} after
299      * a widening primitive conversion.
300      *
301      * @return The half-precision float value represented by this object
302      *         converted to type {@code double}
303      */
304     @Override
doubleValue()305     public double doubleValue() {
306         return toFloat(mValue);
307     }
308 
309     /**
310      * Returns true if this {@code Half} value represents a Not-a-Number,
311      * false otherwise.
312      *
313      * @return True if the value is a NaN, false otherwise
314      */
isNaN()315     public boolean isNaN() {
316         return isNaN(mValue);
317     }
318 
319     /**
320      * Compares this object against the specified object. The result is {@code true}
321      * if and only if the argument is not {@code null} and is a {@code Half} object
322      * that represents the same half-precision value as the this object. Two
323      * half-precision values are considered to be the same if and only if the method
324      * {@link #halfToIntBits(short)} returns an identical {@code int} value for both.
325      *
326      * @param o The object to compare
327      * @return True if the objects are the same, false otherwise
328      *
329      * @see #halfToIntBits(short)
330      */
331     @Override
equals(@ullable Object o)332     public boolean equals(@Nullable Object o) {
333         return (o instanceof Half) &&
334                 (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue));
335     }
336 
337     /**
338      * Returns a hash code for this {@code Half} object. The result is the
339      * integer bit representation, exactly as produced by the method
340      * {@link #halfToIntBits(short)}, of the primitive half-precision float
341      * value represented by this {@code Half} object.
342      *
343      * @return A hash code value for this object
344      */
345     @Override
hashCode()346     public int hashCode() {
347         return hashCode(mValue);
348     }
349 
350     /**
351      * Returns a string representation of the specified half-precision
352      * float value. See {@link #toString(short)} for more information.
353      *
354      * @return A string representation of this {@code Half} object
355      */
356     @NonNull
357     @Override
toString()358     public String toString() {
359         return toString(mValue);
360     }
361 
362     /**
363      * <p>Compares the two specified half-precision float values. The following
364      * conditions apply during the comparison:</p>
365      *
366      * <ul>
367      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
368      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
369      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
370      * {@link #NEGATIVE_ZERO}.</li>
371      * </ul>
372      *
373      * @param h The half-precision float value to compare to the half-precision value
374      *          represented by this {@code Half} object
375      *
376      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}; a
377      *          value less than {@code 0} if {@code x} is numerically less than {@code y};
378      *          and a value greater than {@code 0} if {@code x} is numerically greater
379      *          than {@code y}
380      */
381     @Override
compareTo(@onNull Half h)382     public int compareTo(@NonNull Half h) {
383         return compare(mValue, h.mValue);
384     }
385 
386     /**
387      * Returns a hash code for a half-precision float value.
388      *
389      * @param h The value to hash
390      *
391      * @return A hash code value for a half-precision float value
392      */
hashCode(@alfFloat short h)393     public static int hashCode(@HalfFloat short h) {
394         return halfToIntBits(h);
395     }
396 
397     /**
398      * <p>Compares the two specified half-precision float values. The following
399      * conditions apply during the comparison:</p>
400      *
401      * <ul>
402      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
403      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
404      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
405      * {@link #NEGATIVE_ZERO}.</li>
406      * </ul>
407      *
408      * @param x The first half-precision float value to compare.
409      * @param y The second half-precision float value to compare
410      *
411      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
412      *          value less than {@code 0} if {@code x} is numerically less than {@code y},
413      *          and a value greater than {@code 0} if {@code x} is numerically greater
414      *          than {@code y}
415      */
compare(@alfFloat short x, @HalfFloat short y)416     public static int compare(@HalfFloat short x, @HalfFloat short y) {
417         if (less(x, y)) return -1;
418         if (greater(x, y)) return 1;
419 
420         // Collapse NaNs, akin to halfToIntBits(), but we want to keep
421         // (signed) short value types to preserve the ordering of -0.0
422         // and +0.0
423         short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x;
424         short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y;
425 
426         return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
427     }
428 
429     /**
430      * <p>Returns a representation of the specified half-precision float value
431      * according to the bit layout described in {@link Half}.</p>
432      *
433      * <p>Similar to {@link #halfToIntBits(short)}, this method collapses all
434      * possible Not-a-Number values to a single canonical Not-a-Number value
435      * defined by {@link #NaN}.</p>
436      *
437      * @param h A half-precision float value
438      * @return The bits that represent the half-precision float value
439      *
440      * @see #halfToIntBits(short)
441      */
halfToShortBits(@alfFloat short h)442     public static @HalfFloat short halfToShortBits(@HalfFloat short h) {
443         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h;
444     }
445 
446     /**
447      * <p>Returns a representation of the specified half-precision float value
448      * according to the bit layout described in {@link Half}.</p>
449      *
450      * <p>Unlike {@link #halfToRawIntBits(short)}, this method collapses all
451      * possible Not-a-Number values to a single canonical Not-a-Number value
452      * defined by {@link #NaN}.</p>
453      *
454      * @param h A half-precision float value
455      * @return The bits that represent the half-precision float value
456      *
457      * @see #halfToRawIntBits(short)
458      * @see #halfToShortBits(short)
459      * @see #intBitsToHalf(int)
460      */
halfToIntBits(@alfFloat short h)461     public static int halfToIntBits(@HalfFloat short h) {
462         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff;
463     }
464 
465     /**
466      * <p>Returns a representation of the specified half-precision float value
467      * according to the bit layout described in {@link Half}.</p>
468      *
469      * <p>The argument is considered to be a representation of a half-precision
470      * float value according to the bit layout described in {@link Half}. The 16
471      * most significant bits of the returned value are set to 0.</p>
472      *
473      * @param h A half-precision float value
474      * @return The bits that represent the half-precision float value
475      *
476      * @see #halfToIntBits(short)
477      * @see #intBitsToHalf(int)
478      */
halfToRawIntBits(@alfFloat short h)479     public static int halfToRawIntBits(@HalfFloat short h) {
480         return h & 0xffff;
481     }
482 
483     /**
484      * <p>Returns the half-precision float value corresponding to a given
485      * bit representation.</p>
486      *
487      * <p>The argument is considered to be a representation of a half-precision
488      * float value according to the bit layout described in {@link Half}. The 16
489      * most significant bits of the argument are ignored.</p>
490      *
491      * @param bits An integer
492      * @return The half-precision float value with the same bit pattern
493      */
intBitsToHalf(int bits)494     public static @HalfFloat short intBitsToHalf(int bits) {
495         return (short) (bits & 0xffff);
496     }
497 
498     /**
499      * Returns the first parameter with the sign of the second parameter.
500      * This method treats NaNs as having a sign.
501      *
502      * @param magnitude A half-precision float value providing the magnitude of the result
503      * @param sign  A half-precision float value providing the sign of the result
504      * @return A value with the magnitude of the first parameter and the sign
505      *         of the second parameter
506      */
copySign(@alfFloat short magnitude, @HalfFloat short sign)507     public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) {
508         return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED));
509     }
510 
511     /**
512      * Returns the absolute value of the specified half-precision float.
513      * Special values are handled in the following ways:
514      * <ul>
515      * <li>If the specified half-precision float is NaN, the result is NaN</li>
516      * <li>If the specified half-precision float is zero (negative or positive),
517      * the result is positive zero (see {@link #POSITIVE_ZERO})</li>
518      * <li>If the specified half-precision float is infinity (negative or positive),
519      * the result is positive infinity (see {@link #POSITIVE_INFINITY})</li>
520      * </ul>
521      *
522      * @param h A half-precision float value
523      * @return The absolute value of the specified half-precision float
524      */
abs(@alfFloat short h)525     public static @HalfFloat short abs(@HalfFloat short h) {
526         return (short) (h & FP16_COMBINED);
527     }
528 
529     /**
530      * Returns the closest integral half-precision float value to the specified
531      * half-precision float value. Special values are handled in the
532      * following ways:
533      * <ul>
534      * <li>If the specified half-precision float is NaN, the result is NaN</li>
535      * <li>If the specified half-precision float is infinity (negative or positive),
536      * the result is infinity (with the same sign)</li>
537      * <li>If the specified half-precision float is zero (negative or positive),
538      * the result is zero (with the same sign)</li>
539      * </ul>
540      *
541      * @param h A half-precision float value
542      * @return The value of the specified half-precision float rounded to the nearest
543      *         half-precision float value
544      */
round(@alfFloat short h)545     public static @HalfFloat short round(@HalfFloat short h) {
546         int bits = h & 0xffff;
547         int e = bits & 0x7fff;
548         int result = bits;
549 
550         if (e < 0x3c00) {
551             result &= FP16_SIGN_MASK;
552             result |= (0x3c00 & (e >= 0x3800 ? 0xffff : 0x0));
553         } else if (e < 0x6400) {
554             e = 25 - (e >> 10);
555             int mask = (1 << e) - 1;
556             result += (1 << (e - 1));
557             result &= ~mask;
558         }
559 
560         return (short) result;
561     }
562 
563     /**
564      * Returns the smallest half-precision float value toward negative infinity
565      * greater than or equal to the specified half-precision float value.
566      * Special values are handled in the following ways:
567      * <ul>
568      * <li>If the specified half-precision float is NaN, the result is NaN</li>
569      * <li>If the specified half-precision float is infinity (negative or positive),
570      * the result is infinity (with the same sign)</li>
571      * <li>If the specified half-precision float is zero (negative or positive),
572      * the result is zero (with the same sign)</li>
573      * </ul>
574      *
575      * @param h A half-precision float value
576      * @return The smallest half-precision float value toward negative infinity
577      *         greater than or equal to the specified half-precision float value
578      */
ceil(@alfFloat short h)579     public static @HalfFloat short ceil(@HalfFloat short h) {
580         int bits = h & 0xffff;
581         int e = bits & 0x7fff;
582         int result = bits;
583 
584         if (e < 0x3c00) {
585             result &= FP16_SIGN_MASK;
586             result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0));
587         } else if (e < 0x6400) {
588             e = 25 - (e >> 10);
589             int mask = (1 << e) - 1;
590             result += mask & ((bits >> 15) - 1);
591             result &= ~mask;
592         }
593 
594         return (short) result;
595     }
596 
597     /**
598      * Returns the largest half-precision float value toward positive infinity
599      * less than or equal to the specified half-precision float value.
600      * Special values are handled in the following ways:
601      * <ul>
602      * <li>If the specified half-precision float is NaN, the result is NaN</li>
603      * <li>If the specified half-precision float is infinity (negative or positive),
604      * the result is infinity (with the same sign)</li>
605      * <li>If the specified half-precision float is zero (negative or positive),
606      * the result is zero (with the same sign)</li>
607      * </ul>
608      *
609      * @param h A half-precision float value
610      * @return The largest half-precision float value toward positive infinity
611      *         less than or equal to the specified half-precision float value
612      */
floor(@alfFloat short h)613     public static @HalfFloat short floor(@HalfFloat short h) {
614         int bits = h & 0xffff;
615         int e = bits & 0x7fff;
616         int result = bits;
617 
618         if (e < 0x3c00) {
619             result &= FP16_SIGN_MASK;
620             result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
621         } else if (e < 0x6400) {
622             e = 25 - (e >> 10);
623             int mask = (1 << e) - 1;
624             result += mask & -(bits >> 15);
625             result &= ~mask;
626         }
627 
628         return (short) result;
629     }
630 
631     /**
632      * Returns the truncated half-precision float value of the specified
633      * half-precision float value. Special values are handled in the following ways:
634      * <ul>
635      * <li>If the specified half-precision float is NaN, the result is NaN</li>
636      * <li>If the specified half-precision float is infinity (negative or positive),
637      * the result is infinity (with the same sign)</li>
638      * <li>If the specified half-precision float is zero (negative or positive),
639      * the result is zero (with the same sign)</li>
640      * </ul>
641      *
642      * @param h A half-precision float value
643      * @return The truncated half-precision float value of the specified
644      *         half-precision float value
645      */
trunc(@alfFloat short h)646     public static @HalfFloat short trunc(@HalfFloat short h) {
647         int bits = h & 0xffff;
648         int e = bits & 0x7fff;
649         int result = bits;
650 
651         if (e < 0x3c00) {
652             result &= FP16_SIGN_MASK;
653         } else if (e < 0x6400) {
654             e = 25 - (e >> 10);
655             int mask = (1 << e) - 1;
656             result &= ~mask;
657         }
658 
659         return (short) result;
660     }
661 
662     /**
663      * Returns the smaller of two half-precision float values (the value closest
664      * to negative infinity). Special values are handled in the following ways:
665      * <ul>
666      * <li>If either value is NaN, the result is NaN</li>
667      * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
668      * </ul>
669      *
670      * @param x The first half-precision value
671      * @param y The second half-precision value
672      * @return The smaller of the two specified half-precision values
673      */
min(@alfFloat short x, @HalfFloat short y)674     public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) {
675         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
676         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
677 
678         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
679             return (x & FP16_SIGN_MASK) != 0 ? x : y;
680         }
681 
682         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
683                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
684     }
685 
686     /**
687      * Returns the larger of two half-precision float values (the value closest
688      * to positive infinity). Special values are handled in the following ways:
689      * <ul>
690      * <li>If either value is NaN, the result is NaN</li>
691      * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
692      * </ul>
693      *
694      * @param x The first half-precision value
695      * @param y The second half-precision value
696      *
697      * @return The larger of the two specified half-precision values
698      */
max(@alfFloat short x, @HalfFloat short y)699     public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) {
700         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
701         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN;
702 
703         if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) {
704             return (x & FP16_SIGN_MASK) != 0 ? y : x;
705         }
706 
707         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
708                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
709     }
710 
711     /**
712      * Returns true if the first half-precision float value is less (smaller
713      * toward negative infinity) than the second half-precision float value.
714      * If either of the values is NaN, the result is false.
715      *
716      * @param x The first half-precision value
717      * @param y The second half-precision value
718      *
719      * @return True if x is less than y, false otherwise
720      */
less(@alfFloat short x, @HalfFloat short y)721     public static boolean less(@HalfFloat short x, @HalfFloat short y) {
722         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
723         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
724 
725         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
726                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
727     }
728 
729     /**
730      * Returns true if the first half-precision float value is less (smaller
731      * toward negative infinity) than or equal to the second half-precision
732      * float value. If either of the values is NaN, the result is false.
733      *
734      * @param x The first half-precision value
735      * @param y The second half-precision value
736      *
737      * @return True if x is less than or equal to y, false otherwise
738      */
lessEquals(@alfFloat short x, @HalfFloat short y)739     public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) {
740         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
741         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
742 
743         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <=
744                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
745     }
746 
747     /**
748      * Returns true if the first half-precision float value is greater (larger
749      * toward positive infinity) than the second half-precision float value.
750      * If either of the values is NaN, the result is false.
751      *
752      * @param x The first half-precision value
753      * @param y The second half-precision value
754      *
755      * @return True if x is greater than y, false otherwise
756      */
greater(@alfFloat short x, @HalfFloat short y)757     public static boolean greater(@HalfFloat short x, @HalfFloat short y) {
758         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
759         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
760 
761         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
762                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
763     }
764 
765     /**
766      * Returns true if the first half-precision float value is greater (larger
767      * toward positive infinity) than or equal to the second half-precision float
768      * value. If either of the values is NaN, the result is false.
769      *
770      * @param x The first half-precision value
771      * @param y The second half-precision value
772      *
773      * @return True if x is greater than y, false otherwise
774      */
greaterEquals(@alfFloat short x, @HalfFloat short y)775     public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) {
776         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
777         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
778 
779         return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >=
780                ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
781     }
782 
783     /**
784      * Returns true if the two half-precision float values are equal.
785      * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
786      * and {@link #NEGATIVE_ZERO} are considered equal.
787      *
788      * @param x The first half-precision value
789      * @param y The second half-precision value
790      *
791      * @return True if x is equal to y, false otherwise
792      */
equals(@alfFloat short x, @HalfFloat short y)793     public static boolean equals(@HalfFloat short x, @HalfFloat short y) {
794         if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
795         if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false;
796 
797         return x == y || ((x | y) & FP16_COMBINED) == 0;
798     }
799 
800     /**
801      * Returns the sign of the specified half-precision float.
802      *
803      * @param h A half-precision float value
804      * @return 1 if the value is positive, -1 if the value is negative
805      */
getSign(@alfFloat short h)806     public static int getSign(@HalfFloat short h) {
807         return (h & FP16_SIGN_MASK) == 0 ? 1 : -1;
808     }
809 
810     /**
811      * Returns the unbiased exponent used in the representation of
812      * the specified  half-precision float value. if the value is NaN
813      * or infinite, this* method returns {@link #MAX_EXPONENT} + 1.
814      * If the argument is 0 or a subnormal representation, this method
815      * returns {@link #MIN_EXPONENT} - 1.
816      *
817      * @param h A half-precision float value
818      * @return The unbiased exponent of the specified value
819      */
getExponent(@alfFloat short h)820     public static int getExponent(@HalfFloat short h) {
821         return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS;
822     }
823 
824     /**
825      * Returns the significand, or mantissa, used in the representation
826      * of the specified half-precision float value.
827      *
828      * @param h A half-precision float value
829      * @return The significand, or significand, of the specified vlaue
830      */
getSignificand(@alfFloat short h)831     public static int getSignificand(@HalfFloat short h) {
832         return h & FP16_SIGNIFICAND_MASK;
833     }
834 
835     /**
836      * Returns true if the specified half-precision float value represents
837      * infinity, false otherwise.
838      *
839      * @param h A half-precision float value
840      * @return True if the value is positive infinity or negative infinity,
841      *         false otherwise
842      */
isInfinite(@alfFloat short h)843     public static boolean isInfinite(@HalfFloat short h) {
844         return (h & FP16_COMBINED) == FP16_EXPONENT_MAX;
845     }
846 
847     /**
848      * Returns true if the specified half-precision float value represents
849      * a Not-a-Number, false otherwise.
850      *
851      * @param h A half-precision float value
852      * @return True if the value is a NaN, false otherwise
853      */
isNaN(@alfFloat short h)854     public static boolean isNaN(@HalfFloat short h) {
855         return (h & FP16_COMBINED) > FP16_EXPONENT_MAX;
856     }
857 
858     /**
859      * Returns true if the specified half-precision float value is normalized
860      * (does not have a subnormal representation). If the specified value is
861      * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
862      * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
863      * number, this method returns false.
864      *
865      * @param h A half-precision float value
866      * @return True if the value is normalized, false otherwise
867      */
isNormalized(@alfFloat short h)868     public static boolean isNormalized(@HalfFloat short h) {
869         return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX;
870     }
871 
872     /**
873      * <p>Converts the specified half-precision float value into a
874      * single-precision float value. The following special cases are handled:</p>
875      * <ul>
876      * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
877      * <li>If the input is {@link #POSITIVE_INFINITY} or
878      * {@link #NEGATIVE_INFINITY}, the returned value is respectively
879      * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
880      * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
881      * <li>Otherwise, the returned value is a normalized single-precision float value</li>
882      * </ul>
883      *
884      * @param h The half-precision float value to convert to single-precision
885      * @return A normalized single-precision float value
886      */
toFloat(@alfFloat short h)887     public static float toFloat(@HalfFloat short h) {
888         int bits = h & 0xffff;
889         int s = bits & FP16_SIGN_MASK;
890         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
891         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
892 
893         int outE = 0;
894         int outM = 0;
895 
896         if (e == 0) { // Denormal or 0
897             if (m != 0) {
898                 // Convert denorm fp16 into normalized fp32
899                 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
900                 o -= FP32_DENORMAL_FLOAT;
901                 return s == 0 ? o : -o;
902             }
903         } else {
904             outM = m << 13;
905             if (e == 0x1f) { // Infinite or NaN
906                 outE = 0xff;
907                 if (outM != 0) { // SNaNs are quieted
908                     outM |= FP32_QNAN_MASK;
909                 }
910             } else {
911                 outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS;
912             }
913         }
914 
915         int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
916         return Float.intBitsToFloat(out);
917     }
918 
919     /**
920      * <p>Converts the specified single-precision float value into a
921      * half-precision float value. The following special cases are handled:</p>
922      * <ul>
923      * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
924      * value is {@link #NaN}</li>
925      * <li>If the input is {@link Float#POSITIVE_INFINITY} or
926      * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
927      * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
928      * <li>If the input is 0 (positive or negative), the returned value is
929      * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
930      * <li>If the input is a less than {@link #MIN_VALUE}, the returned value
931      * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
932      * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value
933      * is a denorm half-precision float</li>
934      * <li>Otherwise, the returned value is rounded to the nearest
935      * representable half-precision float value</li>
936      * </ul>
937      *
938      * @param f The single-precision float value to convert to half-precision
939      * @return A half-precision float value
940      */
941     @SuppressWarnings("StatementWithEmptyBody")
toHalf(float f)942     public static @HalfFloat short toHalf(float f) {
943         int bits = Float.floatToRawIntBits(f);
944         int s = (bits >>> FP32_SIGN_SHIFT    );
945         int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK;
946         int m = (bits                        ) & FP32_SIGNIFICAND_MASK;
947 
948         int outE = 0;
949         int outM = 0;
950 
951         if (e == 0xff) { // Infinite or NaN
952             outE = 0x1f;
953             outM = m != 0 ? 0x200 : 0;
954         } else {
955             e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS;
956             if (e >= 0x1f) { // Overflow
957                 outE = 0x31;
958             } else if (e <= 0) { // Underflow
959                 if (e < -10) {
960                     // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
961                 } else {
962                     // The fp32 value is a normalized float less than MIN_NORMAL,
963                     // we convert to a denorm fp16
964                     m = (m | 0x800000) >> (1 - e);
965                     if ((m & 0x1000) != 0) m += 0x2000;
966                     outM = m >> 13;
967                 }
968             } else {
969                 outE = e;
970                 outM = m >> 13;
971                 if ((m & 0x1000) != 0) {
972                     // Round to nearest "0.5" up
973                     int out = (outE << FP16_EXPONENT_SHIFT) | outM;
974                     out++;
975                     return (short) (out | (s << FP16_SIGN_SHIFT));
976                 }
977             }
978         }
979 
980         return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM);
981     }
982 
983     /**
984      * Returns a {@code Half} instance representing the specified
985      * half-precision float value.
986      *
987      * @param h A half-precision float value
988      * @return a {@code Half} instance representing {@code h}
989      */
valueOf(@alfFloat short h)990     public static @NonNull Half valueOf(@HalfFloat short h) {
991         return new Half(h);
992     }
993 
994     /**
995      * Returns a {@code Half} instance representing the specified float value.
996      *
997      * @param f A float value
998      * @return a {@code Half} instance representing {@code f}
999      */
valueOf(float f)1000     public static @NonNull Half valueOf(float f) {
1001         return new Half(f);
1002     }
1003 
1004     /**
1005      * Returns a {@code Half} instance representing the specified string value.
1006      * Calling this method is equivalent to calling
1007      * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)}
1008      * for more information on the format of the string representation.
1009      *
1010      * @param s The string to be parsed
1011      * @return a {@code Half} instance representing {@code h}
1012      * @throws NumberFormatException if the string does not contain a parsable
1013      *         half-precision float value
1014      */
valueOf(@onNull String s)1015     public static @NonNull Half valueOf(@NonNull String s) {
1016         return new Half(s);
1017     }
1018 
1019     /**
1020      * Returns the half-precision float value represented by the specified string.
1021      * Calling this method is equivalent to calling
1022      * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)}
1023      * for more information on the format of the string representation.
1024      *
1025      * @param s The string to be parsed
1026      * @return A half-precision float value represented by the string
1027      * @throws NumberFormatException if the string does not contain a parsable
1028      *         half-precision float value
1029      */
parseHalf(@onNull String s)1030     public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException {
1031         return toHalf(Float.parseFloat(s));
1032     }
1033 
1034     /**
1035      * Returns a string representation of the specified half-precision
1036      * float value. Calling this method is equivalent to calling
1037      * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)}
1038      * for more information on the format of the string representation.
1039      *
1040      * @param h A half-precision float value
1041      * @return A string representation of the specified value
1042      */
1043     @NonNull
toString(@alfFloat short h)1044     public static String toString(@HalfFloat short h) {
1045         return Float.toString(toFloat(h));
1046     }
1047 
1048     /**
1049      * <p>Returns a hexadecimal string representation of the specified half-precision
1050      * float value. If the value is a NaN, the result is <code>"NaN"</code>,
1051      * otherwise the result follows this format:</p>
1052      * <ul>
1053      * <li>If the sign is positive, no sign character appears in the result</li>
1054      * <li>If the sign is negative, the first character is <code>'-'</code></li>
1055      * <li>If the value is inifinity, the string is <code>"Infinity"</code></li>
1056      * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
1057      * <li>If the value has a normalized representation, the exponent and
1058      * significand are represented in the string in two fields. The significand
1059      * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
1060      * representation. Trailing zeroes are removed unless all digits are 0, then
1061      * a single zero is used. The significand representation is followed by the
1062      * exponent, represented by <code>"p"</code>, itself followed by a decimal
1063      * string of the unbiased exponent</li>
1064      * <li>If the value has a subnormal representation, the significand starts
1065      * with <code>"0x0."</code> followed by its lowercase hexadecimal
1066      * representation. Trailing zeroes are removed unless all digits are 0, then
1067      * a single zero is used. The significand representation is followed by the
1068      * exponent, represented by <code>"p-14"</code></li>
1069      * </ul>
1070      *
1071      * @param h A half-precision float value
1072      * @return A hexadecimal string representation of the specified value
1073      */
1074     @NonNull
toHexString(@alfFloat short h)1075     public static String toHexString(@HalfFloat short h) {
1076         StringBuilder o = new StringBuilder();
1077 
1078         int bits = h & 0xffff;
1079         int s = (bits >>> FP16_SIGN_SHIFT    );
1080         int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK;
1081         int m = (bits                        ) & FP16_SIGNIFICAND_MASK;
1082 
1083         if (e == 0x1f) { // Infinite or NaN
1084             if (m == 0) {
1085                 if (s != 0) o.append('-');
1086                 o.append("Infinity");
1087             } else {
1088                 o.append("NaN");
1089             }
1090         } else {
1091             if (s == 1) o.append('-');
1092             if (e == 0) {
1093                 if (m == 0) {
1094                     o.append("0x0.0p0");
1095                 } else {
1096                     o.append("0x0.");
1097                     String significand = Integer.toHexString(m);
1098                     o.append(significand.replaceFirst("0{2,}$", ""));
1099                     o.append("p-14");
1100                 }
1101             } else {
1102                 o.append("0x1.");
1103                 String significand = Integer.toHexString(m);
1104                 o.append(significand.replaceFirst("0{2,}$", ""));
1105                 o.append('p');
1106                 o.append(Integer.toString(e - FP16_EXPONENT_BIAS));
1107             }
1108         }
1109 
1110         return o.toString();
1111     }
1112 }
1113