1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.util; 18 19 import android.annotation.HalfFloat; 20 import android.annotation.NonNull; 21 import android.annotation.Nullable; 22 23 /** 24 * <p>The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit 25 * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a> 26 * floating point data types (also called fp16 or binary16). A half-precision float can be 27 * created from or converted to single-precision floats, and is stored in a short data type. 
* To distinguish short values holding half-precision floats from regular short values,
 * it is recommended to use the <code>@HalfFloat</code> annotation.</p>
 *
 * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
 * <ul>
 * <li>Sign bit: 1 bit</li>
 * <li>Exponent width: 5 bits</li>
 * <li>Significand: 10 bits</li>
 * </ul>
 *
 * <p>The format is laid out as follows:</p>
 * <pre>
 * 1   11111   1111111111
 * ^   --^--   -----^----
 * sign  |          |_______ significand
 *       |
 *       -- exponent
 * </pre>
 *
 * <p>Half-precision floating points can be useful to save memory and/or
 * bandwidth at the expense of range and precision when compared to single-precision
 * floating points (fp32).</p>
 * <p>To help you decide whether fp16 is the right storage type for your needs, please
 * refer to the table below that shows the available precision throughout the range of
 * possible values. The <em>precision</em> column indicates the step size between two
 * consecutive numbers in a specific part of the range.</p>
 *
 * <table summary="Precision of fp16 across the range">
 *   <tr><th>Range start</th><th>Precision</th></tr>
 *   <tr><td>0</td><td>1 ⁄ 16,777,216</td></tr>
 *   <tr><td>1 ⁄ 16,384</td><td>1 ⁄ 16,777,216</td></tr>
 *   <tr><td>1 ⁄ 8,192</td><td>1 ⁄ 8,388,608</td></tr>
 *   <tr><td>1 ⁄ 4,096</td><td>1 ⁄ 4,194,304</td></tr>
 *   <tr><td>1 ⁄ 2,048</td><td>1 ⁄ 2,097,152</td></tr>
 *   <tr><td>1 ⁄ 1,024</td><td>1 ⁄ 1,048,576</td></tr>
 *   <tr><td>1 ⁄ 512</td><td>1 ⁄ 524,288</td></tr>
 *   <tr><td>1 ⁄ 256</td><td>1 ⁄ 262,144</td></tr>
 *   <tr><td>1 ⁄ 128</td><td>1 ⁄ 131,072</td></tr>
 *   <tr><td>1 ⁄ 64</td><td>1 ⁄ 65,536</td></tr>
 *   <tr><td>1 ⁄ 32</td><td>1 ⁄ 32,768</td></tr>
 *   <tr><td>1 ⁄ 16</td><td>1 ⁄ 16,384</td></tr>
 *   <tr><td>1 ⁄ 8</td><td>1 ⁄ 8,192</td></tr>
 *   <tr><td>1 ⁄ 4</td><td>1 ⁄ 4,096</td></tr>
 *   <tr><td>1 ⁄ 2</td><td>1 ⁄ 2,048</td></tr>
 *   <tr><td>1</td><td>1 ⁄ 
1,024</td></tr> 73 * <tr><td>2</td><td>1 ⁄ 512</td></tr> 74 * <tr><td>4</td><td>1 ⁄ 256</td></tr> 75 * <tr><td>8</td><td>1 ⁄ 128</td></tr> 76 * <tr><td>16</td><td>1 ⁄ 64</td></tr> 77 * <tr><td>32</td><td>1 ⁄ 32</td></tr> 78 * <tr><td>64</td><td>1 ⁄ 16</td></tr> 79 * <tr><td>128</td><td>1 ⁄ 8</td></tr> 80 * <tr><td>256</td><td>1 ⁄ 4</td></tr> 81 * <tr><td>512</td><td>1 ⁄ 2</td></tr> 82 * <tr><td>1,024</td><td>1</td></tr> 83 * <tr><td>2,048</td><td>2</td></tr> 84 * <tr><td>4,096</td><td>4</td></tr> 85 * <tr><td>8,192</td><td>8</td></tr> 86 * <tr><td>16,384</td><td>16</td></tr> 87 * <tr><td>32,768</td><td>32</td></tr> 88 * </table> 89 * 90 * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p> 91 */ 92 @SuppressWarnings("SimplifiableIfStatement") 93 public final class Half extends Number implements Comparable<Half> { 94 /** 95 * The number of bits used to represent a half-precision float value. 96 */ 97 public static final int SIZE = 16; 98 99 /** 100 * Epsilon is the difference between 1.0 and the next value representable 101 * by a half-precision floating-point. 102 */ 103 public static final @HalfFloat short EPSILON = (short) 0x1400; 104 105 /** 106 * Maximum exponent a finite half-precision float may have. 107 */ 108 public static final int MAX_EXPONENT = 15; 109 /** 110 * Minimum exponent a normalized half-precision float may have. 111 */ 112 public static final int MIN_EXPONENT = -14; 113 114 /** 115 * Smallest negative value a half-precision float may have. 116 */ 117 public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff; 118 /** 119 * Maximum positive finite value a half-precision float may have. 120 */ 121 public static final @HalfFloat short MAX_VALUE = (short) 0x7bff; 122 /** 123 * Smallest positive normal value a half-precision float may have. 124 */ 125 public static final @HalfFloat short MIN_NORMAL = (short) 0x0400; 126 /** 127 * Smallest positive non-zero value a half-precision float may have. 
128 */ 129 public static final @HalfFloat short MIN_VALUE = (short) 0x0001; 130 /** 131 * A Not-a-Number representation of a half-precision float. 132 */ 133 public static final @HalfFloat short NaN = (short) 0x7e00; 134 /** 135 * Negative infinity of type half-precision float. 136 */ 137 public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00; 138 /** 139 * Negative 0 of type half-precision float. 140 */ 141 public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000; 142 /** 143 * Positive infinity of type half-precision float. 144 */ 145 public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00; 146 /** 147 * Positive 0 of type half-precision float. 148 */ 149 public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000; 150 151 private static final int FP16_SIGN_SHIFT = 15; 152 private static final int FP16_SIGN_MASK = 0x8000; 153 private static final int FP16_EXPONENT_SHIFT = 10; 154 private static final int FP16_EXPONENT_MASK = 0x1f; 155 private static final int FP16_SIGNIFICAND_MASK = 0x3ff; 156 private static final int FP16_EXPONENT_BIAS = 15; 157 private static final int FP16_COMBINED = 0x7fff; 158 private static final int FP16_EXPONENT_MAX = 0x7c00; 159 160 private static final int FP32_SIGN_SHIFT = 31; 161 private static final int FP32_EXPONENT_SHIFT = 23; 162 private static final int FP32_EXPONENT_MASK = 0xff; 163 private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; 164 private static final int FP32_EXPONENT_BIAS = 127; 165 private static final int FP32_QNAN_MASK = 0x400000; 166 167 private static final int FP32_DENORMAL_MAGIC = 126 << 23; 168 private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); 169 170 private final @HalfFloat short mValue; 171 172 /** 173 * Constructs a newly allocated {@code Half} object that represents the 174 * half-precision float type argument. 
175 * 176 * @param value The value to be represented by the {@code Half} 177 */ Half(@alfFloat short value)178 public Half(@HalfFloat short value) { 179 mValue = value; 180 } 181 182 /** 183 * Constructs a newly allocated {@code Half} object that represents the 184 * argument converted to a half-precision float. 185 * 186 * @param value The value to be represented by the {@code Half} 187 * 188 * @see #toHalf(float) 189 */ Half(float value)190 public Half(float value) { 191 mValue = toHalf(value); 192 } 193 194 /** 195 * Constructs a newly allocated {@code Half} object that 196 * represents the argument converted to a half-precision float. 197 * 198 * @param value The value to be represented by the {@code Half} 199 * 200 * @see #toHalf(float) 201 */ Half(double value)202 public Half(double value) { 203 mValue = toHalf((float) value); 204 } 205 206 /** 207 * <p>Constructs a newly allocated {@code Half} object that represents the 208 * half-precision float value represented by the string. 209 * The string is converted to a half-precision float value as if by the 210 * {@link #valueOf(String)} method.</p> 211 * 212 * <p>Calling this constructor is equivalent to calling:</p> 213 * <pre> 214 * new Half(Float.parseFloat(value)) 215 * </pre> 216 * 217 * @param value A string to be converted to a {@code Half} 218 * @throws NumberFormatException if the string does not contain a parsable number 219 * 220 * @see Float#valueOf(java.lang.String) 221 * @see #toHalf(float) 222 */ Half(@onNull String value)223 public Half(@NonNull String value) throws NumberFormatException { 224 mValue = toHalf(Float.parseFloat(value)); 225 } 226 227 /** 228 * Returns the half-precision value of this {@code Half} as a {@code short} 229 * containing the bit representation described in {@link Half}. 
230 * 231 * @return The half-precision float value represented by this object 232 */ halfValue()233 public @HalfFloat short halfValue() { 234 return mValue; 235 } 236 237 /** 238 * Returns the value of this {@code Half} as a {@code byte} after 239 * a narrowing primitive conversion. 240 * 241 * @return The half-precision float value represented by this object 242 * converted to type {@code byte} 243 */ 244 @Override byteValue()245 public byte byteValue() { 246 return (byte) toFloat(mValue); 247 } 248 249 /** 250 * Returns the value of this {@code Half} as a {@code short} after 251 * a narrowing primitive conversion. 252 * 253 * @return The half-precision float value represented by this object 254 * converted to type {@code short} 255 */ 256 @Override shortValue()257 public short shortValue() { 258 return (short) toFloat(mValue); 259 } 260 261 /** 262 * Returns the value of this {@code Half} as a {@code int} after 263 * a narrowing primitive conversion. 264 * 265 * @return The half-precision float value represented by this object 266 * converted to type {@code int} 267 */ 268 @Override intValue()269 public int intValue() { 270 return (int) toFloat(mValue); 271 } 272 273 /** 274 * Returns the value of this {@code Half} as a {@code long} after 275 * a narrowing primitive conversion. 276 * 277 * @return The half-precision float value represented by this object 278 * converted to type {@code long} 279 */ 280 @Override longValue()281 public long longValue() { 282 return (long) toFloat(mValue); 283 } 284 285 /** 286 * Returns the value of this {@code Half} as a {@code float} after 287 * a widening primitive conversion. 288 * 289 * @return The half-precision float value represented by this object 290 * converted to type {@code float} 291 */ 292 @Override floatValue()293 public float floatValue() { 294 return toFloat(mValue); 295 } 296 297 /** 298 * Returns the value of this {@code Half} as a {@code double} after 299 * a widening primitive conversion. 
300 * 301 * @return The half-precision float value represented by this object 302 * converted to type {@code double} 303 */ 304 @Override doubleValue()305 public double doubleValue() { 306 return toFloat(mValue); 307 } 308 309 /** 310 * Returns true if this {@code Half} value represents a Not-a-Number, 311 * false otherwise. 312 * 313 * @return True if the value is a NaN, false otherwise 314 */ isNaN()315 public boolean isNaN() { 316 return isNaN(mValue); 317 } 318 319 /** 320 * Compares this object against the specified object. The result is {@code true} 321 * if and only if the argument is not {@code null} and is a {@code Half} object 322 * that represents the same half-precision value as the this object. Two 323 * half-precision values are considered to be the same if and only if the method 324 * {@link #halfToIntBits(short)} returns an identical {@code int} value for both. 325 * 326 * @param o The object to compare 327 * @return True if the objects are the same, false otherwise 328 * 329 * @see #halfToIntBits(short) 330 */ 331 @Override equals(@ullable Object o)332 public boolean equals(@Nullable Object o) { 333 return (o instanceof Half) && 334 (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue)); 335 } 336 337 /** 338 * Returns a hash code for this {@code Half} object. The result is the 339 * integer bit representation, exactly as produced by the method 340 * {@link #halfToIntBits(short)}, of the primitive half-precision float 341 * value represented by this {@code Half} object. 342 * 343 * @return A hash code value for this object 344 */ 345 @Override hashCode()346 public int hashCode() { 347 return hashCode(mValue); 348 } 349 350 /** 351 * Returns a string representation of the specified half-precision 352 * float value. See {@link #toString(short)} for more information. 
353 * 354 * @return A string representation of this {@code Half} object 355 */ 356 @NonNull 357 @Override toString()358 public String toString() { 359 return toString(mValue); 360 } 361 362 /** 363 * <p>Compares the two specified half-precision float values. The following 364 * conditions apply during the comparison:</p> 365 * 366 * <ul> 367 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 368 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 369 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 370 * {@link #NEGATIVE_ZERO}.</li> 371 * </ul> 372 * 373 * @param h The half-precision float value to compare to the half-precision value 374 * represented by this {@code Half} object 375 * 376 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}; a 377 * value less than {@code 0} if {@code x} is numerically less than {@code y}; 378 * and a value greater than {@code 0} if {@code x} is numerically greater 379 * than {@code y} 380 */ 381 @Override compareTo(@onNull Half h)382 public int compareTo(@NonNull Half h) { 383 return compare(mValue, h.mValue); 384 } 385 386 /** 387 * Returns a hash code for a half-precision float value. 388 * 389 * @param h The value to hash 390 * 391 * @return A hash code value for a half-precision float value 392 */ hashCode(@alfFloat short h)393 public static int hashCode(@HalfFloat short h) { 394 return halfToIntBits(h); 395 } 396 397 /** 398 * <p>Compares the two specified half-precision float values. 
The following 399 * conditions apply during the comparison:</p> 400 * 401 * <ul> 402 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 403 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 404 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 405 * {@link #NEGATIVE_ZERO}.</li> 406 * </ul> 407 * 408 * @param x The first half-precision float value to compare. 409 * @param y The second half-precision float value to compare 410 * 411 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a 412 * value less than {@code 0} if {@code x} is numerically less than {@code y}, 413 * and a value greater than {@code 0} if {@code x} is numerically greater 414 * than {@code y} 415 */ compare(@alfFloat short x, @HalfFloat short y)416 public static int compare(@HalfFloat short x, @HalfFloat short y) { 417 if (less(x, y)) return -1; 418 if (greater(x, y)) return 1; 419 420 // Collapse NaNs, akin to halfToIntBits(), but we want to keep 421 // (signed) short value types to preserve the ordering of -0.0 422 // and +0.0 423 short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x; 424 short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y; 425 426 return (xBits == yBits ? 0 : (xBits < yBits ? 
-1 : 1)); 427 } 428 429 /** 430 * <p>Returns a representation of the specified half-precision float value 431 * according to the bit layout described in {@link Half}.</p> 432 * 433 * <p>Similar to {@link #halfToIntBits(short)}, this method collapses all 434 * possible Not-a-Number values to a single canonical Not-a-Number value 435 * defined by {@link #NaN}.</p> 436 * 437 * @param h A half-precision float value 438 * @return The bits that represent the half-precision float value 439 * 440 * @see #halfToIntBits(short) 441 */ halfToShortBits(@alfFloat short h)442 public static @HalfFloat short halfToShortBits(@HalfFloat short h) { 443 return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h; 444 } 445 446 /** 447 * <p>Returns a representation of the specified half-precision float value 448 * according to the bit layout described in {@link Half}.</p> 449 * 450 * <p>Unlike {@link #halfToRawIntBits(short)}, this method collapses all 451 * possible Not-a-Number values to a single canonical Not-a-Number value 452 * defined by {@link #NaN}.</p> 453 * 454 * @param h A half-precision float value 455 * @return The bits that represent the half-precision float value 456 * 457 * @see #halfToRawIntBits(short) 458 * @see #halfToShortBits(short) 459 * @see #intBitsToHalf(int) 460 */ halfToIntBits(@alfFloat short h)461 public static int halfToIntBits(@HalfFloat short h) { 462 return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff; 463 } 464 465 /** 466 * <p>Returns a representation of the specified half-precision float value 467 * according to the bit layout described in {@link Half}.</p> 468 * 469 * <p>The argument is considered to be a representation of a half-precision 470 * float value according to the bit layout described in {@link Half}. 
The 16 471 * most significant bits of the returned value are set to 0.</p> 472 * 473 * @param h A half-precision float value 474 * @return The bits that represent the half-precision float value 475 * 476 * @see #halfToIntBits(short) 477 * @see #intBitsToHalf(int) 478 */ halfToRawIntBits(@alfFloat short h)479 public static int halfToRawIntBits(@HalfFloat short h) { 480 return h & 0xffff; 481 } 482 483 /** 484 * <p>Returns the half-precision float value corresponding to a given 485 * bit representation.</p> 486 * 487 * <p>The argument is considered to be a representation of a half-precision 488 * float value according to the bit layout described in {@link Half}. The 16 489 * most significant bits of the argument are ignored.</p> 490 * 491 * @param bits An integer 492 * @return The half-precision float value with the same bit pattern 493 */ intBitsToHalf(int bits)494 public static @HalfFloat short intBitsToHalf(int bits) { 495 return (short) (bits & 0xffff); 496 } 497 498 /** 499 * Returns the first parameter with the sign of the second parameter. 500 * This method treats NaNs as having a sign. 501 * 502 * @param magnitude A half-precision float value providing the magnitude of the result 503 * @param sign A half-precision float value providing the sign of the result 504 * @return A value with the magnitude of the first parameter and the sign 505 * of the second parameter 506 */ copySign(@alfFloat short magnitude, @HalfFloat short sign)507 public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) { 508 return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED)); 509 } 510 511 /** 512 * Returns the absolute value of the specified half-precision float. 
513 * Special values are handled in the following ways: 514 * <ul> 515 * <li>If the specified half-precision float is NaN, the result is NaN</li> 516 * <li>If the specified half-precision float is zero (negative or positive), 517 * the result is positive zero (see {@link #POSITIVE_ZERO})</li> 518 * <li>If the specified half-precision float is infinity (negative or positive), 519 * the result is positive infinity (see {@link #POSITIVE_INFINITY})</li> 520 * </ul> 521 * 522 * @param h A half-precision float value 523 * @return The absolute value of the specified half-precision float 524 */ abs(@alfFloat short h)525 public static @HalfFloat short abs(@HalfFloat short h) { 526 return (short) (h & FP16_COMBINED); 527 } 528 529 /** 530 * Returns the closest integral half-precision float value to the specified 531 * half-precision float value. Special values are handled in the 532 * following ways: 533 * <ul> 534 * <li>If the specified half-precision float is NaN, the result is NaN</li> 535 * <li>If the specified half-precision float is infinity (negative or positive), 536 * the result is infinity (with the same sign)</li> 537 * <li>If the specified half-precision float is zero (negative or positive), 538 * the result is zero (with the same sign)</li> 539 * </ul> 540 * 541 * @param h A half-precision float value 542 * @return The value of the specified half-precision float rounded to the nearest 543 * half-precision float value 544 */ round(@alfFloat short h)545 public static @HalfFloat short round(@HalfFloat short h) { 546 int bits = h & 0xffff; 547 int e = bits & 0x7fff; 548 int result = bits; 549 550 if (e < 0x3c00) { 551 result &= FP16_SIGN_MASK; 552 result |= (0x3c00 & (e >= 0x3800 ? 
0xffff : 0x0)); 553 } else if (e < 0x6400) { 554 e = 25 - (e >> 10); 555 int mask = (1 << e) - 1; 556 result += (1 << (e - 1)); 557 result &= ~mask; 558 } 559 560 return (short) result; 561 } 562 563 /** 564 * Returns the smallest half-precision float value toward negative infinity 565 * greater than or equal to the specified half-precision float value. 566 * Special values are handled in the following ways: 567 * <ul> 568 * <li>If the specified half-precision float is NaN, the result is NaN</li> 569 * <li>If the specified half-precision float is infinity (negative or positive), 570 * the result is infinity (with the same sign)</li> 571 * <li>If the specified half-precision float is zero (negative or positive), 572 * the result is zero (with the same sign)</li> 573 * </ul> 574 * 575 * @param h A half-precision float value 576 * @return The smallest half-precision float value toward negative infinity 577 * greater than or equal to the specified half-precision float value 578 */ ceil(@alfFloat short h)579 public static @HalfFloat short ceil(@HalfFloat short h) { 580 int bits = h & 0xffff; 581 int e = bits & 0x7fff; 582 int result = bits; 583 584 if (e < 0x3c00) { 585 result &= FP16_SIGN_MASK; 586 result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0)); 587 } else if (e < 0x6400) { 588 e = 25 - (e >> 10); 589 int mask = (1 << e) - 1; 590 result += mask & ((bits >> 15) - 1); 591 result &= ~mask; 592 } 593 594 return (short) result; 595 } 596 597 /** 598 * Returns the largest half-precision float value toward positive infinity 599 * less than or equal to the specified half-precision float value. 
600 * Special values are handled in the following ways: 601 * <ul> 602 * <li>If the specified half-precision float is NaN, the result is NaN</li> 603 * <li>If the specified half-precision float is infinity (negative or positive), 604 * the result is infinity (with the same sign)</li> 605 * <li>If the specified half-precision float is zero (negative or positive), 606 * the result is zero (with the same sign)</li> 607 * </ul> 608 * 609 * @param h A half-precision float value 610 * @return The largest half-precision float value toward positive infinity 611 * less than or equal to the specified half-precision float value 612 */ floor(@alfFloat short h)613 public static @HalfFloat short floor(@HalfFloat short h) { 614 int bits = h & 0xffff; 615 int e = bits & 0x7fff; 616 int result = bits; 617 618 if (e < 0x3c00) { 619 result &= FP16_SIGN_MASK; 620 result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0); 621 } else if (e < 0x6400) { 622 e = 25 - (e >> 10); 623 int mask = (1 << e) - 1; 624 result += mask & -(bits >> 15); 625 result &= ~mask; 626 } 627 628 return (short) result; 629 } 630 631 /** 632 * Returns the truncated half-precision float value of the specified 633 * half-precision float value. 
Special values are handled in the following ways: 634 * <ul> 635 * <li>If the specified half-precision float is NaN, the result is NaN</li> 636 * <li>If the specified half-precision float is infinity (negative or positive), 637 * the result is infinity (with the same sign)</li> 638 * <li>If the specified half-precision float is zero (negative or positive), 639 * the result is zero (with the same sign)</li> 640 * </ul> 641 * 642 * @param h A half-precision float value 643 * @return The truncated half-precision float value of the specified 644 * half-precision float value 645 */ trunc(@alfFloat short h)646 public static @HalfFloat short trunc(@HalfFloat short h) { 647 int bits = h & 0xffff; 648 int e = bits & 0x7fff; 649 int result = bits; 650 651 if (e < 0x3c00) { 652 result &= FP16_SIGN_MASK; 653 } else if (e < 0x6400) { 654 e = 25 - (e >> 10); 655 int mask = (1 << e) - 1; 656 result &= ~mask; 657 } 658 659 return (short) result; 660 } 661 662 /** 663 * Returns the smaller of two half-precision float values (the value closest 664 * to negative infinity). Special values are handled in the following ways: 665 * <ul> 666 * <li>If either value is NaN, the result is NaN</li> 667 * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li> 668 * </ul> 669 * 670 * @param x The first half-precision value 671 * @param y The second half-precision value 672 * @return The smaller of the two specified half-precision values 673 */ min(@alfFloat short x, @HalfFloat short y)674 public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) { 675 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 676 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 677 678 if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) { 679 return (x & FP16_SIGN_MASK) != 0 ? x : y; 680 } 681 682 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 683 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? 
x : y; 684 } 685 686 /** 687 * Returns the larger of two half-precision float values (the value closest 688 * to positive infinity). Special values are handled in the following ways: 689 * <ul> 690 * <li>If either value is NaN, the result is NaN</li> 691 * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li> 692 * </ul> 693 * 694 * @param x The first half-precision value 695 * @param y The second half-precision value 696 * 697 * @return The larger of the two specified half-precision values 698 */ max(@alfFloat short x, @HalfFloat short y)699 public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) { 700 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 701 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 702 703 if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) { 704 return (x & FP16_SIGN_MASK) != 0 ? y : x; 705 } 706 707 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 708 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 709 } 710 711 /** 712 * Returns true if the first half-precision float value is less (smaller 713 * toward negative infinity) than the second half-precision float value. 714 * If either of the values is NaN, the result is false. 715 * 716 * @param x The first half-precision value 717 * @param y The second half-precision value 718 * 719 * @return True if x is less than y, false otherwise 720 */ less(@alfFloat short x, @HalfFloat short y)721 public static boolean less(@HalfFloat short x, @HalfFloat short y) { 722 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 723 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 724 725 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 726 ((y & FP16_SIGN_MASK) != 0 ? 
0x8000 - (y & 0xffff) : y & 0xffff); 727 } 728 729 /** 730 * Returns true if the first half-precision float value is less (smaller 731 * toward negative infinity) than or equal to the second half-precision 732 * float value. If either of the values is NaN, the result is false. 733 * 734 * @param x The first half-precision value 735 * @param y The second half-precision value 736 * 737 * @return True if x is less than or equal to y, false otherwise 738 */ lessEquals(@alfFloat short x, @HalfFloat short y)739 public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) { 740 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 741 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 742 743 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <= 744 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 745 } 746 747 /** 748 * Returns true if the first half-precision float value is greater (larger 749 * toward positive infinity) than the second half-precision float value. 750 * If either of the values is NaN, the result is false. 751 * 752 * @param x The first half-precision value 753 * @param y The second half-precision value 754 * 755 * @return True if x is greater than y, false otherwise 756 */ greater(@alfFloat short x, @HalfFloat short y)757 public static boolean greater(@HalfFloat short x, @HalfFloat short y) { 758 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 759 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 760 761 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 762 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 763 } 764 765 /** 766 * Returns true if the first half-precision float value is greater (larger 767 * toward positive infinity) than or equal to the second half-precision float 768 * value. If either of the values is NaN, the result is false. 
769 * 770 * @param x The first half-precision value 771 * @param y The second half-precision value 772 * 773 * @return True if x is greater than y, false otherwise 774 */ greaterEquals(@alfFloat short x, @HalfFloat short y)775 public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) { 776 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 777 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 778 779 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >= 780 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 781 } 782 783 /** 784 * Returns true if the two half-precision float values are equal. 785 * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO} 786 * and {@link #NEGATIVE_ZERO} are considered equal. 787 * 788 * @param x The first half-precision value 789 * @param y The second half-precision value 790 * 791 * @return True if x is equal to y, false otherwise 792 */ equals(@alfFloat short x, @HalfFloat short y)793 public static boolean equals(@HalfFloat short x, @HalfFloat short y) { 794 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 795 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 796 797 return x == y || ((x | y) & FP16_COMBINED) == 0; 798 } 799 800 /** 801 * Returns the sign of the specified half-precision float. 802 * 803 * @param h A half-precision float value 804 * @return 1 if the value is positive, -1 if the value is negative 805 */ getSign(@alfFloat short h)806 public static int getSign(@HalfFloat short h) { 807 return (h & FP16_SIGN_MASK) == 0 ? 1 : -1; 808 } 809 810 /** 811 * Returns the unbiased exponent used in the representation of 812 * the specified half-precision float value. if the value is NaN 813 * or infinite, this* method returns {@link #MAX_EXPONENT} + 1. 814 * If the argument is 0 or a subnormal representation, this method 815 * returns {@link #MIN_EXPONENT} - 1. 
816 * 817 * @param h A half-precision float value 818 * @return The unbiased exponent of the specified value 819 */ getExponent(@alfFloat short h)820 public static int getExponent(@HalfFloat short h) { 821 return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS; 822 } 823 824 /** 825 * Returns the significand, or mantissa, used in the representation 826 * of the specified half-precision float value. 827 * 828 * @param h A half-precision float value 829 * @return The significand, or significand, of the specified vlaue 830 */ getSignificand(@alfFloat short h)831 public static int getSignificand(@HalfFloat short h) { 832 return h & FP16_SIGNIFICAND_MASK; 833 } 834 835 /** 836 * Returns true if the specified half-precision float value represents 837 * infinity, false otherwise. 838 * 839 * @param h A half-precision float value 840 * @return True if the value is positive infinity or negative infinity, 841 * false otherwise 842 */ isInfinite(@alfFloat short h)843 public static boolean isInfinite(@HalfFloat short h) { 844 return (h & FP16_COMBINED) == FP16_EXPONENT_MAX; 845 } 846 847 /** 848 * Returns true if the specified half-precision float value represents 849 * a Not-a-Number, false otherwise. 850 * 851 * @param h A half-precision float value 852 * @return True if the value is a NaN, false otherwise 853 */ isNaN(@alfFloat short h)854 public static boolean isNaN(@HalfFloat short h) { 855 return (h & FP16_COMBINED) > FP16_EXPONENT_MAX; 856 } 857 858 /** 859 * Returns true if the specified half-precision float value is normalized 860 * (does not have a subnormal representation). If the specified value is 861 * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY}, 862 * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal 863 * number, this method returns false. 
864 * 865 * @param h A half-precision float value 866 * @return True if the value is normalized, false otherwise 867 */ isNormalized(@alfFloat short h)868 public static boolean isNormalized(@HalfFloat short h) { 869 return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX; 870 } 871 872 /** 873 * <p>Converts the specified half-precision float value into a 874 * single-precision float value. The following special cases are handled:</p> 875 * <ul> 876 * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li> 877 * <li>If the input is {@link #POSITIVE_INFINITY} or 878 * {@link #NEGATIVE_INFINITY}, the returned value is respectively 879 * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li> 880 * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li> 881 * <li>Otherwise, the returned value is a normalized single-precision float value</li> 882 * </ul> 883 * 884 * @param h The half-precision float value to convert to single-precision 885 * @return A normalized single-precision float value 886 */ toFloat(@alfFloat short h)887 public static float toFloat(@HalfFloat short h) { 888 int bits = h & 0xffff; 889 int s = bits & FP16_SIGN_MASK; 890 int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK; 891 int m = (bits ) & FP16_SIGNIFICAND_MASK; 892 893 int outE = 0; 894 int outM = 0; 895 896 if (e == 0) { // Denormal or 0 897 if (m != 0) { 898 // Convert denorm fp16 into normalized fp32 899 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); 900 o -= FP32_DENORMAL_FLOAT; 901 return s == 0 ? 
o : -o; 902 } 903 } else { 904 outM = m << 13; 905 if (e == 0x1f) { // Infinite or NaN 906 outE = 0xff; 907 if (outM != 0) { // SNaNs are quieted 908 outM |= FP32_QNAN_MASK; 909 } 910 } else { 911 outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS; 912 } 913 } 914 915 int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; 916 return Float.intBitsToFloat(out); 917 } 918 919 /** 920 * <p>Converts the specified single-precision float value into a 921 * half-precision float value. The following special cases are handled:</p> 922 * <ul> 923 * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned 924 * value is {@link #NaN}</li> 925 * <li>If the input is {@link Float#POSITIVE_INFINITY} or 926 * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively 927 * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li> 928 * <li>If the input is 0 (positive or negative), the returned value is 929 * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 930 * <li>If the input is a less than {@link #MIN_VALUE}, the returned value 931 * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 932 * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value 933 * is a denorm half-precision float</li> 934 * <li>Otherwise, the returned value is rounded to the nearest 935 * representable half-precision float value</li> 936 * </ul> 937 * 938 * @param f The single-precision float value to convert to half-precision 939 * @return A half-precision float value 940 */ 941 @SuppressWarnings("StatementWithEmptyBody") toHalf(float f)942 public static @HalfFloat short toHalf(float f) { 943 int bits = Float.floatToRawIntBits(f); 944 int s = (bits >>> FP32_SIGN_SHIFT ); 945 int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK; 946 int m = (bits ) & FP32_SIGNIFICAND_MASK; 947 948 int outE = 0; 949 int outM = 0; 950 951 if (e == 0xff) { // Infinite or NaN 952 outE = 0x1f; 953 outM = m != 0 ? 
0x200 : 0; 954 } else { 955 e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS; 956 if (e >= 0x1f) { // Overflow 957 outE = 0x31; 958 } else if (e <= 0) { // Underflow 959 if (e < -10) { 960 // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 961 } else { 962 // The fp32 value is a normalized float less than MIN_NORMAL, 963 // we convert to a denorm fp16 964 m = (m | 0x800000) >> (1 - e); 965 if ((m & 0x1000) != 0) m += 0x2000; 966 outM = m >> 13; 967 } 968 } else { 969 outE = e; 970 outM = m >> 13; 971 if ((m & 0x1000) != 0) { 972 // Round to nearest "0.5" up 973 int out = (outE << FP16_EXPONENT_SHIFT) | outM; 974 out++; 975 return (short) (out | (s << FP16_SIGN_SHIFT)); 976 } 977 } 978 } 979 980 return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM); 981 } 982 983 /** 984 * Returns a {@code Half} instance representing the specified 985 * half-precision float value. 986 * 987 * @param h A half-precision float value 988 * @return a {@code Half} instance representing {@code h} 989 */ valueOf(@alfFloat short h)990 public static @NonNull Half valueOf(@HalfFloat short h) { 991 return new Half(h); 992 } 993 994 /** 995 * Returns a {@code Half} instance representing the specified float value. 996 * 997 * @param f A float value 998 * @return a {@code Half} instance representing {@code f} 999 */ valueOf(float f)1000 public static @NonNull Half valueOf(float f) { 1001 return new Half(f); 1002 } 1003 1004 /** 1005 * Returns a {@code Half} instance representing the specified string value. 1006 * Calling this method is equivalent to calling 1007 * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)} 1008 * for more information on the format of the string representation. 
1009 * 1010 * @param s The string to be parsed 1011 * @return a {@code Half} instance representing {@code h} 1012 * @throws NumberFormatException if the string does not contain a parsable 1013 * half-precision float value 1014 */ valueOf(@onNull String s)1015 public static @NonNull Half valueOf(@NonNull String s) { 1016 return new Half(s); 1017 } 1018 1019 /** 1020 * Returns the half-precision float value represented by the specified string. 1021 * Calling this method is equivalent to calling 1022 * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)} 1023 * for more information on the format of the string representation. 1024 * 1025 * @param s The string to be parsed 1026 * @return A half-precision float value represented by the string 1027 * @throws NumberFormatException if the string does not contain a parsable 1028 * half-precision float value 1029 */ parseHalf(@onNull String s)1030 public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException { 1031 return toHalf(Float.parseFloat(s)); 1032 } 1033 1034 /** 1035 * Returns a string representation of the specified half-precision 1036 * float value. Calling this method is equivalent to calling 1037 * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)} 1038 * for more information on the format of the string representation. 1039 * 1040 * @param h A half-precision float value 1041 * @return A string representation of the specified value 1042 */ 1043 @NonNull toString(@alfFloat short h)1044 public static String toString(@HalfFloat short h) { 1045 return Float.toString(toFloat(h)); 1046 } 1047 1048 /** 1049 * <p>Returns a hexadecimal string representation of the specified half-precision 1050 * float value. 
If the value is a NaN, the result is <code>"NaN"</code>, 1051 * otherwise the result follows this format:</p> 1052 * <ul> 1053 * <li>If the sign is positive, no sign character appears in the result</li> 1054 * <li>If the sign is negative, the first character is <code>'-'</code></li> 1055 * <li>If the value is inifinity, the string is <code>"Infinity"</code></li> 1056 * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li> 1057 * <li>If the value has a normalized representation, the exponent and 1058 * significand are represented in the string in two fields. The significand 1059 * starts with <code>"0x1."</code> followed by its lowercase hexadecimal 1060 * representation. Trailing zeroes are removed unless all digits are 0, then 1061 * a single zero is used. The significand representation is followed by the 1062 * exponent, represented by <code>"p"</code>, itself followed by a decimal 1063 * string of the unbiased exponent</li> 1064 * <li>If the value has a subnormal representation, the significand starts 1065 * with <code>"0x0."</code> followed by its lowercase hexadecimal 1066 * representation. Trailing zeroes are removed unless all digits are 0, then 1067 * a single zero is used. 
The significand representation is followed by the 1068 * exponent, represented by <code>"p-14"</code></li> 1069 * </ul> 1070 * 1071 * @param h A half-precision float value 1072 * @return A hexadecimal string representation of the specified value 1073 */ 1074 @NonNull toHexString(@alfFloat short h)1075 public static String toHexString(@HalfFloat short h) { 1076 StringBuilder o = new StringBuilder(); 1077 1078 int bits = h & 0xffff; 1079 int s = (bits >>> FP16_SIGN_SHIFT ); 1080 int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK; 1081 int m = (bits ) & FP16_SIGNIFICAND_MASK; 1082 1083 if (e == 0x1f) { // Infinite or NaN 1084 if (m == 0) { 1085 if (s != 0) o.append('-'); 1086 o.append("Infinity"); 1087 } else { 1088 o.append("NaN"); 1089 } 1090 } else { 1091 if (s == 1) o.append('-'); 1092 if (e == 0) { 1093 if (m == 0) { 1094 o.append("0x0.0p0"); 1095 } else { 1096 o.append("0x0."); 1097 String significand = Integer.toHexString(m); 1098 o.append(significand.replaceFirst("0{2,}$", "")); 1099 o.append("p-14"); 1100 } 1101 } else { 1102 o.append("0x1."); 1103 String significand = Integer.toHexString(m); 1104 o.append(significand.replaceFirst("0{2,}$", "")); 1105 o.append('p'); 1106 o.append(Integer.toString(e - FP16_EXPONENT_BIAS)); 1107 } 1108 } 1109 1110 return o.toString(); 1111 } 1112 } 1113