1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.util; 18 19 import android.annotation.HalfFloat; 20 import android.annotation.NonNull; 21 import android.annotation.Nullable; 22 23 import sun.misc.FloatingDecimal; 24 25 /** 26 * <p>The {@code Half} class is a wrapper and a utility class to manipulate half-precision 16-bit 27 * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a> 28 * floating point data types (also called fp16 or binary16). A half-precision float can be 29 * created from or converted to single-precision floats, and is stored in a short data type. 30 * To distinguish short values holding half-precision floats from regular short values, 31 * it is recommended to use the <code>@HalfFloat</code> annotation.</p> 32 * 33 * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p> 34 * <ul> 35 * <li>Sign bit: 1 bit</li> 36 * <li>Exponent width: 5 bits</li> 37 * <li>Significand: 10 bits</li> 38 * </ul> 39 * 40 * <p>The format is laid out as follows:</p> 41 * <pre> 42 * 1 11111 1111111111 43 * ^ --^-- -----^---- 44 * sign | |_______ significand 45 * | 46 * -- exponent 47 * </pre> 48 * 49 * <p>Half-precision floating points can be useful to save memory and/or 50 * bandwidth at the expense of range and precision when compared to single-precision 51 * floating points (fp32).</p> 52 * <p>To help you decide whether fp16 is the right storage type for you need, please 53 * refer to the table below that shows the available precision throughout the range of 54 * possible values. The <em>precision</em> column indicates the step size between two 55 * consecutive numbers in a specific part of the range.</p> 56 * 57 * <table summary="Precision of fp16 across the range"> 58 * <tr><th>Range start</th><th>Precision</th></tr> 59 * <tr><td>0</td><td>1 ⁄ 16,777,216</td></tr> 60 * <tr><td>1 ⁄ 16,384</td><td>1 ⁄ 16,777,216</td></tr> 61 * <tr><td>1 ⁄ 8,192</td><td>1 ⁄ 8,388,608</td></tr> 62 * <tr><td>1 ⁄ 4,096</td><td>1 ⁄ 4,194,304</td></tr> 63 * <tr><td>1 ⁄ 2,048</td><td>1 ⁄ 2,097,152</td></tr> 64 * <tr><td>1 ⁄ 1,024</td><td>1 ⁄ 1,048,576</td></tr> 65 * <tr><td>1 ⁄ 512</td><td>1 ⁄ 524,288</td></tr> 66 * <tr><td>1 ⁄ 256</td><td>1 ⁄ 262,144</td></tr> 67 * <tr><td>1 ⁄ 128</td><td>1 ⁄ 131,072</td></tr> 68 * <tr><td>1 ⁄ 64</td><td>1 ⁄ 65,536</td></tr> 69 * <tr><td>1 ⁄ 32</td><td>1 ⁄ 32,768</td></tr> 70 * <tr><td>1 ⁄ 16</td><td>1 ⁄ 16,384</td></tr> 71 * <tr><td>1 ⁄ 8</td><td>1 ⁄ 8,192</td></tr> 72 * <tr><td>1 ⁄ 4</td><td>1 ⁄ 4,096</td></tr> 73 * <tr><td>1 ⁄ 2</td><td>1 ⁄ 2,048</td></tr> 74 * <tr><td>1</td><td>1 ⁄ 1,024</td></tr> 75 * <tr><td>2</td><td>1 ⁄ 512</td></tr> 76 * <tr><td>4</td><td>1 ⁄ 256</td></tr> 77 * <tr><td>8</td><td>1 ⁄ 128</td></tr> 78 * <tr><td>16</td><td>1 ⁄ 64</td></tr> 79 * <tr><td>32</td><td>1 ⁄ 32</td></tr> 80 * <tr><td>64</td><td>1 ⁄ 16</td></tr> 81 * <tr><td>128</td><td>1 ⁄ 8</td></tr> 82 * <tr><td>256</td><td>1 ⁄ 4</td></tr> 83 * <tr><td>512</td><td>1 ⁄ 2</td></tr> 84 * <tr><td>1,024</td><td>1</td></tr> 85 * <tr><td>2,048</td><td>2</td></tr> 86 * <tr><td>4,096</td><td>4</td></tr> 87 * <tr><td>8,192</td><td>8</td></tr> 88 * <tr><td>16,384</td><td>16</td></tr> 89 * <tr><td>32,768</td><td>32</td></tr> 90 * </table> 91 * 92 * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p> 93 */ 94 @SuppressWarnings("SimplifiableIfStatement") 95 public final class Half extends Number implements Comparable<Half> { 96 /** 97 * The number of bits used to represent a half-precision float value. 98 */ 99 public static final int SIZE = 16; 100 101 /** 102 * Epsilon is the difference between 1.0 and the next value representable 103 * by a half-precision floating-point. 104 */ 105 public static final @HalfFloat short EPSILON = (short) 0x1400; 106 107 /** 108 * Maximum exponent a finite half-precision float may have. 109 */ 110 public static final int MAX_EXPONENT = 15; 111 /** 112 * Minimum exponent a normalized half-precision float may have. 113 */ 114 public static final int MIN_EXPONENT = -14; 115 116 /** 117 * Smallest negative value a half-precision float may have. 118 */ 119 public static final @HalfFloat short LOWEST_VALUE = (short) 0xfbff; 120 /** 121 * Maximum positive finite value a half-precision float may have. 122 */ 123 public static final @HalfFloat short MAX_VALUE = (short) 0x7bff; 124 /** 125 * Smallest positive normal value a half-precision float may have. 126 */ 127 public static final @HalfFloat short MIN_NORMAL = (short) 0x0400; 128 /** 129 * Smallest positive non-zero value a half-precision float may have. 130 */ 131 public static final @HalfFloat short MIN_VALUE = (short) 0x0001; 132 /** 133 * A Not-a-Number representation of a half-precision float. 134 */ 135 public static final @HalfFloat short NaN = (short) 0x7e00; 136 /** 137 * Negative infinity of type half-precision float. 138 */ 139 public static final @HalfFloat short NEGATIVE_INFINITY = (short) 0xfc00; 140 /** 141 * Negative 0 of type half-precision float. 142 */ 143 public static final @HalfFloat short NEGATIVE_ZERO = (short) 0x8000; 144 /** 145 * Positive infinity of type half-precision float. 146 */ 147 public static final @HalfFloat short POSITIVE_INFINITY = (short) 0x7c00; 148 /** 149 * Positive 0 of type half-precision float. 150 */ 151 public static final @HalfFloat short POSITIVE_ZERO = (short) 0x0000; 152 153 private static final int FP16_SIGN_SHIFT = 15; 154 private static final int FP16_SIGN_MASK = 0x8000; 155 private static final int FP16_EXPONENT_SHIFT = 10; 156 private static final int FP16_EXPONENT_MASK = 0x1f; 157 private static final int FP16_SIGNIFICAND_MASK = 0x3ff; 158 private static final int FP16_EXPONENT_BIAS = 15; 159 private static final int FP16_COMBINED = 0x7fff; 160 private static final int FP16_EXPONENT_MAX = 0x7c00; 161 162 private static final int FP32_SIGN_SHIFT = 31; 163 private static final int FP32_EXPONENT_SHIFT = 23; 164 private static final int FP32_EXPONENT_MASK = 0xff; 165 private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; 166 private static final int FP32_EXPONENT_BIAS = 127; 167 168 private static final int FP32_DENORMAL_MAGIC = 126 << 23; 169 private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); 170 171 private final @HalfFloat short mValue; 172 173 /** 174 * Constructs a newly allocated {@code Half} object that represents the 175 * half-precision float type argument. 176 * 177 * @param value The value to be represented by the {@code Half} 178 */ Half(@alfFloat short value)179 public Half(@HalfFloat short value) { 180 mValue = value; 181 } 182 183 /** 184 * Constructs a newly allocated {@code Half} object that represents the 185 * argument converted to a half-precision float. 186 * 187 * @param value The value to be represented by the {@code Half} 188 * 189 * @see #toHalf(float) 190 */ Half(float value)191 public Half(float value) { 192 mValue = toHalf(value); 193 } 194 195 /** 196 * Constructs a newly allocated {@code Half} object that 197 * represents the argument converted to a half-precision float. 198 * 199 * @param value The value to be represented by the {@code Half} 200 * 201 * @see #toHalf(float) 202 */ Half(double value)203 public Half(double value) { 204 mValue = toHalf((float) value); 205 } 206 207 /** 208 * <p>Constructs a newly allocated {@code Half} object that represents the 209 * half-precision float value represented by the string. 210 * The string is converted to a half-precision float value as if by the 211 * {@link #valueOf(String)} method.</p> 212 * 213 * <p>Calling this constructor is equivalent to calling:</p> 214 * <pre> 215 * new Half(Float.parseFloat(value)) 216 * </pre> 217 * 218 * @param value A string to be converted to a {@code Half} 219 * @throws NumberFormatException if the string does not contain a parsable number 220 * 221 * @see Float#valueOf(java.lang.String) 222 * @see #toHalf(float) 223 */ Half(@onNull String value)224 public Half(@NonNull String value) throws NumberFormatException { 225 mValue = toHalf(Float.parseFloat(value)); 226 } 227 228 /** 229 * Returns the half-precision value of this {@code Half} as a {@code short} 230 * containing the bit representation described in {@link Half}. 231 * 232 * @return The half-precision float value represented by this object 233 */ halfValue()234 public @HalfFloat short halfValue() { 235 return mValue; 236 } 237 238 /** 239 * Returns the value of this {@code Half} as a {@code byte} after 240 * a narrowing primitive conversion. 241 * 242 * @return The half-precision float value represented by this object 243 * converted to type {@code byte} 244 */ 245 @Override byteValue()246 public byte byteValue() { 247 return (byte) toFloat(mValue); 248 } 249 250 /** 251 * Returns the value of this {@code Half} as a {@code short} after 252 * a narrowing primitive conversion. 253 * 254 * @return The half-precision float value represented by this object 255 * converted to type {@code short} 256 */ 257 @Override shortValue()258 public short shortValue() { 259 return (short) toFloat(mValue); 260 } 261 262 /** 263 * Returns the value of this {@code Half} as a {@code int} after 264 * a narrowing primitive conversion. 265 * 266 * @return The half-precision float value represented by this object 267 * converted to type {@code int} 268 */ 269 @Override intValue()270 public int intValue() { 271 return (int) toFloat(mValue); 272 } 273 274 /** 275 * Returns the value of this {@code Half} as a {@code long} after 276 * a narrowing primitive conversion. 277 * 278 * @return The half-precision float value represented by this object 279 * converted to type {@code long} 280 */ 281 @Override longValue()282 public long longValue() { 283 return (long) toFloat(mValue); 284 } 285 286 /** 287 * Returns the value of this {@code Half} as a {@code float} after 288 * a widening primitive conversion. 289 * 290 * @return The half-precision float value represented by this object 291 * converted to type {@code float} 292 */ 293 @Override floatValue()294 public float floatValue() { 295 return toFloat(mValue); 296 } 297 298 /** 299 * Returns the value of this {@code Half} as a {@code double} after 300 * a widening primitive conversion. 301 * 302 * @return The half-precision float value represented by this object 303 * converted to type {@code double} 304 */ 305 @Override doubleValue()306 public double doubleValue() { 307 return toFloat(mValue); 308 } 309 310 /** 311 * Returns true if this {@code Half} value represents a Not-a-Number, 312 * false otherwise. 313 * 314 * @return True if the value is a NaN, false otherwise 315 */ isNaN()316 public boolean isNaN() { 317 return isNaN(mValue); 318 } 319 320 /** 321 * Compares this object against the specified object. The result is {@code true} 322 * if and only if the argument is not {@code null} and is a {@code Half} object 323 * that represents the same half-precision value as the this object. Two 324 * half-precision values are considered to be the same if and only if the method 325 * {@link #halfToIntBits(short)} returns an identical {@code int} value for both. 326 * 327 * @param o The object to compare 328 * @return True if the objects are the same, false otherwise 329 * 330 * @see #halfToIntBits(short) 331 */ 332 @Override equals(@ullable Object o)333 public boolean equals(@Nullable Object o) { 334 return (o instanceof Half) && 335 (halfToIntBits(((Half) o).mValue) == halfToIntBits(mValue)); 336 } 337 338 /** 339 * Returns a hash code for this {@code Half} object. The result is the 340 * integer bit representation, exactly as produced by the method 341 * {@link #halfToIntBits(short)}, of the primitive half-precision float 342 * value represented by this {@code Half} object. 343 * 344 * @return A hash code value for this object 345 */ 346 @Override hashCode()347 public int hashCode() { 348 return hashCode(mValue); 349 } 350 351 /** 352 * Returns a string representation of the specified half-precision 353 * float value. See {@link #toString(short)} for more information. 354 * 355 * @return A string representation of this {@code Half} object 356 */ 357 @NonNull 358 @Override toString()359 public String toString() { 360 return toString(mValue); 361 } 362 363 /** 364 * <p>Compares the two specified half-precision float values. The following 365 * conditions apply during the comparison:</p> 366 * 367 * <ul> 368 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 369 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 370 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 371 * {@link #NEGATIVE_ZERO}.</li> 372 * </ul> 373 * 374 * @param h The half-precision float value to compare to the half-precision value 375 * represented by this {@code Half} object 376 * 377 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}; a 378 * value less than {@code 0} if {@code x} is numerically less than {@code y}; 379 * and a value greater than {@code 0} if {@code x} is numerically greater 380 * than {@code y} 381 */ 382 @Override compareTo(@onNull Half h)383 public int compareTo(@NonNull Half h) { 384 return compare(mValue, h.mValue); 385 } 386 387 /** 388 * Returns a hash code for a half-precision float value. 389 * 390 * @param h The value to hash 391 * 392 * @return A hash code value for a half-precision float value 393 */ hashCode(@alfFloat short h)394 public static int hashCode(@HalfFloat short h) { 395 return halfToIntBits(h); 396 } 397 398 /** 399 * <p>Compares the two specified half-precision float values. The following 400 * conditions apply during the comparison:</p> 401 * 402 * <ul> 403 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 404 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 405 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 406 * {@link #NEGATIVE_ZERO}.</li> 407 * </ul> 408 * 409 * @param x The first half-precision float value to compare. 410 * @param y The second half-precision float value to compare 411 * 412 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a 413 * value less than {@code 0} if {@code x} is numerically less than {@code y}, 414 * and a value greater than {@code 0} if {@code x} is numerically greater 415 * than {@code y} 416 */ compare(@alfFloat short x, @HalfFloat short y)417 public static int compare(@HalfFloat short x, @HalfFloat short y) { 418 if (less(x, y)) return -1; 419 if (greater(x, y)) return 1; 420 421 // Collapse NaNs, akin to halfToIntBits(), but we want to keep 422 // (signed) short value types to preserve the ordering of -0.0 423 // and +0.0 424 short xBits = (x & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : x; 425 short yBits = (y & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : y; 426 427 return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); 428 } 429 430 /** 431 * <p>Returns a representation of the specified half-precision float value 432 * according to the bit layout described in {@link Half}.</p> 433 * 434 * <p>Similar to {@link #halfToIntBits(short)}, this method collapses all 435 * possible Not-a-Number values to a single canonical Not-a-Number value 436 * defined by {@link #NaN}.</p> 437 * 438 * @param h A half-precision float value 439 * @return The bits that represent the half-precision float value 440 * 441 * @see #halfToIntBits(short) 442 */ halfToShortBits(@alfFloat short h)443 public static @HalfFloat short halfToShortBits(@HalfFloat short h) { 444 return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h; 445 } 446 447 /** 448 * <p>Returns a representation of the specified half-precision float value 449 * according to the bit layout described in {@link Half}.</p> 450 * 451 * <p>Unlike {@link #halfToRawIntBits(short)}, this method collapses all 452 * possible Not-a-Number values to a single canonical Not-a-Number value 453 * defined by {@link #NaN}.</p> 454 * 455 * @param h A half-precision float value 456 * @return The bits that represent the half-precision float value 457 * 458 * @see #halfToRawIntBits(short) 459 * @see #halfToShortBits(short) 460 * @see #intBitsToHalf(int) 461 */ halfToIntBits(@alfFloat short h)462 public static int halfToIntBits(@HalfFloat short h) { 463 return (h & FP16_COMBINED) > FP16_EXPONENT_MAX ? NaN : h & 0xffff; 464 } 465 466 /** 467 * <p>Returns a representation of the specified half-precision float value 468 * according to the bit layout described in {@link Half}.</p> 469 * 470 * <p>The argument is considered to be a representation of a half-precision 471 * float value according to the bit layout described in {@link Half}. The 16 472 * most significant bits of the returned value are set to 0.</p> 473 * 474 * @param h A half-precision float value 475 * @return The bits that represent the half-precision float value 476 * 477 * @see #halfToIntBits(short) 478 * @see #intBitsToHalf(int) 479 */ halfToRawIntBits(@alfFloat short h)480 public static int halfToRawIntBits(@HalfFloat short h) { 481 return h & 0xffff; 482 } 483 484 /** 485 * <p>Returns the half-precision float value corresponding to a given 486 * bit representation.</p> 487 * 488 * <p>The argument is considered to be a representation of a half-precision 489 * float value according to the bit layout described in {@link Half}. The 16 490 * most significant bits of the argument are ignored.</p> 491 * 492 * @param bits An integer 493 * @return The half-precision float value with the same bit pattern 494 */ intBitsToHalf(int bits)495 public static @HalfFloat short intBitsToHalf(int bits) { 496 return (short) (bits & 0xffff); 497 } 498 499 /** 500 * Returns the first parameter with the sign of the second parameter. 501 * This method treats NaNs as having a sign. 502 * 503 * @param magnitude A half-precision float value providing the magnitude of the result 504 * @param sign A half-precision float value providing the sign of the result 505 * @return A value with the magnitude of the first parameter and the sign 506 * of the second parameter 507 */ copySign(@alfFloat short magnitude, @HalfFloat short sign)508 public static @HalfFloat short copySign(@HalfFloat short magnitude, @HalfFloat short sign) { 509 return (short) ((sign & FP16_SIGN_MASK) | (magnitude & FP16_COMBINED)); 510 } 511 512 /** 513 * Returns the absolute value of the specified half-precision float. 514 * Special values are handled in the following ways: 515 * <ul> 516 * <li>If the specified half-precision float is NaN, the result is NaN</li> 517 * <li>If the specified half-precision float is zero (negative or positive), 518 * the result is positive zero (see {@link #POSITIVE_ZERO})</li> 519 * <li>If the specified half-precision float is infinity (negative or positive), 520 * the result is positive infinity (see {@link #POSITIVE_INFINITY})</li> 521 * </ul> 522 * 523 * @param h A half-precision float value 524 * @return The absolute value of the specified half-precision float 525 */ abs(@alfFloat short h)526 public static @HalfFloat short abs(@HalfFloat short h) { 527 return (short) (h & FP16_COMBINED); 528 } 529 530 /** 531 * Returns the closest integral half-precision float value to the specified 532 * half-precision float value. Special values are handled in the 533 * following ways: 534 * <ul> 535 * <li>If the specified half-precision float is NaN, the result is NaN</li> 536 * <li>If the specified half-precision float is infinity (negative or positive), 537 * the result is infinity (with the same sign)</li> 538 * <li>If the specified half-precision float is zero (negative or positive), 539 * the result is zero (with the same sign)</li> 540 * </ul> 541 * 542 * @param h A half-precision float value 543 * @return The value of the specified half-precision float rounded to the nearest 544 * half-precision float value 545 */ round(@alfFloat short h)546 public static @HalfFloat short round(@HalfFloat short h) { 547 int bits = h & 0xffff; 548 int e = bits & 0x7fff; 549 int result = bits; 550 551 if (e < 0x3c00) { 552 result &= FP16_SIGN_MASK; 553 result |= (0x3c00 & (e >= 0x3800 ? 0xffff : 0x0)); 554 } else if (e < 0x6400) { 555 e = 25 - (e >> 10); 556 int mask = (1 << e) - 1; 557 result += (1 << (e - 1)); 558 result &= ~mask; 559 } 560 561 return (short) result; 562 } 563 564 /** 565 * Returns the smallest half-precision float value toward negative infinity 566 * greater than or equal to the specified half-precision float value. 567 * Special values are handled in the following ways: 568 * <ul> 569 * <li>If the specified half-precision float is NaN, the result is NaN</li> 570 * <li>If the specified half-precision float is infinity (negative or positive), 571 * the result is infinity (with the same sign)</li> 572 * <li>If the specified half-precision float is zero (negative or positive), 573 * the result is zero (with the same sign)</li> 574 * </ul> 575 * 576 * @param h A half-precision float value 577 * @return The smallest half-precision float value toward negative infinity 578 * greater than or equal to the specified half-precision float value 579 */ ceil(@alfFloat short h)580 public static @HalfFloat short ceil(@HalfFloat short h) { 581 int bits = h & 0xffff; 582 int e = bits & 0x7fff; 583 int result = bits; 584 585 if (e < 0x3c00) { 586 result &= FP16_SIGN_MASK; 587 result |= 0x3c00 & -(~(bits >> 15) & (e != 0 ? 1 : 0)); 588 } else if (e < 0x6400) { 589 e = 25 - (e >> 10); 590 int mask = (1 << e) - 1; 591 result += mask & ((bits >> 15) - 1); 592 result &= ~mask; 593 } 594 595 return (short) result; 596 } 597 598 /** 599 * Returns the largest half-precision float value toward positive infinity 600 * less than or equal to the specified half-precision float value. 601 * Special values are handled in the following ways: 602 * <ul> 603 * <li>If the specified half-precision float is NaN, the result is NaN</li> 604 * <li>If the specified half-precision float is infinity (negative or positive), 605 * the result is infinity (with the same sign)</li> 606 * <li>If the specified half-precision float is zero (negative or positive), 607 * the result is zero (with the same sign)</li> 608 * </ul> 609 * 610 * @param h A half-precision float value 611 * @return The largest half-precision float value toward positive infinity 612 * less than or equal to the specified half-precision float value 613 */ floor(@alfFloat short h)614 public static @HalfFloat short floor(@HalfFloat short h) { 615 int bits = h & 0xffff; 616 int e = bits & 0x7fff; 617 int result = bits; 618 619 if (e < 0x3c00) { 620 result &= FP16_SIGN_MASK; 621 result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0); 622 } else if (e < 0x6400) { 623 e = 25 - (e >> 10); 624 int mask = (1 << e) - 1; 625 result += mask & -(bits >> 15); 626 result &= ~mask; 627 } 628 629 return (short) result; 630 } 631 632 /** 633 * Returns the truncated half-precision float value of the specified 634 * half-precision float value. Special values are handled in the following ways: 635 * <ul> 636 * <li>If the specified half-precision float is NaN, the result is NaN</li> 637 * <li>If the specified half-precision float is infinity (negative or positive), 638 * the result is infinity (with the same sign)</li> 639 * <li>If the specified half-precision float is zero (negative or positive), 640 * the result is zero (with the same sign)</li> 641 * </ul> 642 * 643 * @param h A half-precision float value 644 * @return The truncated half-precision float value of the specified 645 * half-precision float value 646 */ trunc(@alfFloat short h)647 public static @HalfFloat short trunc(@HalfFloat short h) { 648 int bits = h & 0xffff; 649 int e = bits & 0x7fff; 650 int result = bits; 651 652 if (e < 0x3c00) { 653 result &= FP16_SIGN_MASK; 654 } else if (e < 0x6400) { 655 e = 25 - (e >> 10); 656 int mask = (1 << e) - 1; 657 result &= ~mask; 658 } 659 660 return (short) result; 661 } 662 663 /** 664 * Returns the smaller of two half-precision float values (the value closest 665 * to negative infinity). Special values are handled in the following ways: 666 * <ul> 667 * <li>If either value is NaN, the result is NaN</li> 668 * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li> 669 * </ul> 670 * 671 * @param x The first half-precision value 672 * @param y The second half-precision value 673 * @return The smaller of the two specified half-precision values 674 */ min(@alfFloat short x, @HalfFloat short y)675 public static @HalfFloat short min(@HalfFloat short x, @HalfFloat short y) { 676 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 677 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 678 679 if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) { 680 return (x & FP16_SIGN_MASK) != 0 ? x : y; 681 } 682 683 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 684 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 685 } 686 687 /** 688 * Returns the larger of two half-precision float values (the value closest 689 * to positive infinity). Special values are handled in the following ways: 690 * <ul> 691 * <li>If either value is NaN, the result is NaN</li> 692 * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li> 693 * </ul> 694 * 695 * @param x The first half-precision value 696 * @param y The second half-precision value 697 * 698 * @return The larger of the two specified half-precision values 699 */ max(@alfFloat short x, @HalfFloat short y)700 public static @HalfFloat short max(@HalfFloat short x, @HalfFloat short y) { 701 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 702 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return NaN; 703 704 if ((x & FP16_COMBINED) == 0 && (y & FP16_COMBINED) == 0) { 705 return (x & FP16_SIGN_MASK) != 0 ? y : x; 706 } 707 708 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 709 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 710 } 711 712 /** 713 * Returns true if the first half-precision float value is less (smaller 714 * toward negative infinity) than the second half-precision float value. 715 * If either of the values is NaN, the result is false. 716 * 717 * @param x The first half-precision value 718 * @param y The second half-precision value 719 * 720 * @return True if x is less than y, false otherwise 721 */ less(@alfFloat short x, @HalfFloat short y)722 public static boolean less(@HalfFloat short x, @HalfFloat short y) { 723 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 724 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 725 726 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 727 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 728 } 729 730 /** 731 * Returns true if the first half-precision float value is less (smaller 732 * toward negative infinity) than or equal to the second half-precision 733 * float value. If either of the values is NaN, the result is false. 734 * 735 * @param x The first half-precision value 736 * @param y The second half-precision value 737 * 738 * @return True if x is less than or equal to y, false otherwise 739 */ lessEquals(@alfFloat short x, @HalfFloat short y)740 public static boolean lessEquals(@HalfFloat short x, @HalfFloat short y) { 741 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 742 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 743 744 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <= 745 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 746 } 747 748 /** 749 * Returns true if the first half-precision float value is greater (larger 750 * toward positive infinity) than the second half-precision float value. 751 * If either of the values is NaN, the result is false. 752 * 753 * @param x The first half-precision value 754 * @param y The second half-precision value 755 * 756 * @return True if x is greater than y, false otherwise 757 */ greater(@alfFloat short x, @HalfFloat short y)758 public static boolean greater(@HalfFloat short x, @HalfFloat short y) { 759 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 760 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 761 762 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 763 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 764 } 765 766 /** 767 * Returns true if the first half-precision float value is greater (larger 768 * toward positive infinity) than or equal to the second half-precision float 769 * value. If either of the values is NaN, the result is false. 770 * 771 * @param x The first half-precision value 772 * @param y The second half-precision value 773 * 774 * @return True if x is greater than y, false otherwise 775 */ greaterEquals(@alfFloat short x, @HalfFloat short y)776 public static boolean greaterEquals(@HalfFloat short x, @HalfFloat short y) { 777 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 778 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 779 780 return ((x & FP16_SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >= 781 ((y & FP16_SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 782 } 783 784 /** 785 * Returns true if the two half-precision float values are equal. 786 * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO} 787 * and {@link #NEGATIVE_ZERO} are considered equal. 788 * 789 * @param x The first half-precision value 790 * @param y The second half-precision value 791 * 792 * @return True if x is equal to y, false otherwise 793 */ equals(@alfFloat short x, @HalfFloat short y)794 public static boolean equals(@HalfFloat short x, @HalfFloat short y) { 795 if ((x & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 796 if ((y & FP16_COMBINED) > FP16_EXPONENT_MAX) return false; 797 798 return x == y || ((x | y) & FP16_COMBINED) == 0; 799 } 800 801 /** 802 * Returns the sign of the specified half-precision float. 803 * 804 * @param h A half-precision float value 805 * @return 1 if the value is positive, -1 if the value is negative 806 */ getSign(@alfFloat short h)807 public static int getSign(@HalfFloat short h) { 808 return (h & FP16_SIGN_MASK) == 0 ? 1 : -1; 809 } 810 811 /** 812 * Returns the unbiased exponent used in the representation of 813 * the specified half-precision float value. if the value is NaN 814 * or infinite, this* method returns {@link #MAX_EXPONENT} + 1. 815 * If the argument is 0 or a subnormal representation, this method 816 * returns {@link #MIN_EXPONENT} - 1. 817 * 818 * @param h A half-precision float value 819 * @return The unbiased exponent of the specified value 820 */ getExponent(@alfFloat short h)821 public static int getExponent(@HalfFloat short h) { 822 return ((h >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK) - FP16_EXPONENT_BIAS; 823 } 824 825 /** 826 * Returns the significand, or mantissa, used in the representation 827 * of the specified half-precision float value. 828 * 829 * @param h A half-precision float value 830 * @return The significand, or significand, of the specified vlaue 831 */ getSignificand(@alfFloat short h)832 public static int getSignificand(@HalfFloat short h) { 833 return h & FP16_SIGNIFICAND_MASK; 834 } 835 836 /** 837 * Returns true if the specified half-precision float value represents 838 * infinity, false otherwise. 839 * 840 * @param h A half-precision float value 841 * @return True if the value is positive infinity or negative infinity, 842 * false otherwise 843 */ isInfinite(@alfFloat short h)844 public static boolean isInfinite(@HalfFloat short h) { 845 return (h & FP16_COMBINED) == FP16_EXPONENT_MAX; 846 } 847 848 /** 849 * Returns true if the specified half-precision float value represents 850 * a Not-a-Number, false otherwise. 851 * 852 * @param h A half-precision float value 853 * @return True if the value is a NaN, false otherwise 854 */ isNaN(@alfFloat short h)855 public static boolean isNaN(@HalfFloat short h) { 856 return (h & FP16_COMBINED) > FP16_EXPONENT_MAX; 857 } 858 859 /** 860 * Returns true if the specified half-precision float value is normalized 861 * (does not have a subnormal representation). If the specified value is 862 * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY}, 863 * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal 864 * number, this method returns false. 865 * 866 * @param h A half-precision float value 867 * @return True if the value is normalized, false otherwise 868 */ isNormalized(@alfFloat short h)869 public static boolean isNormalized(@HalfFloat short h) { 870 return (h & FP16_EXPONENT_MAX) != 0 && (h & FP16_EXPONENT_MAX) != FP16_EXPONENT_MAX; 871 } 872 873 /** 874 * <p>Converts the specified half-precision float value into a 875 * single-precision float value. The following special cases are handled:</p> 876 * <ul> 877 * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li> 878 * <li>If the input is {@link #POSITIVE_INFINITY} or 879 * {@link #NEGATIVE_INFINITY}, the returned value is respectively 880 * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li> 881 * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li> 882 * <li>Otherwise, the returned value is a normalized single-precision float value</li> 883 * </ul> 884 * 885 * @param h The half-precision float value to convert to single-precision 886 * @return A normalized single-precision float value 887 */ toFloat(@alfFloat short h)888 public static float toFloat(@HalfFloat short h) { 889 int bits = h & 0xffff; 890 int s = bits & FP16_SIGN_MASK; 891 int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK; 892 int m = (bits ) & FP16_SIGNIFICAND_MASK; 893 894 int outE = 0; 895 int outM = 0; 896 897 if (e == 0) { // Denormal or 0 898 if (m != 0) { 899 // Convert denorm fp16 into normalized fp32 900 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); 901 o -= FP32_DENORMAL_FLOAT; 902 return s == 0 ? o : -o; 903 } 904 } else { 905 outM = m << 13; 906 if (e == 0x1f) { // Infinite or NaN 907 outE = 0xff; 908 } else { 909 outE = e - FP16_EXPONENT_BIAS + FP32_EXPONENT_BIAS; 910 } 911 } 912 913 int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; 914 return Float.intBitsToFloat(out); 915 } 916 917 /** 918 * <p>Converts the specified single-precision float value into a 919 * half-precision float value. The following special cases are handled:</p> 920 * <ul> 921 * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned 922 * value is {@link #NaN}</li> 923 * <li>If the input is {@link Float#POSITIVE_INFINITY} or 924 * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively 925 * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li> 926 * <li>If the input is 0 (positive or negative), the returned value is 927 * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 928 * <li>If the input is a less than {@link #MIN_VALUE}, the returned value 929 * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 930 * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value 931 * is a denorm half-precision float</li> 932 * <li>Otherwise, the returned value is rounded to the nearest 933 * representable half-precision float value</li> 934 * </ul> 935 * 936 * @param f The single-precision float value to convert to half-precision 937 * @return A half-precision float value 938 */ 939 @SuppressWarnings("StatementWithEmptyBody") toHalf(float f)940 public static @HalfFloat short toHalf(float f) { 941 int bits = Float.floatToRawIntBits(f); 942 int s = (bits >>> FP32_SIGN_SHIFT ); 943 int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_EXPONENT_MASK; 944 int m = (bits ) & FP32_SIGNIFICAND_MASK; 945 946 int outE = 0; 947 int outM = 0; 948 949 if (e == 0xff) { // Infinite or NaN 950 outE = 0x1f; 951 outM = m != 0 ? 0x200 : 0; 952 } else { 953 e = e - FP32_EXPONENT_BIAS + FP16_EXPONENT_BIAS; 954 if (e >= 0x1f) { // Overflow 955 outE = 0x31; 956 } else if (e <= 0) { // Underflow 957 if (e < -10) { 958 // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 959 } else { 960 // The fp32 value is a normalized float less than MIN_NORMAL, 961 // we convert to a denorm fp16 962 m = (m | 0x800000) >> (1 - e); 963 if ((m & 0x1000) != 0) m += 0x2000; 964 outM = m >> 13; 965 } 966 } else { 967 outE = e; 968 outM = m >> 13; 969 if ((m & 0x1000) != 0) { 970 // Round to nearest "0.5" up 971 int out = (outE << FP16_EXPONENT_SHIFT) | outM; 972 out++; 973 return (short) (out | (s << FP16_SIGN_SHIFT)); 974 } 975 } 976 } 977 978 return (short) ((s << FP16_SIGN_SHIFT) | (outE << FP16_EXPONENT_SHIFT) | outM); 979 } 980 981 /** 982 * Returns a {@code Half} instance representing the specified 983 * half-precision float value. 984 * 985 * @param h A half-precision float value 986 * @return a {@code Half} instance representing {@code h} 987 */ valueOf(@alfFloat short h)988 public static @NonNull Half valueOf(@HalfFloat short h) { 989 return new Half(h); 990 } 991 992 /** 993 * Returns a {@code Half} instance representing the specified float value. 994 * 995 * @param f A float value 996 * @return a {@code Half} instance representing {@code f} 997 */ valueOf(float f)998 public static @NonNull Half valueOf(float f) { 999 return new Half(f); 1000 } 1001 1002 /** 1003 * Returns a {@code Half} instance representing the specified string value. 1004 * Calling this method is equivalent to calling 1005 * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)} 1006 * for more information on the format of the string representation. 1007 * 1008 * @param s The string to be parsed 1009 * @return a {@code Half} instance representing {@code h} 1010 * @throws NumberFormatException if the string does not contain a parsable 1011 * half-precision float value 1012 */ valueOf(@onNull String s)1013 public static @NonNull Half valueOf(@NonNull String s) { 1014 return new Half(s); 1015 } 1016 1017 /** 1018 * Returns the half-precision float value represented by the specified string. 1019 * Calling this method is equivalent to calling 1020 * <code>toHalf(Float.parseString(h))</code>. See {@link Float#valueOf(String)} 1021 * for more information on the format of the string representation. 1022 * 1023 * @param s The string to be parsed 1024 * @return A half-precision float value represented by the string 1025 * @throws NumberFormatException if the string does not contain a parsable 1026 * half-precision float value 1027 */ parseHalf(@onNull String s)1028 public static @HalfFloat short parseHalf(@NonNull String s) throws NumberFormatException { 1029 return toHalf(FloatingDecimal.parseFloat(s)); 1030 } 1031 1032 /** 1033 * Returns a string representation of the specified half-precision 1034 * float value. Calling this method is equivalent to calling 1035 * <code>Float.toString(toFloat(h))</code>. See {@link Float#toString(float)} 1036 * for more information on the format of the string representation. 1037 * 1038 * @param h A half-precision float value 1039 * @return A string representation of the specified value 1040 */ 1041 @NonNull toString(@alfFloat short h)1042 public static String toString(@HalfFloat short h) { 1043 return Float.toString(toFloat(h)); 1044 } 1045 1046 /** 1047 * <p>Returns a hexadecimal string representation of the specified half-precision 1048 * float value. If the value is a NaN, the result is <code>"NaN"</code>, 1049 * otherwise the result follows this format:</p> 1050 * <ul> 1051 * <li>If the sign is positive, no sign character appears in the result</li> 1052 * <li>If the sign is negative, the first character is <code>'-'</code></li> 1053 * <li>If the value is inifinity, the string is <code>"Infinity"</code></li> 1054 * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li> 1055 * <li>If the value has a normalized representation, the exponent and 1056 * significand are represented in the string in two fields. The significand 1057 * starts with <code>"0x1."</code> followed by its lowercase hexadecimal 1058 * representation. Trailing zeroes are removed unless all digits are 0, then 1059 * a single zero is used. The significand representation is followed by the 1060 * exponent, represented by <code>"p"</code>, itself followed by a decimal 1061 * string of the unbiased exponent</li> 1062 * <li>If the value has a subnormal representation, the significand starts 1063 * with <code>"0x0."</code> followed by its lowercase hexadecimal 1064 * representation. Trailing zeroes are removed unless all digits are 0, then 1065 * a single zero is used. The significand representation is followed by the 1066 * exponent, represented by <code>"p-14"</code></li> 1067 * </ul> 1068 * 1069 * @param h A half-precision float value 1070 * @return A hexadecimal string representation of the specified value 1071 */ 1072 @NonNull toHexString(@alfFloat short h)1073 public static String toHexString(@HalfFloat short h) { 1074 StringBuilder o = new StringBuilder(); 1075 1076 int bits = h & 0xffff; 1077 int s = (bits >>> FP16_SIGN_SHIFT ); 1078 int e = (bits >>> FP16_EXPONENT_SHIFT) & FP16_EXPONENT_MASK; 1079 int m = (bits ) & FP16_SIGNIFICAND_MASK; 1080 1081 if (e == 0x1f) { // Infinite or NaN 1082 if (m == 0) { 1083 if (s != 0) o.append('-'); 1084 o.append("Infinity"); 1085 } else { 1086 o.append("NaN"); 1087 } 1088 } else { 1089 if (s == 1) o.append('-'); 1090 if (e == 0) { 1091 if (m == 0) { 1092 o.append("0x0.0p0"); 1093 } else { 1094 o.append("0x0."); 1095 String significand = Integer.toHexString(m); 1096 o.append(significand.replaceFirst("0{2,}$", "")); 1097 o.append("p-14"); 1098 } 1099 } else { 1100 o.append("0x1."); 1101 String significand = Integer.toHexString(m); 1102 o.append(significand.replaceFirst("0{2,}$", "")); 1103 o.append('p'); 1104 o.append(Integer.toString(e - FP16_EXPONENT_BIAS)); 1105 } 1106 } 1107 1108 return o.toString(); 1109 } 1110 } 1111