1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.util; 18 19 /** 20 * <p>The {@code FP16} class is a wrapper and a utility class to manipulate half-precision 16-bit 21 * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a> 22 * floating point data types (also called fp16 or binary16). A half-precision float can be 23 * created from or converted to single-precision floats, and is stored in a short data type. 24 * 25 * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p> 26 * <ul> 27 * <li>Sign bit: 1 bit</li> 28 * <li>Exponent width: 5 bits</li> 29 * <li>Significand: 10 bits</li> 30 * </ul> 31 * 32 * <p>The format is laid out as follows:</p> 33 * <pre> 34 * 1 11111 1111111111 35 * ^ --^-- -----^---- 36 * sign | |_______ significand 37 * | 38 * -- exponent 39 * </pre> 40 * 41 * <p>Half-precision floating points can be useful to save memory and/or 42 * bandwidth at the expense of range and precision when compared to single-precision 43 * floating points (fp32).</p> 44 * <p>To help you decide whether fp16 is the right storage type for you need, please 45 * refer to the table below that shows the available precision throughout the range of 46 * possible values. The <em>precision</em> column indicates the step size between two 47 * consecutive numbers in a specific part of the range.</p> 48 * 49 * <table summary="Precision of fp16 across the range"> 50 * <tr><th>Range start</th><th>Precision</th></tr> 51 * <tr><td>0</td><td>1 ⁄ 16,777,216</td></tr> 52 * <tr><td>1 ⁄ 16,384</td><td>1 ⁄ 16,777,216</td></tr> 53 * <tr><td>1 ⁄ 8,192</td><td>1 ⁄ 8,388,608</td></tr> 54 * <tr><td>1 ⁄ 4,096</td><td>1 ⁄ 4,194,304</td></tr> 55 * <tr><td>1 ⁄ 2,048</td><td>1 ⁄ 2,097,152</td></tr> 56 * <tr><td>1 ⁄ 1,024</td><td>1 ⁄ 1,048,576</td></tr> 57 * <tr><td>1 ⁄ 512</td><td>1 ⁄ 524,288</td></tr> 58 * <tr><td>1 ⁄ 256</td><td>1 ⁄ 262,144</td></tr> 59 * <tr><td>1 ⁄ 128</td><td>1 ⁄ 131,072</td></tr> 60 * <tr><td>1 ⁄ 64</td><td>1 ⁄ 65,536</td></tr> 61 * <tr><td>1 ⁄ 32</td><td>1 ⁄ 32,768</td></tr> 62 * <tr><td>1 ⁄ 16</td><td>1 ⁄ 16,384</td></tr> 63 * <tr><td>1 ⁄ 8</td><td>1 ⁄ 8,192</td></tr> 64 * <tr><td>1 ⁄ 4</td><td>1 ⁄ 4,096</td></tr> 65 * <tr><td>1 ⁄ 2</td><td>1 ⁄ 2,048</td></tr> 66 * <tr><td>1</td><td>1 ⁄ 1,024</td></tr> 67 * <tr><td>2</td><td>1 ⁄ 512</td></tr> 68 * <tr><td>4</td><td>1 ⁄ 256</td></tr> 69 * <tr><td>8</td><td>1 ⁄ 128</td></tr> 70 * <tr><td>16</td><td>1 ⁄ 64</td></tr> 71 * <tr><td>32</td><td>1 ⁄ 32</td></tr> 72 * <tr><td>64</td><td>1 ⁄ 16</td></tr> 73 * <tr><td>128</td><td>1 ⁄ 8</td></tr> 74 * <tr><td>256</td><td>1 ⁄ 4</td></tr> 75 * <tr><td>512</td><td>1 ⁄ 2</td></tr> 76 * <tr><td>1,024</td><td>1</td></tr> 77 * <tr><td>2,048</td><td>2</td></tr> 78 * <tr><td>4,096</td><td>4</td></tr> 79 * <tr><td>8,192</td><td>8</td></tr> 80 * <tr><td>16,384</td><td>16</td></tr> 81 * <tr><td>32,768</td><td>32</td></tr> 82 * </table> 83 * 84 * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p> 85 * 86 * @hide 87 */ 88 89 public final class FP16 { 90 /** 91 * The number of bits used to represent a half-precision float value. 92 * 93 * @hide 94 */ 95 public static final int SIZE = 16; 96 97 /** 98 * Epsilon is the difference between 1.0 and the next value representable 99 * by a half-precision floating-point. 100 * 101 * @hide 102 */ 103 public static final short EPSILON = (short) 0x1400; 104 105 /** 106 * Maximum exponent a finite half-precision float may have. 107 * 108 * @hide 109 */ 110 public static final int MAX_EXPONENT = 15; 111 /** 112 * Minimum exponent a normalized half-precision float may have. 113 * 114 * @hide 115 */ 116 public static final int MIN_EXPONENT = -14; 117 118 /** 119 * Smallest negative value a half-precision float may have. 120 * 121 * @hide 122 */ 123 public static final short LOWEST_VALUE = (short) 0xfbff; 124 /** 125 * Maximum positive finite value a half-precision float may have. 126 * 127 * @hide 128 */ 129 public static final short MAX_VALUE = (short) 0x7bff; 130 /** 131 * Smallest positive normal value a half-precision float may have. 132 * 133 * @hide 134 */ 135 public static final short MIN_NORMAL = (short) 0x0400; 136 /** 137 * Smallest positive non-zero value a half-precision float may have. 138 * 139 * @hide 140 */ 141 public static final short MIN_VALUE = (short) 0x0001; 142 /** 143 * A Not-a-Number representation of a half-precision float. 144 * 145 * @hide 146 */ 147 public static final short NaN = (short) 0x7e00; 148 /** 149 * Negative infinity of type half-precision float. 150 * 151 * @hide 152 */ 153 public static final short NEGATIVE_INFINITY = (short) 0xfc00; 154 /** 155 * Negative 0 of type half-precision float. 156 * 157 * @hide 158 */ 159 public static final short NEGATIVE_ZERO = (short) 0x8000; 160 /** 161 * Positive infinity of type half-precision float. 162 * 163 * @hide 164 */ 165 public static final short POSITIVE_INFINITY = (short) 0x7c00; 166 /** 167 * Positive 0 of type half-precision float. 168 * 169 * @hide 170 */ 171 public static final short POSITIVE_ZERO = (short) 0x0000; 172 173 /** 174 * The offset to shift by to obtain the sign bit. 175 * 176 * @hide 177 */ 178 public static final int SIGN_SHIFT = 15; 179 180 /** 181 * The offset to shift by to obtain the exponent bits. 182 * 183 * @hide 184 */ 185 public static final int EXPONENT_SHIFT = 10; 186 187 /** 188 * The bitmask to AND a number with to obtain the sign bit. 189 * 190 * @hide 191 */ 192 public static final int SIGN_MASK = 0x8000; 193 194 /** 195 * The bitmask to AND a number shifted by {@link #EXPONENT_SHIFT} right, to obtain exponent bits. 196 * 197 * @hide 198 */ 199 public static final int SHIFTED_EXPONENT_MASK = 0x1f; 200 201 /** 202 * The bitmask to AND a number with to obtain significand bits. 203 * 204 * @hide 205 */ 206 public static final int SIGNIFICAND_MASK = 0x3ff; 207 208 /** 209 * The bitmask to AND with to obtain exponent and significand bits. 210 * 211 * @hide 212 */ 213 public static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; 214 215 /** 216 * The offset of the exponent from the actual value. 217 * 218 * @hide 219 */ 220 public static final int EXPONENT_BIAS = 15; 221 222 private static final int FP32_SIGN_SHIFT = 31; 223 private static final int FP32_EXPONENT_SHIFT = 23; 224 private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; 225 private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; 226 private static final int FP32_EXPONENT_BIAS = 127; 227 private static final int FP32_QNAN_MASK = 0x400000; 228 private static final int FP32_DENORMAL_MAGIC = 126 << 23; 229 private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); 230 231 /** Hidden constructor to prevent instantiation. */ FP16()232 private FP16() {} 233 234 /** 235 * <p>Compares the two specified half-precision float values. The following 236 * conditions apply during the comparison:</p> 237 * 238 * <ul> 239 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 240 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 241 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 242 * {@link #NEGATIVE_ZERO}.</li> 243 * </ul> 244 * 245 * @param x The first half-precision float value to compare. 246 * @param y The second half-precision float value to compare 247 * 248 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a 249 * value less than {@code 0} if {@code x} is numerically less than {@code y}, 250 * and a value greater than {@code 0} if {@code x} is numerically greater 251 * than {@code y} 252 * 253 * @hide 254 */ compare(short x, short y)255 public static int compare(short x, short y) { 256 if (less(x, y)) return -1; 257 if (greater(x, y)) return 1; 258 259 // Collapse NaNs, akin to halfToIntBits(), but we want to keep 260 // (signed) short value types to preserve the ordering of -0.0 261 // and +0.0 262 short xBits = isNaN(x) ? NaN : x; 263 short yBits = isNaN(y) ? NaN : y; 264 265 return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); 266 } 267 268 /** 269 * Returns the closest integral half-precision float value to the specified 270 * half-precision float value. Special values are handled in the 271 * following ways: 272 * <ul> 273 * <li>If the specified half-precision float is NaN, the result is NaN</li> 274 * <li>If the specified half-precision float is infinity (negative or positive), 275 * the result is infinity (with the same sign)</li> 276 * <li>If the specified half-precision float is zero (negative or positive), 277 * the result is zero (with the same sign)</li> 278 * </ul> 279 * 280 * @param h A half-precision float value 281 * @return The value of the specified half-precision float rounded to the nearest 282 * half-precision float value 283 * 284 * @hide 285 */ rint(short h)286 public static short rint(short h) { 287 int bits = h & 0xffff; 288 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 289 int result = bits; 290 291 if (abs < 0x3c00) { 292 result &= SIGN_MASK; 293 if (abs > 0x3800){ 294 result |= 0x3c00; 295 } 296 } else if (abs < 0x6400) { 297 int exp = 25 - (abs >> 10); 298 int mask = (1 << exp) - 1; 299 result += ((1 << (exp - 1)) - (~(abs >> exp) & 1)); 300 result &= ~mask; 301 } 302 if (isNaN((short) result)) { 303 // if result is NaN mask with qNaN 304 // (i.e. mask the most significant mantissa bit with 1) 305 // to comply with hardware implementations (ARM64, Intel, etc). 306 result |= NaN; 307 } 308 309 return (short) result; 310 } 311 312 /** 313 * Returns the smallest half-precision float value toward negative infinity 314 * greater than or equal to the specified half-precision float value. 315 * Special values are handled in the following ways: 316 * <ul> 317 * <li>If the specified half-precision float is NaN, the result is NaN</li> 318 * <li>If the specified half-precision float is infinity (negative or positive), 319 * the result is infinity (with the same sign)</li> 320 * <li>If the specified half-precision float is zero (negative or positive), 321 * the result is zero (with the same sign)</li> 322 * </ul> 323 * 324 * @param h A half-precision float value 325 * @return The smallest half-precision float value toward negative infinity 326 * greater than or equal to the specified half-precision float value 327 * 328 * @hide 329 */ ceil(short h)330 public static short ceil(short h) { 331 int bits = h & 0xffff; 332 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 333 int result = bits; 334 335 if (abs < 0x3c00) { 336 result &= SIGN_MASK; 337 result |= 0x3c00 & -(~(bits >> 15) & (abs != 0 ? 1 : 0)); 338 } else if (abs < 0x6400) { 339 abs = 25 - (abs >> 10); 340 int mask = (1 << abs) - 1; 341 result += mask & ((bits >> 15) - 1); 342 result &= ~mask; 343 } 344 if (isNaN((short) result)) { 345 // if result is NaN mask with qNaN 346 // (i.e. mask the most significant mantissa bit with 1) 347 // to comply with hardware implementations (ARM64, Intel, etc). 348 result |= NaN; 349 } 350 351 return (short) result; 352 } 353 354 /** 355 * Returns the largest half-precision float value toward positive infinity 356 * less than or equal to the specified half-precision float value. 357 * Special values are handled in the following ways: 358 * <ul> 359 * <li>If the specified half-precision float is NaN, the result is NaN</li> 360 * <li>If the specified half-precision float is infinity (negative or positive), 361 * the result is infinity (with the same sign)</li> 362 * <li>If the specified half-precision float is zero (negative or positive), 363 * the result is zero (with the same sign)</li> 364 * </ul> 365 * 366 * @param h A half-precision float value 367 * @return The largest half-precision float value toward positive infinity 368 * less than or equal to the specified half-precision float value 369 * 370 * @hide 371 */ floor(short h)372 public static short floor(short h) { 373 int bits = h & 0xffff; 374 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 375 int result = bits; 376 377 if (abs < 0x3c00) { 378 result &= SIGN_MASK; 379 result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0); 380 } else if (abs < 0x6400) { 381 abs = 25 - (abs >> 10); 382 int mask = (1 << abs) - 1; 383 result += mask & -(bits >> 15); 384 result &= ~mask; 385 } 386 if (isNaN((short) result)) { 387 // if result is NaN mask with qNaN 388 // i.e. (Mask the most significant mantissa bit with 1) 389 result |= NaN; 390 } 391 392 return (short) result; 393 } 394 395 /** 396 * Returns the truncated half-precision float value of the specified 397 * half-precision float value. Special values are handled in the following ways: 398 * <ul> 399 * <li>If the specified half-precision float is NaN, the result is NaN</li> 400 * <li>If the specified half-precision float is infinity (negative or positive), 401 * the result is infinity (with the same sign)</li> 402 * <li>If the specified half-precision float is zero (negative or positive), 403 * the result is zero (with the same sign)</li> 404 * </ul> 405 * 406 * @param h A half-precision float value 407 * @return The truncated half-precision float value of the specified 408 * half-precision float value 409 * 410 * @hide 411 */ trunc(short h)412 public static short trunc(short h) { 413 int bits = h & 0xffff; 414 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 415 int result = bits; 416 417 if (abs < 0x3c00) { 418 result &= SIGN_MASK; 419 } else if (abs < 0x6400) { 420 abs = 25 - (abs >> 10); 421 int mask = (1 << abs) - 1; 422 result &= ~mask; 423 } 424 425 return (short) result; 426 } 427 428 /** 429 * Returns the smaller of two half-precision float values (the value closest 430 * to negative infinity). Special values are handled in the following ways: 431 * <ul> 432 * <li>If either value is NaN, the result is NaN</li> 433 * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li> 434 * </ul> 435 * 436 * @param x The first half-precision value 437 * @param y The second half-precision value 438 * @return The smaller of the two specified half-precision values 439 * 440 * @hide 441 */ min(short x, short y)442 public static short min(short x, short y) { 443 if (isNaN(x)) return NaN; 444 if (isNaN(y)) return NaN; 445 446 if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) { 447 return (x & SIGN_MASK) != 0 ? x : y; 448 } 449 450 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 451 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 452 } 453 454 /** 455 * Returns the larger of two half-precision float values (the value closest 456 * to positive infinity). Special values are handled in the following ways: 457 * <ul> 458 * <li>If either value is NaN, the result is NaN</li> 459 * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li> 460 * </ul> 461 * 462 * @param x The first half-precision value 463 * @param y The second half-precision value 464 * 465 * @return The larger of the two specified half-precision values 466 * 467 * @hide 468 */ max(short x, short y)469 public static short max(short x, short y) { 470 if (isNaN(x)) return NaN; 471 if (isNaN(y)) return NaN; 472 473 if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) { 474 return (x & SIGN_MASK) != 0 ? y : x; 475 } 476 477 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 478 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 479 } 480 481 /** 482 * Returns true if the first half-precision float value is less (smaller 483 * toward negative infinity) than the second half-precision float value. 484 * If either of the values is NaN, the result is false. 485 * 486 * @param x The first half-precision value 487 * @param y The second half-precision value 488 * 489 * @return True if x is less than y, false otherwise 490 * 491 * @hide 492 */ less(short x, short y)493 public static boolean less(short x, short y) { 494 if (isNaN(x)) return false; 495 if (isNaN(y)) return false; 496 497 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 498 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 499 } 500 501 /** 502 * Returns true if the first half-precision float value is less (smaller 503 * toward negative infinity) than or equal to the second half-precision 504 * float value. If either of the values is NaN, the result is false. 505 * 506 * @param x The first half-precision value 507 * @param y The second half-precision value 508 * 509 * @return True if x is less than or equal to y, false otherwise 510 * 511 * @hide 512 */ lessEquals(short x, short y)513 public static boolean lessEquals(short x, short y) { 514 if (isNaN(x)) return false; 515 if (isNaN(y)) return false; 516 517 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <= 518 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 519 } 520 521 /** 522 * Returns true if the first half-precision float value is greater (larger 523 * toward positive infinity) than the second half-precision float value. 524 * If either of the values is NaN, the result is false. 525 * 526 * @param x The first half-precision value 527 * @param y The second half-precision value 528 * 529 * @return True if x is greater than y, false otherwise 530 * 531 * @hide 532 */ greater(short x, short y)533 public static boolean greater(short x, short y) { 534 if (isNaN(x)) return false; 535 if (isNaN(y)) return false; 536 537 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 538 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 539 } 540 541 /** 542 * Returns true if the first half-precision float value is greater (larger 543 * toward positive infinity) than or equal to the second half-precision float 544 * value. If either of the values is NaN, the result is false. 545 * 546 * @param x The first half-precision value 547 * @param y The second half-precision value 548 * 549 * @return True if x is greater than y, false otherwise 550 * 551 * @hide 552 */ greaterEquals(short x, short y)553 public static boolean greaterEquals(short x, short y) { 554 if (isNaN(x)) return false; 555 if (isNaN(y)) return false; 556 557 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >= 558 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 559 } 560 561 /** 562 * Returns true if the two half-precision float values are equal. 563 * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO} 564 * and {@link #NEGATIVE_ZERO} are considered equal. 565 * 566 * @param x The first half-precision value 567 * @param y The second half-precision value 568 * 569 * @return True if x is equal to y, false otherwise 570 * 571 * @hide 572 */ equals(short x, short y)573 public static boolean equals(short x, short y) { 574 if (isNaN(x)) return false; 575 if (isNaN(y)) return false; 576 577 return x == y || ((x | y) & EXPONENT_SIGNIFICAND_MASK) == 0; 578 } 579 580 /** 581 * Returns true if the specified half-precision float value represents 582 * infinity, false otherwise. 583 * 584 * @param h A half-precision float value 585 * @return True if the value is positive infinity or negative infinity, 586 * false otherwise 587 * 588 * @hide 589 */ isInfinite(short h)590 public static boolean isInfinite(short h) { 591 return (h & EXPONENT_SIGNIFICAND_MASK) == POSITIVE_INFINITY; 592 } 593 594 /** 595 * Returns true if the specified half-precision float value represents 596 * a Not-a-Number, false otherwise. 597 * 598 * @param h A half-precision float value 599 * @return True if the value is a NaN, false otherwise 600 * 601 * @hide 602 */ isNaN(short h)603 public static boolean isNaN(short h) { 604 return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; 605 } 606 607 /** 608 * Returns true if the specified half-precision float value is normalized 609 * (does not have a subnormal representation). If the specified value is 610 * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY}, 611 * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal 612 * number, this method returns false. 613 * 614 * @param h A half-precision float value 615 * @return True if the value is normalized, false otherwise 616 * 617 * @hide 618 */ isNormalized(short h)619 public static boolean isNormalized(short h) { 620 return (h & POSITIVE_INFINITY) != 0 && (h & POSITIVE_INFINITY) != POSITIVE_INFINITY; 621 } 622 623 /** 624 * <p>Converts the specified half-precision float value into a 625 * single-precision float value. The following special cases are handled:</p> 626 * <ul> 627 * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li> 628 * <li>If the input is {@link #POSITIVE_INFINITY} or 629 * {@link #NEGATIVE_INFINITY}, the returned value is respectively 630 * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li> 631 * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li> 632 * <li>Otherwise, the returned value is a normalized single-precision float value</li> 633 * </ul> 634 * 635 * @param h The half-precision float value to convert to single-precision 636 * @return A normalized single-precision float value 637 * 638 * @hide 639 */ toFloat(short h)640 public static float toFloat(short h) { 641 int bits = h & 0xffff; 642 int s = bits & SIGN_MASK; 643 int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; 644 int m = (bits ) & SIGNIFICAND_MASK; 645 646 int outE = 0; 647 int outM = 0; 648 649 if (e == 0) { // Denormal or 0 650 if (m != 0) { 651 // Convert denorm fp16 into normalized fp32 652 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); 653 o -= FP32_DENORMAL_FLOAT; 654 return s == 0 ? o : -o; 655 } 656 } else { 657 outM = m << 13; 658 if (e == 0x1f) { // Infinite or NaN 659 outE = 0xff; 660 if (outM != 0) { // SNaNs are quieted 661 outM |= FP32_QNAN_MASK; 662 } 663 } else { 664 outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS; 665 } 666 } 667 668 int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; 669 return Float.intBitsToFloat(out); 670 } 671 672 /** 673 * <p>Converts the specified single-precision float value into a 674 * half-precision float value. The following special cases are handled:</p> 675 * <ul> 676 * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned 677 * value is {@link #NaN}</li> 678 * <li>If the input is {@link Float#POSITIVE_INFINITY} or 679 * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively 680 * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li> 681 * <li>If the input is 0 (positive or negative), the returned value is 682 * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 683 * <li>If the input is a less than {@link #MIN_VALUE}, the returned value 684 * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 685 * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value 686 * is a denorm half-precision float</li> 687 * <li>Otherwise, the returned value is rounded to the nearest 688 * representable half-precision float value</li> 689 * </ul> 690 * 691 * @param f The single-precision float value to convert to half-precision 692 * @return A half-precision float value 693 * 694 * @hide 695 */ toHalf(float f)696 public static short toHalf(float f) { 697 int bits = Float.floatToRawIntBits(f); 698 int s = (bits >>> FP32_SIGN_SHIFT ); 699 int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; 700 int m = (bits ) & FP32_SIGNIFICAND_MASK; 701 702 int outE = 0; 703 int outM = 0; 704 705 if (e == 0xff) { // Infinite or NaN 706 outE = 0x1f; 707 outM = m != 0 ? 0x200 : 0; 708 } else { 709 e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS; 710 if (e >= 0x1f) { // Overflow 711 outE = 0x1f; 712 } else if (e <= 0) { // Underflow 713 if (e < -10) { 714 // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 715 } else { 716 // The fp32 value is a normalized float less than MIN_NORMAL, 717 // we convert to a denorm fp16 718 m = m | 0x800000; 719 int shift = 14 - e; 720 outM = m >> shift; 721 722 int lowm = m & ((1 << shift) - 1); 723 int hway = 1 << (shift - 1); 724 // if above halfway or exactly halfway and outM is odd 725 if (lowm + (outM & 1) > hway){ 726 // Round to nearest even 727 // Can overflow into exponent bit, which surprisingly is OK. 728 // This increment relies on the +outM in the return statement below 729 outM++; 730 } 731 } 732 } else { 733 outE = e; 734 outM = m >> 13; 735 // if above halfway or exactly halfway and outM is odd 736 if ((m & 0x1fff) + (outM & 0x1) > 0x1000) { 737 // Round to nearest even 738 // Can overflow into exponent bit, which surprisingly is OK. 739 // This increment relies on the +outM in the return statement below 740 outM++; 741 } 742 } 743 } 744 // The outM is added here as the +1 increments for outM above can 745 // cause an overflow in the exponent bit which is OK. 746 return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); 747 } 748 749 /** 750 * <p>Returns a hexadecimal string representation of the specified half-precision 751 * float value. If the value is a NaN, the result is <code>"NaN"</code>, 752 * otherwise the result follows this format:</p> 753 * <ul> 754 * <li>If the sign is positive, no sign character appears in the result</li> 755 * <li>If the sign is negative, the first character is <code>'-'</code></li> 756 * <li>If the value is inifinity, the string is <code>"Infinity"</code></li> 757 * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li> 758 * <li>If the value has a normalized representation, the exponent and 759 * significand are represented in the string in two fields. The significand 760 * starts with <code>"0x1."</code> followed by its lowercase hexadecimal 761 * representation. Trailing zeroes are removed unless all digits are 0, then 762 * a single zero is used. The significand representation is followed by the 763 * exponent, represented by <code>"p"</code>, itself followed by a decimal 764 * string of the unbiased exponent</li> 765 * <li>If the value has a subnormal representation, the significand starts 766 * with <code>"0x0."</code> followed by its lowercase hexadecimal 767 * representation. Trailing zeroes are removed unless all digits are 0, then 768 * a single zero is used. The significand representation is followed by the 769 * exponent, represented by <code>"p-14"</code></li> 770 * </ul> 771 * 772 * @param h A half-precision float value 773 * @return A hexadecimal string representation of the specified value 774 * 775 * @hide 776 */ toHexString(short h)777 public static String toHexString(short h) { 778 StringBuilder o = new StringBuilder(); 779 780 int bits = h & 0xffff; 781 int s = (bits >>> SIGN_SHIFT ); 782 int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; 783 int m = (bits ) & SIGNIFICAND_MASK; 784 785 if (e == 0x1f) { // Infinite or NaN 786 if (m == 0) { 787 if (s != 0) o.append('-'); 788 o.append("Infinity"); 789 } else { 790 o.append("NaN"); 791 } 792 } else { 793 if (s == 1) o.append('-'); 794 if (e == 0) { 795 if (m == 0) { 796 o.append("0x0.0p0"); 797 } else { 798 o.append("0x0."); 799 String significand = Integer.toHexString(m); 800 o.append(significand.replaceFirst("0{2,}$", "")); 801 o.append("p-14"); 802 } 803 } else { 804 o.append("0x1."); 805 String significand = Integer.toHexString(m); 806 o.append(significand.replaceFirst("0{2,}$", "")); 807 o.append('p'); 808 o.append(Integer.toString(e - EXPONENT_BIAS)); 809 } 810 } 811 812 return o.toString(); 813 } 814 } 815