1 /* 2 * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 package test.java.lang.Float; 25 26 /* 27 * @test 28 * @bug 8289551 29 * @summary Verify conversion between float and the binary16 format 30 * @library ../Math 31 * @build FloatConsts 32 * @run main Binary16Conversion 33 * @run main/othervm -XX:+UnlockDiagnosticVMOptions 34 * -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 Binary16Conversion 35 */ 36 37 import jdk.internal.math.FloatConsts; 38 39 public class Binary16Conversion { main(String... argv)40 public static void main(String... argv) { 41 int errors = 0; 42 errors += binary16RoundTrip(); 43 // Note that helper methods do sign-symmetric testing 44 errors += binary16CardinalValues(); 45 errors += roundFloatToBinary16(); 46 errors += roundFloatToBinary16HalfWayCases(); 47 errors += roundFloatToBinary16FullBinade(); 48 errors += alternativeImplementation(); 49 50 if (errors > 0) 51 throw new RuntimeException(errors + " errors"); 52 } 53 54 /* 55 * Put all 16-bit values through a conversion loop and make sure 56 * the values are preserved (NaN bit patterns notwithstanding). 57 */ binary16RoundTrip()58 private static int binary16RoundTrip() { 59 int errors = 0; 60 for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) { 61 short s = (short)i; 62 float f = Float.float16ToFloat(s); 63 short s2 = Float.floatToFloat16(f); 64 65 if (!Binary16.equivalent(s, s2)) { 66 errors++; 67 System.out.println("Roundtrip failure on " + 68 Integer.toHexString(0xFFFF & (int)s) + 69 "\t got back " + Integer.toHexString(0xFFFF & (int)s2)); 70 } 71 } 72 return errors; 73 } 74 binary16CardinalValues()75 private static int binary16CardinalValues() { 76 int errors = 0; 77 // Encode short value for different binary16 cardinal values as an 78 // integer-valued float. 79 float[][] testCases = { 80 {Binary16.POSITIVE_ZERO, +0.0f}, 81 {Binary16.MIN_VALUE, 0x1.0p-24f}, 82 {Binary16.MAX_SUBNORMAL, 0x1.ff8p-15f}, 83 {Binary16.MIN_NORMAL, 0x1.0p-14f}, 84 {Binary16.ONE, 1.0f}, 85 {Binary16.MAX_VALUE, 65504.0f}, 86 {Binary16.POSITIVE_INFINITY, Float.POSITIVE_INFINITY}, 87 }; 88 89 // Check conversions in both directions 90 91 // short -> float 92 for (var testCase : testCases) { 93 errors += compareAndReportError((short)testCase[0], 94 testCase[1]); 95 } 96 97 // float -> short 98 for (var testCase : testCases) { 99 errors += compareAndReportError(testCase[1], 100 (short)testCase[0]); 101 } 102 103 return errors; 104 } 105 roundFloatToBinary16()106 private static int roundFloatToBinary16() { 107 int errors = 0; 108 109 float[][] testCases = { 110 // Test all combinations of LSB, round, and sticky bit 111 112 // LSB = 0, test combination of round and sticky 113 {0x1.ff8000p-1f, (short)0x3bfe}, // round = 0, sticky = 0 114 {0x1.ff8010p-1f, (short)0x3bfe}, // round = 0, sticky = 1 115 {0x1.ffa000p-1f, (short)0x3bfe}, // round = 1, sticky = 0 116 {0x1.ffa010p-1f, (short)0x3bff}, // round = 1, sticky = 1 => ++ 117 118 // LSB = 1, test combination of round and sticky 119 {0x1.ffc000p-1f, Binary16.ONE-1}, // round = 0, sticky = 0 120 {0x1.ffc010p-1f, Binary16.ONE-1}, // round = 0, sticky = 1 121 {0x1.ffe000p-1f, Binary16.ONE}, // round = 1, sticky = 0 => ++ 122 {0x1.ffe010p-1f, Binary16.ONE}, // round = 1, sticky = 1 => ++ 123 124 // Test subnormal rounding 125 // Largest subnormal binary16 0x03ff => 0x1.ff8p-15f; LSB = 1 126 {0x1.ff8000p-15f, Binary16.MAX_SUBNORMAL}, // round = 0, sticky = 0 127 {0x1.ff8010p-15f, Binary16.MAX_SUBNORMAL}, // round = 0, sticky = 1 128 {0x1.ffc000p-15f, Binary16.MIN_NORMAL}, // round = 1, sticky = 0 => ++ 129 {0x1.ffc010p-15f, Binary16.MIN_NORMAL}, // round = 1, sticky = 1 => ++ 130 131 // Test rounding near binary16 MIN_VALUE 132 // Smallest in magnitude subnormal binary16 value 0x0001 => 0x1.0p-24f 133 // Half-way case,0x1.0p-25f, and smaller should round down to zero 134 {0x1.fffffep-26f, Binary16.POSITIVE_ZERO}, // nextDown in float 135 {0x1.000000p-25f, Binary16.POSITIVE_ZERO}, 136 {0x1.000002p-25f, Binary16.MIN_VALUE}, // nextUp in float 137 {0x1.100000p-25f, Binary16.MIN_VALUE}, 138 139 // Test rounding near overflow threshold 140 // Largest normal binary16 number 0x7bff => 0x1.ffcp15f; LSB = 1 141 {0x1.ffc000p15f, Binary16.MAX_VALUE}, // round = 0, sticky = 0 142 {0x1.ffc010p15f, Binary16.MAX_VALUE}, // round = 0, sticky = 1 143 {0x1.ffe000p15f, Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 0 => ++ 144 {0x1.ffe010p15f, Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 1 => ++ 145 }; 146 147 for (var testCase : testCases) { 148 errors += compareAndReportError(testCase[0], 149 (short)testCase[1]); 150 } 151 return errors; 152 } 153 roundFloatToBinary16HalfWayCases()154 private static int roundFloatToBinary16HalfWayCases() { 155 int errors = 0; 156 157 // Test rounding of exact half-way cases between each pair of 158 // finite exactly-representable binary16 numbers. Also test 159 // rounding of half-way +/- ulp of the *float* value. 160 // Additionally, test +/- float ulp of the endpoints. (Other 161 // tests in this file make sure all short values round-trip so 162 // that doesn't need to be tested here.) 163 164 for (int i = Binary16.POSITIVE_ZERO; // 0x0000 165 i <= Binary16.MAX_VALUE; // 0x7bff 166 i += 2) { // Check every even/odd pair once 167 short lower = (short) i; 168 short upper = (short)(i+1); 169 170 float lowerFloat = Float.float16ToFloat(lower); 171 float upperFloat = Float.float16ToFloat(upper); 172 assert lowerFloat < upperFloat; 173 174 float midway = (lowerFloat + upperFloat) * 0.5f; // Exact midpoint 175 176 errors += compareAndReportError(Math.nextUp(lowerFloat), lower); 177 errors += compareAndReportError(Math.nextDown(midway), lower); 178 179 // Under round to nearest even, the midway point will 180 // round *down* to the (even) lower endpoint. 181 errors += compareAndReportError( midway, lower); 182 183 errors += compareAndReportError(Math.nextUp( midway), upper); 184 errors += compareAndReportError(Math.nextDown(upperFloat), upper); 185 } 186 187 // More testing around the overflow threshold 188 // Binary16.ulp(Binary16.MAX_VALUE) == 32.0f; test around Binary16.MAX_VALUE + 1/2 ulp 189 float binary16_MAX_VALUE = Float.float16ToFloat(Binary16.MAX_VALUE); 190 float binary16_MAX_VALUE_halfUlp = binary16_MAX_VALUE + 16.0f; 191 192 errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE), Binary16.MAX_VALUE); 193 errors += compareAndReportError( binary16_MAX_VALUE, Binary16.MAX_VALUE); 194 errors += compareAndReportError(Math.nextUp( binary16_MAX_VALUE), Binary16.MAX_VALUE); 195 196 // Binary16.MAX_VALUE is an "odd" value since its LSB = 1 so 197 // the half-way value greater than Binary16.MAX_VALUE should 198 // round up to the next even value, in this case Binary16.POSITIVE_INFINITY. 199 errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE_halfUlp), Binary16.MAX_VALUE); 200 errors += compareAndReportError( binary16_MAX_VALUE_halfUlp, Binary16.POSITIVE_INFINITY); 201 errors += compareAndReportError(Math.nextUp( binary16_MAX_VALUE_halfUlp), Binary16.POSITIVE_INFINITY); 202 203 return errors; 204 } 205 206 private static int compareAndReportError(float input, 207 short expected) { 208 // Round to nearest even is sign symmetric 209 return compareAndReportError0( input, expected) + 210 compareAndReportError0(-input, Binary16.negate(expected)); 211 } 212 213 private static int compareAndReportError0(float input, 214 short expected) { 215 short actual = Float.floatToFloat16(input); 216 if (!Binary16.equivalent(actual, expected)) { 217 System.out.println("Unexpected result of converting " + 218 Float.toHexString(input) + 219 " to short. Expected 0x" + Integer.toHexString(0xFFFF & expected) + 220 " got 0x" + Integer.toHexString(0xFFFF & actual)); 221 return 1; 222 } 223 return 0; 224 } 225 226 private static int compareAndReportError0(short input, 227 float expected) { 228 float actual = Float.float16ToFloat(input); 229 if (Float.compare(actual, expected) != 0) { 230 System.out.println("Unexpected result of converting " + 231 Integer.toHexString(input & 0xFFFF) + 232 " to float. Expected " + Float.toHexString(expected) + 233 " got " + Float.toHexString(actual)); 234 return 1; 235 } 236 return 0; 237 } 238 239 private static int compareAndReportError(short input, 240 float expected) { 241 // Round to nearest even is sign symmetric 242 return compareAndReportError0( input, expected) + 243 compareAndReportError0(Binary16.negate(input), -expected); 244 } 245 246 private static int roundFloatToBinary16FullBinade() { 247 int errors = 0; 248 249 // For each float value between 1.0 and less than 2.0 250 // (i.e. set of float values with an exponent of 0), convert 251 // each value to binary16 and then convert that binary16 value 252 // back to float. 253 // 254 // Any exponent could be used; the maximum exponent for normal 255 // values would not exercise the full set of code paths since 256 // there is an up-front check on values that would overflow, 257 // which correspond to a ripple-carry of the significand that 258 // bumps the exponent. 259 short previous = (short)0; 260 for (int i = Float.floatToIntBits(1.0f); 261 i <= Float.floatToIntBits(Math.nextDown(2.0f)); 262 i++) { 263 // (Could also express the loop control directly in terms 264 // of floating-point operations, incrementing by ulp(1.0), 265 // etc.) 266 267 float f = Float.intBitsToFloat(i); 268 short f_as_bin16 = Float.floatToFloat16(f); 269 short f_as_bin16_down = (short)(f_as_bin16 - 1); 270 short f_as_bin16_up = (short)(f_as_bin16 + 1); 271 272 // Across successive float values to convert to binary16, 273 // the binary16 results should be semi-monotonic, 274 // non-decreasing in this case. 275 276 // Only positive binary16 values so can compare using integer operations 277 if (f_as_bin16 < previous) { 278 errors++; 279 System.out.println("Semi-monotonicity violation observed on " + 280 Integer.toHexString(0xfff & f_as_bin16)); 281 } 282 previous = f_as_bin16; 283 284 // If round-to-nearest was correctly done, when exactly 285 // mapped back to float, f_as_bin16 should be at least as 286 // close as either of its neighbors to the original value 287 // of f. 288 289 float f_prime_down = Float.float16ToFloat(f_as_bin16_down); 290 float f_prime = Float.float16ToFloat(f_as_bin16); 291 float f_prime_up = Float.float16ToFloat(f_as_bin16_up); 292 293 float f_prime_diff = Math.abs(f - f_prime); 294 if (f_prime_diff == 0.0) { 295 continue; 296 } 297 float f_prime_down_diff = Math.abs(f - f_prime_down); 298 float f_prime_up_diff = Math.abs(f - f_prime_up); 299 300 if (f_prime_diff > f_prime_down_diff || 301 f_prime_diff > f_prime_up_diff) { 302 errors++; 303 System.out.println("Round-to-nearest violation on converting " + 304 Float.toHexString(f) + " to binary16 and back."); 305 } 306 } 307 return errors; 308 } 309 310 private static int alternativeImplementation() { 311 int errors = 0; 312 313 // For exhaustive test of all float values use 314 // for (long ell = Integer.MIN_VALUE; ell <= Integer.MAX_VALUE; ell++) { 315 316 for (long ell = Float.floatToIntBits(2.0f); 317 ell <= Float.floatToIntBits(4.0f); 318 ell++) { 319 float f = Float.intBitsToFloat((int)ell); 320 short s1 = Float.floatToFloat16(f); 321 short s2 = altFloatToFloat16(f); 322 323 if (s1 != s2) { 324 errors++; 325 System.out.println("Different conversion of float value " + Float.toHexString(f)); 326 } 327 } 328 329 return errors; 330 } 331 332 /* 333 * Rely on float operations to do rounding in both normal and 334 * subnormal binary16 cases. 335 */ 336 public static short altFloatToFloat16(float f) { 337 int doppel = Float.floatToRawIntBits(f); 338 short sign_bit = (short)((doppel & 0x8000_0000) >> 16); 339 340 if (Float.isNaN(f)) { 341 // Preserve sign and attempt to preserve significand bits 342 return (short)(sign_bit 343 | 0x7c00 // max exponent + 1 344 // Preserve high order bit of float NaN in the 345 // binary16 result NaN (tenth bit); OR in remaining 346 // bits into lower 9 bits of binary 16 significand. 347 | (doppel & 0x007f_e000) >> 13 // 10 bits 348 | (doppel & 0x0000_1ff0) >> 4 // 9 bits 349 | (doppel & 0x0000_000f)); // 4 bits 350 } 351 352 float abs_f = Math.abs(f); 353 354 // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp 355 if (abs_f >= (65504.0f + 16.0f) ) { 356 return (short)(sign_bit | 0x7c00); // Positive or negative infinity 357 } else { 358 // Smallest magnitude nonzero representable binary16 value 359 // is equal to 0x1.0p-24; half-way and smaller rounds to zero. 360 if (abs_f <= 0x1.0p-25f) { // Covers float zeros and subnormals. 361 return sign_bit; // Positive or negative zero 362 } 363 364 // Dealing with finite values in exponent range of 365 // binary16 (when rounding is done, could still round up) 366 int exp = Math.getExponent(f); 367 assert -25 <= exp && exp <= 15; 368 short signif_bits; 369 370 if (exp <= -15) { // scale down to float subnormal range to do rounding 371 // Use a float multiply to compute the correct 372 // trailing significand bits for a binary16 subnormal. 373 // 374 // The exponent range of normalized binary16 subnormal 375 // values is [-24, -15]. The exponent range of float 376 // subnormals is [-149, -140]. Multiply abs_f down by 377 // 2^(-125) -- since (-125 = -149 - (-24)) -- so that 378 // the trailing bits of a subnormal float represent 379 // the correct trailing bits of a binary16 subnormal. 380 exp = -15; // Subnormal encoding using -E_max. 381 float f_adjust = abs_f * 0x1.0p-125f; 382 383 // In case the significand rounds up and has a carry 384 // propagate all the way up, take the bottom 11 bits 385 // rather than bottom 10 bits. Adding this value, 386 // rather than OR'ing htis value, will cause the right 387 // exponent adjustment. 388 signif_bits = (short)(Float.floatToRawIntBits(f_adjust) & 0x07ff); 389 return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) ); 390 } else { 391 // Scale down to subnormal range to round off excess bits 392 int scalingExp = -139 - exp; 393 float scaled = Math.scalb(Math.scalb(f, scalingExp), 394 -scalingExp); 395 exp = Math.getExponent(scaled); 396 doppel = Float.floatToRawIntBits(scaled); 397 398 signif_bits = (short)((doppel & 0x007f_e000) >> 399 (FloatConsts.SIGNIFICAND_WIDTH - 11)); 400 return (short)(sign_bit | ( ((exp + 15) << 10) | signif_bits ) ); 401 } 402 } 403 } 404 405 public static class Binary16 { 406 public static final short POSITIVE_INFINITY = (short)0x7c00; 407 public static final short MAX_VALUE = 0x7bff; 408 public static final short ONE = 0x3c00; 409 public static final short MIN_NORMAL = 0x0400; 410 public static final short MAX_SUBNORMAL = 0x03ff; 411 public static final short MIN_VALUE = 0x0001; 412 public static final short POSITIVE_ZERO = 0x0000; 413 isNaN(short binary16)414 public static boolean isNaN(short binary16) { 415 return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and... 416 && ((binary16 & 0x03ff) != 0 ); // significand nonzero. 417 } 418 negate(short binary16)419 public static short negate(short binary16) { 420 return (short)(binary16 ^ 0x8000 ); // Flip only sign bit. 421 } 422 equivalent(short bin16_1, short bin16_2)423 public static boolean equivalent(short bin16_1, short bin16_2) { 424 return (bin16_1 == bin16_2) || 425 isNaN(bin16_1) && isNaN(bin16_2); 426 } 427 } 428 } 429