• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 package test.java.lang.Float;
25 
26 /*
27  * @test
28  * @bug 8289551
29  * @summary Verify conversion between float and the binary16 format
30  * @library ../Math
31  * @build FloatConsts
32  * @run main Binary16Conversion
33  * @run main/othervm -XX:+UnlockDiagnosticVMOptions
34  * -XX:DisableIntrinsic=_float16ToFloat,_floatToFloat16 Binary16Conversion
35  */
36 
37 import jdk.internal.math.FloatConsts;
38 
39 public class Binary16Conversion {
main(String... argv)40     public static void main(String... argv) {
41         int errors = 0;
42         errors += binary16RoundTrip();
43         // Note that helper methods do sign-symmetric testing
44         errors += binary16CardinalValues();
45         errors += roundFloatToBinary16();
46         errors += roundFloatToBinary16HalfWayCases();
47         errors += roundFloatToBinary16FullBinade();
48         errors += alternativeImplementation();
49 
50         if (errors > 0)
51             throw new RuntimeException(errors + " errors");
52     }
53 
54     /*
55      * Put all 16-bit values through a conversion loop and make sure
56      * the values are preserved (NaN bit patterns notwithstanding).
57      */
binary16RoundTrip()58     private static int binary16RoundTrip() {
59         int errors = 0;
60         for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
61             short s = (short)i;
62             float f =  Float.float16ToFloat(s);
63             short s2 = Float.floatToFloat16(f);
64 
65             if (!Binary16.equivalent(s, s2)) {
66                 errors++;
67                 System.out.println("Roundtrip failure on " +
68                                    Integer.toHexString(0xFFFF & (int)s) +
69                                    "\t got back " + Integer.toHexString(0xFFFF & (int)s2));
70             }
71         }
72         return errors;
73     }
74 
binary16CardinalValues()75     private static int binary16CardinalValues() {
76         int errors = 0;
77         // Encode short value for different binary16 cardinal values as an
78         // integer-valued float.
79         float[][] testCases = {
80             {Binary16.POSITIVE_ZERO,         +0.0f},
81             {Binary16.MIN_VALUE,              0x1.0p-24f},
82             {Binary16.MAX_SUBNORMAL,          0x1.ff8p-15f},
83             {Binary16.MIN_NORMAL,             0x1.0p-14f},
84             {Binary16.ONE,                    1.0f},
85             {Binary16.MAX_VALUE,              65504.0f},
86             {Binary16.POSITIVE_INFINITY,      Float.POSITIVE_INFINITY},
87         };
88 
89         // Check conversions in both directions
90 
91         // short -> float
92         for (var testCase : testCases) {
93             errors += compareAndReportError((short)testCase[0],
94                                             testCase[1]);
95         }
96 
97         // float -> short
98         for (var testCase : testCases) {
99             errors += compareAndReportError(testCase[1],
100                                             (short)testCase[0]);
101         }
102 
103         return errors;
104     }
105 
roundFloatToBinary16()106     private static int roundFloatToBinary16() {
107         int errors = 0;
108 
109         float[][] testCases = {
110             // Test all combinations of LSB, round, and sticky bit
111 
112             // LSB = 0, test combination of round and sticky
113             {0x1.ff8000p-1f,       (short)0x3bfe},              // round = 0, sticky = 0
114             {0x1.ff8010p-1f,       (short)0x3bfe},              // round = 0, sticky = 1
115             {0x1.ffa000p-1f,       (short)0x3bfe},              // round = 1, sticky = 0
116             {0x1.ffa010p-1f,       (short)0x3bff},              // round = 1, sticky = 1 => ++
117 
118             // LSB = 1, test combination of round and sticky
119             {0x1.ffc000p-1f,       Binary16.ONE-1},             // round = 0, sticky = 0
120             {0x1.ffc010p-1f,       Binary16.ONE-1},             // round = 0, sticky = 1
121             {0x1.ffe000p-1f,       Binary16.ONE},               // round = 1, sticky = 0 => ++
122             {0x1.ffe010p-1f,       Binary16.ONE},               // round = 1, sticky = 1 => ++
123 
124             // Test subnormal rounding
125             // Largest subnormal binary16 0x03ff => 0x1.ff8p-15f; LSB = 1
126             {0x1.ff8000p-15f,      Binary16.MAX_SUBNORMAL},     // round = 0, sticky = 0
127             {0x1.ff8010p-15f,      Binary16.MAX_SUBNORMAL},     // round = 0, sticky = 1
128             {0x1.ffc000p-15f,      Binary16.MIN_NORMAL},        // round = 1, sticky = 0 => ++
129             {0x1.ffc010p-15f,      Binary16.MIN_NORMAL},        // round = 1, sticky = 1 => ++
130 
131             // Test rounding near binary16 MIN_VALUE
132             // Smallest in magnitude subnormal binary16 value 0x0001 => 0x1.0p-24f
133             // Half-way case,0x1.0p-25f, and smaller should round down to zero
134             {0x1.fffffep-26f,      Binary16.POSITIVE_ZERO},     // nextDown in float
135             {0x1.000000p-25f,      Binary16.POSITIVE_ZERO},
136             {0x1.000002p-25f,      Binary16.MIN_VALUE},         // nextUp in float
137             {0x1.100000p-25f,      Binary16.MIN_VALUE},
138 
139             // Test rounding near overflow threshold
140             // Largest normal binary16 number 0x7bff => 0x1.ffcp15f; LSB = 1
141             {0x1.ffc000p15f,       Binary16.MAX_VALUE},         // round = 0, sticky = 0
142             {0x1.ffc010p15f,       Binary16.MAX_VALUE},         // round = 0, sticky = 1
143             {0x1.ffe000p15f,       Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 0 => ++
144             {0x1.ffe010p15f,       Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 1 => ++
145         };
146 
147         for (var testCase : testCases) {
148             errors += compareAndReportError(testCase[0],
149                                             (short)testCase[1]);
150         }
151         return errors;
152     }
153 
roundFloatToBinary16HalfWayCases()154     private static int roundFloatToBinary16HalfWayCases() {
155         int errors = 0;
156 
157         // Test rounding of exact half-way cases between each pair of
158         // finite exactly-representable binary16 numbers. Also test
159         // rounding of half-way +/- ulp of the *float* value.
160         // Additionally, test +/- float ulp of the endpoints. (Other
161         // tests in this file make sure all short values round-trip so
162         // that doesn't need to be tested here.)
163 
164         for (int i = Binary16.POSITIVE_ZERO; // 0x0000
165              i    <= Binary16.MAX_VALUE;     // 0x7bff
166              i += 2) {     // Check every even/odd pair once
167             short lower = (short) i;
168             short upper = (short)(i+1);
169 
170             float lowerFloat = Float.float16ToFloat(lower);
171             float upperFloat = Float.float16ToFloat(upper);
172             assert lowerFloat < upperFloat;
173 
174             float midway = (lowerFloat + upperFloat) * 0.5f; // Exact midpoint
175 
176             errors += compareAndReportError(Math.nextUp(lowerFloat),   lower);
177             errors += compareAndReportError(Math.nextDown(midway),     lower);
178 
179             // Under round to nearest even, the midway point will
180             // round *down* to the (even) lower endpoint.
181             errors += compareAndReportError(              midway,      lower);
182 
183             errors += compareAndReportError(Math.nextUp(  midway),     upper);
184             errors += compareAndReportError(Math.nextDown(upperFloat), upper);
185         }
186 
187         // More testing around the overflow threshold
188         // Binary16.ulp(Binary16.MAX_VALUE) == 32.0f; test around Binary16.MAX_VALUE + 1/2 ulp
189         float binary16_MAX_VALUE = Float.float16ToFloat(Binary16.MAX_VALUE);
190         float binary16_MAX_VALUE_halfUlp = binary16_MAX_VALUE + 16.0f;
191 
192         errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE), Binary16.MAX_VALUE);
193         errors += compareAndReportError(              binary16_MAX_VALUE,  Binary16.MAX_VALUE);
194         errors += compareAndReportError(Math.nextUp(  binary16_MAX_VALUE), Binary16.MAX_VALUE);
195 
196         // Binary16.MAX_VALUE is an "odd" value since its LSB = 1 so
197         // the half-way value greater than Binary16.MAX_VALUE should
198         // round up to the next even value, in this case Binary16.POSITIVE_INFINITY.
199         errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE_halfUlp), Binary16.MAX_VALUE);
200         errors += compareAndReportError(              binary16_MAX_VALUE_halfUlp,  Binary16.POSITIVE_INFINITY);
201         errors += compareAndReportError(Math.nextUp(  binary16_MAX_VALUE_halfUlp), Binary16.POSITIVE_INFINITY);
202 
203         return errors;
204     }
205 
206     private static int compareAndReportError(float input,
207                                              short expected) {
208         // Round to nearest even is sign symmetric
209         return compareAndReportError0( input,                 expected) +
210                compareAndReportError0(-input, Binary16.negate(expected));
211     }
212 
213     private static int compareAndReportError0(float input,
214                                               short expected) {
215         short actual = Float.floatToFloat16(input);
216         if (!Binary16.equivalent(actual, expected)) {
217             System.out.println("Unexpected result of converting " +
218                                Float.toHexString(input) +
219                                " to short. Expected 0x" + Integer.toHexString(0xFFFF & expected) +
220                                " got 0x" + Integer.toHexString(0xFFFF & actual));
221             return 1;
222             }
223         return 0;
224     }
225 
226     private static int compareAndReportError0(short input,
227                                               float expected) {
228         float actual = Float.float16ToFloat(input);
229         if (Float.compare(actual, expected) != 0) {
230             System.out.println("Unexpected result of converting " +
231                                Integer.toHexString(input & 0xFFFF) +
232                                " to float. Expected " + Float.toHexString(expected) +
233                                " got " + Float.toHexString(actual));
234             return 1;
235             }
236         return 0;
237     }
238 
239     private static int compareAndReportError(short input,
240                                              float expected) {
241         // Round to nearest even is sign symmetric
242         return compareAndReportError0(                input,   expected) +
243                compareAndReportError0(Binary16.negate(input), -expected);
244     }
245 
246     private static int roundFloatToBinary16FullBinade() {
247         int errors = 0;
248 
249         // For each float value between 1.0 and less than 2.0
250         // (i.e. set of float values with an exponent of 0), convert
251         // each value to binary16 and then convert that binary16 value
252         // back to float.
253         //
254         // Any exponent could be used; the maximum exponent for normal
255         // values would not exercise the full set of code paths since
256         // there is an up-front check on values that would overflow,
257         // which correspond to a ripple-carry of the significand that
258         // bumps the exponent.
259         short previous = (short)0;
260         for (int i = Float.floatToIntBits(1.0f);
261              i <= Float.floatToIntBits(Math.nextDown(2.0f));
262              i++) {
263             // (Could also express the loop control directly in terms
264             // of floating-point operations, incrementing by ulp(1.0),
265             // etc.)
266 
267             float f = Float.intBitsToFloat(i);
268             short f_as_bin16 = Float.floatToFloat16(f);
269             short f_as_bin16_down = (short)(f_as_bin16 - 1);
270             short f_as_bin16_up   = (short)(f_as_bin16 + 1);
271 
272             // Across successive float values to convert to binary16,
273             // the binary16 results should be semi-monotonic,
274             // non-decreasing in this case.
275 
276             // Only positive binary16 values so can compare using integer operations
277             if (f_as_bin16 < previous) {
278                 errors++;
279                 System.out.println("Semi-monotonicity violation observed on " +
280                                    Integer.toHexString(0xfff & f_as_bin16));
281             }
282             previous = f_as_bin16;
283 
284             // If round-to-nearest was correctly done, when exactly
285             // mapped back to float, f_as_bin16 should be at least as
286             // close as either of its neighbors to the original value
287             // of f.
288 
289             float f_prime_down = Float.float16ToFloat(f_as_bin16_down);
290             float f_prime      = Float.float16ToFloat(f_as_bin16);
291             float f_prime_up   = Float.float16ToFloat(f_as_bin16_up);
292 
293             float f_prime_diff = Math.abs(f - f_prime);
294             if (f_prime_diff == 0.0) {
295                 continue;
296             }
297             float f_prime_down_diff = Math.abs(f - f_prime_down);
298             float f_prime_up_diff   = Math.abs(f - f_prime_up);
299 
300             if (f_prime_diff > f_prime_down_diff ||
301                 f_prime_diff > f_prime_up_diff) {
302                 errors++;
303                 System.out.println("Round-to-nearest violation on converting " +
304                                    Float.toHexString(f) + " to binary16 and back.");
305             }
306         }
307         return errors;
308     }
309 
310     private static int alternativeImplementation() {
311         int errors = 0;
312 
313         // For exhaustive test of all float values use
314         // for (long ell = Integer.MIN_VALUE; ell <= Integer.MAX_VALUE; ell++) {
315 
316         for (long ell   = Float.floatToIntBits(2.0f);
317              ell       <= Float.floatToIntBits(4.0f);
318              ell++) {
319             float f = Float.intBitsToFloat((int)ell);
320             short s1 = Float.floatToFloat16(f);
321             short s2 =    altFloatToFloat16(f);
322 
323             if (s1 != s2) {
324                 errors++;
325                 System.out.println("Different conversion of float value " + Float.toHexString(f));
326             }
327         }
328 
329         return errors;
330     }
331 
332     /*
333      * Rely on float operations to do rounding in both normal and
334      * subnormal binary16 cases.
335      */
336     public static short altFloatToFloat16(float f) {
337         int doppel = Float.floatToRawIntBits(f);
338         short sign_bit = (short)((doppel & 0x8000_0000) >> 16);
339 
340         if (Float.isNaN(f)) {
341             // Preserve sign and attempt to preserve significand bits
342             return (short)(sign_bit
343                     | 0x7c00 // max exponent + 1
344                     // Preserve high order bit of float NaN in the
345                     // binary16 result NaN (tenth bit); OR in remaining
346                     // bits into lower 9 bits of binary 16 significand.
347                     | (doppel & 0x007f_e000) >> 13 // 10 bits
348                     | (doppel & 0x0000_1ff0) >> 4  //  9 bits
349                     | (doppel & 0x0000_000f));     //  4 bits
350         }
351 
352         float abs_f = Math.abs(f);
353 
354         // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp
355         if (abs_f >= (65504.0f + 16.0f) ) {
356             return (short)(sign_bit | 0x7c00); // Positive or negative infinity
357         } else {
358             // Smallest magnitude nonzero representable binary16 value
359             // is equal to 0x1.0p-24; half-way and smaller rounds to zero.
360             if (abs_f <= 0x1.0p-25f) { // Covers float zeros and subnormals.
361                 return sign_bit; // Positive or negative zero
362             }
363 
364             // Dealing with finite values in exponent range of
365             // binary16 (when rounding is done, could still round up)
366             int exp = Math.getExponent(f);
367             assert -25 <= exp && exp <= 15;
368             short signif_bits;
369 
370             if (exp <= -15) { // scale down to float subnormal range to do rounding
371                 // Use a float multiply to compute the correct
372                 // trailing significand bits for a binary16 subnormal.
373                 //
374                 // The exponent range of normalized binary16 subnormal
375                 // values is [-24, -15]. The exponent range of float
376                 // subnormals is [-149, -140]. Multiply abs_f down by
377                 // 2^(-125) -- since (-125 = -149 - (-24)) -- so that
378                 // the trailing bits of a subnormal float represent
379                 // the correct trailing bits of a binary16 subnormal.
380                 exp = -15; // Subnormal encoding using -E_max.
381                 float f_adjust = abs_f * 0x1.0p-125f;
382 
383                 // In case the significand rounds up and has a carry
384                 // propagate all the way up, take the bottom 11 bits
385                 // rather than bottom 10 bits. Adding this value,
386                 // rather than OR'ing htis value, will cause the right
387                 // exponent adjustment.
388                 signif_bits = (short)(Float.floatToRawIntBits(f_adjust) & 0x07ff);
389                 return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) );
390             } else {
391                 // Scale down to subnormal range to round off excess bits
392                 int scalingExp = -139 - exp;
393                 float scaled = Math.scalb(Math.scalb(f, scalingExp),
394                                                        -scalingExp);
395                 exp = Math.getExponent(scaled);
396                 doppel = Float.floatToRawIntBits(scaled);
397 
398                 signif_bits = (short)((doppel & 0x007f_e000) >>
399                                       (FloatConsts.SIGNIFICAND_WIDTH - 11));
400                 return (short)(sign_bit | ( ((exp + 15) << 10) | signif_bits ) );
401             }
402         }
403     }
404 
405     public static class Binary16 {
406         public static final short POSITIVE_INFINITY = (short)0x7c00;
407         public static final short MAX_VALUE         = 0x7bff;
408         public static final short ONE               = 0x3c00;
409         public static final short MIN_NORMAL        = 0x0400;
410         public static final short MAX_SUBNORMAL     = 0x03ff;
411         public static final short MIN_VALUE         = 0x0001;
412         public static final short POSITIVE_ZERO     = 0x0000;
413 
isNaN(short binary16)414         public static boolean isNaN(short binary16) {
415             return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and...
416                 && ((binary16 & 0x03ff) != 0 );    // significand nonzero.
417         }
418 
negate(short binary16)419         public static short negate(short binary16) {
420             return (short)(binary16 ^ 0x8000 ); // Flip only sign bit.
421         }
422 
equivalent(short bin16_1, short bin16_2)423         public static boolean equivalent(short bin16_1, short bin16_2) {
424             return (bin16_1 == bin16_2) ||
425                 isNaN(bin16_1) && isNaN(bin16_2);
426         }
427     }
428 }
429