• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.renderscript.cts;
18 
19 import android.renderscript.RSRuntimeException;
20 import android.util.Log;
21 
22 import junit.framework.Assert;
23 
24 /** This class contains utility functions needed by RenderScript CTS tests to handle Float16
25  * operations.
26  */
27 class Float16Utils {
28     // 16-bit masks for extracting sign, exponent and mantissa bits
29     private static short SIGN_MASK     = (short) 0x8000;
30     private static short EXPONENT_MASK = (short) 0x7C00;
31     private static short MANTISSA_MASK = (short) 0x03FF;
32 
33     private static long DOUBLE_SIGN_MASK = 0x8000000000000000L;
34     private static long DOUBLE_EXPONENT_MASK = 0x7ff0000000000000L;
35     private static long DOUBLE_MANTISSA_MASK = 0x000fffffffffffffL;
36 
37     static double MIN_NORMAL = Math.scalb(1.0, -14); // smallest Float16 normal is 2 ^ -14
38     static double MIN_VALUE = Math.scalb(1.0, -24); // smallest Float16 value is 2 ^ -24
39     static double MAX_VALUE = 65504; // largest Float16 value is 2^16 - 32
40 
41     // NaN has all exponent bits set to 1 and a non-zero mantissa
isFloat16NaN(short val)42     static boolean isFloat16NaN(short val) {
43         return (val & EXPONENT_MASK) == EXPONENT_MASK &&
44                (val & MANTISSA_MASK) != 0;
45     }
46 
47     // Infinity has all exponent bits set to 1 and zeroes in mantissa
isFloat16Infinite(short val)48     static boolean isFloat16Infinite(short val) {
49         return (val & EXPONENT_MASK) == EXPONENT_MASK &&
50                (val & MANTISSA_MASK) == 0;
51     }
52 
53     // Subnormal numbers have exponent bits set to 0 and a non-zero mantissa
isFloat16SubNormal(short val)54     static boolean isFloat16SubNormal(short val) {
55         return (val & EXPONENT_MASK) == 0 && (val & MANTISSA_MASK) != 0;
56     }
57 
58     // Zero has all but the sign bit set to zero
isFloat16Zero(short val)59     static boolean isFloat16Zero(short val) {
60         return (val & ~SIGN_MASK) == 0;
61     }
62 
63     // Negativity test checks the sign bit
isFloat16Negative(short val)64     static boolean isFloat16Negative(short val) {
65         return (val & SIGN_MASK) != 0;
66     }
67 
68     // Check if this is a finite, non-zero FP16 value
isFloat16FiniteNonZero(short val)69     static boolean isFloat16FiniteNonZero(short val) {
70         return !isFloat16NaN(val) && !isFloat16Infinite(val) && !isFloat16Zero(val);
71     }
72 
convertFloat16ToFloat(short val)73     static float convertFloat16ToFloat(short val) {
74         // Extract sign, exponent and mantissa
75         int sign = val & SIGN_MASK;
76         int exponent = (val & EXPONENT_MASK) >> 10;
77         int mantissa = val & MANTISSA_MASK;
78 
79         // 0.<mantissa> = <mantissa> * 2^-10
80         float mantissaAsFloat = Math.scalb(mantissa, -10);
81 
82         float result;
83         if (isFloat16Zero(val))
84             result = 0.0f;
85         else if (isFloat16Infinite(val))
86             result = java.lang.Float.POSITIVE_INFINITY;
87         else if (isFloat16NaN(val))
88             result = java.lang.Float.NaN;
89         else if (isFloat16SubNormal(val)) {
90             // value is 2^-14 * mantissaAsFloat
91             result = Math.scalb(1, -14) * mantissaAsFloat;
92         }
93         else {
94             // value is 2^(exponent - 15) * 1.<mantissa>
95             result = Math.scalb(1, exponent - 15) * (1 + mantissaAsFloat);
96         }
97 
98         if (sign != 0)
99             result = -result;
100         return result;
101     }
102 
convertFloat16ToDouble(short val)103     static double convertFloat16ToDouble(short val) {
104         return (double) convertFloat16ToFloat(val);
105     }
106 
107     /* This utility function accepts the mantissa, exponent and an isNegative flag and constructs a
108      * double value.  The exponent should be biased, but not shifted left by 52-bits.
109      */
constructDouble(long mantissa, long exponent, boolean isNegative)110     private static double constructDouble(long mantissa, long exponent, boolean isNegative) {
111         exponent = exponent << 52;
112         long bits = (exponent & DOUBLE_EXPONENT_MASK) | (mantissa & DOUBLE_MANTISSA_MASK);
113         if (isNegative) bits |= DOUBLE_SIGN_MASK;
114         return Double.longBitsToDouble(bits);
115     }
116 
117     /* This function takes a double value and returns an array with the double representations of
118      * the Float16 values immediately smaller and larger than the input.  If the input value is
119      * precisely representable in Float16, it is copied into both the entries of the array.
120      *
121      * The returned values can be subnormal Float16 numbers.  Handling subnormals is delegated to
122      * the caller.
123      *
124      * TODO Extend this function to handle rounding for both float16 and float32.
125      */
roundToFloat16(double value)126     static double[] roundToFloat16(double value) {
127         long valueBits = Double.doubleToLongBits(value);
128         long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa
129         long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent
130         long unbiasedExponent = (exponent >> 52) - 1023;
131         boolean isNegative = (valueBits & DOUBLE_SIGN_MASK) != 0;
132 
133         double[] result = new double[2];
134         if (Double.isNaN(value) || Double.isInfinite(value)) {
135             // Input is NaN or Infinity.  Return unchanged.
136             result[0] = value;
137             result[1] = value;
138             return result; // Note that we skip the negation at the end of this function
139         }
140 
141         if (unbiasedExponent == -1023 && mantissa == 0) {
142             // Zero.  Assign 0 and adjust sign at the end of this function
143             result[0] = 0.;
144             result[1] = 0.;
145         }
146         else if (unbiasedExponent < -24) {
147             // Absolute value is between 0 and MIN_VALUE.  Return 0 and MIN_VALUE
148             result[0] = 0.;
149             result[1] = MIN_VALUE;
150         }
151         else if (unbiasedExponent <= 15) {
152             /*
153              * Either subnormal or normal.  We compute a mask for the excess precision bits in the
154              * mantissa.
155              *
156              * (a) If none of these bits are set, the current value's mantissa and exponent are used
157              * for both the low and high values.
158              * (b) If some of these bits are set, we zero-out the extra bits to get the mantissa and
159              * exponent of the lower value.  For the higher value, we increment the masked mantissa
160              * at the least-significant bit within the range of this Float16 value.  To handle
161              * overflows during the the increment, we need to increment the exponent and round up to
162              * infinity if needed.
163              */
164 
165             // 'mask' is used to detect and zero-out excess bits set.  'mask + 1' is the value
166             // added to zero-ed out mantissa to get the next higher Float16 value.
167             long mask;
168             long maxSigMantissaBits;
169 
170             if (unbiasedExponent < -14) {
171                 // Subnormal Float16.  For Float16's MIN_VALUE, mantissa can have no bits set (after
172                 // adjusting for the implied one bit.  For each higher exponent, an extra bit of
173                 // precision is allowed in the mantissa.  This computes to "24 + unbiasedExponent".
174                 maxSigMantissaBits = 24 + unbiasedExponent;
175             } else {
176                 // For normal Float16 values have 10 bits of precision in the mantissa.
177                 maxSigMantissaBits = 10;
178             }
179             mask = DOUBLE_MANTISSA_MASK >> maxSigMantissaBits;
180 
181             // zero-out the excess precision bits for the mantissa for both low and high values.
182             long lowFloat16Mantissa = mantissa & ~mask;
183             long highFloat16Mantissa = mantissa & ~mask;
184 
185             long lowFloat16Exponent = unbiasedExponent;
186             long highFloat16Exponent = unbiasedExponent;
187 
188             if ((mantissa & mask) != 0) {
189                 // If mantissa has extra bits set, increment the mantissa at the LSB (for this
190                 // Float16 value)
191                 highFloat16Mantissa += mask + 1;
192 
193                 // If this overflows the mantissa into the exponent, set mantissa to zero and
194                 // increment the exponent.
195                 if ((highFloat16Mantissa & DOUBLE_EXPONENT_MASK) != 0) {
196                     highFloat16Mantissa = 0;
197                     highFloat16Exponent += 1;
198                 }
199 
200                 // If the exponent exceeds the range of Float16 exponents, set it to 1024, so the
201                 // value gets rounded up to Double.POSITIVE_INFINITY.
202                 if (highFloat16Exponent == 16) {
203                     highFloat16Exponent = 1024;
204                 }
205             }
206 
207             result[0] = constructDouble(lowFloat16Mantissa, lowFloat16Exponent + 1023, false);
208             result[1] = constructDouble(highFloat16Mantissa, highFloat16Exponent + 1023, false);
209         } else {
210             // Exponent is outside Float16's range.  Use POSITIVE_INFINITY for both bounds.
211             result[0] = Double.POSITIVE_INFINITY;
212             result[1] = Double.POSITIVE_INFINITY;
213         }
214 
215         // Swap values in result and negate them if the input value is negative.
216         if (isNegative) {
217             double tmp = result[0];
218             result[0] = -result[1];
219             result[1] = -tmp;
220         }
221 
222         return result;
223     }
224 
225     // This function takes a double value and returns 1 ulp, in Float16 precision, of that value.
226     // Both the parameter and return value have 'double' type but they should be exactly
227     // representable in Float16.  If the parameter exceeds the precision of Float16, an exception is
228     // thrown.
float16Ulp(double value)229     static double float16Ulp(double value) {
230         long valueBits = Double.doubleToLongBits(value);
231         long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa
232         long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent
233         long unbiasedExponent = (exponent >> 52) - 1023;
234 
235         if (unbiasedExponent == 1024) { // i.e. NaN or infinity
236             if (mantissa == 0) {
237                 return Double.POSITIVE_INFINITY; // ulp of +/- infinity is +infinity
238             } else {
239                 return Double.NaN; // ulp for NaN is NaN
240             }
241         }
242 
243         if (unbiasedExponent == -1023) {
244             // assert that mantissa is zero, i.e. value is zero and not a subnormal value.
245             if (mantissa != 0) {
246                 throw new RSRuntimeException("float16ulp: Double parameter is subnormal");
247             }
248             return MIN_VALUE;
249         }
250 
251         if (unbiasedExponent < -24 || unbiasedExponent > 15) {
252             throw new RSRuntimeException("float16Ulp: Double parameter's exponent out of range");
253         }
254 
255         if (unbiasedExponent >= -24 && unbiasedExponent < -14) {
256             // Exponent within the range of Float16 subnormals.
257 
258             // Ensure that mantissa doesn't have too much precision.  For example, the smallest
259             // normal number has an unbiased exponent of -24 and has one bit in mantissa.  Each
260             // higher exponent allows one extra bit of precision in the mantissa.  Combined with the
261             // implied one bit, the mantissa can have "24 + unbiasedExponent" significant bits.  The
262             // rest of the 52 bits in mantissa must be zero.
263 
264             long maxSigMantissaBits = 24 + unbiasedExponent;
265             long mask = DOUBLE_MANTISSA_MASK >> maxSigMantissaBits;
266 
267             if((mask & mantissa) != 0) {
268                 throw new RSRuntimeException("float16ulp: Double parameter is too precise for subnormal Float16 values.");
269             }
270             return MIN_VALUE;
271         }
272         if (unbiasedExponent >= -14) {
273             // Exponent within the range of Float16 normals.  Ensure that the mantissa has at most
274             // 10 significant bits.
275             long mask = DOUBLE_MANTISSA_MASK >> 10;
276             if ((mantissa & mask) != 0) {
277                 throw new RSRuntimeException("float16ulp: Double parameter is too precise for normal Float16 values.");
278             }
279             return Math.scalb(1.0, (int) (unbiasedExponent - 10));
280         }
281         throw new RSRuntimeException("float16Ulp: unreachable line executed");
282     }
283 
284     // This function converts its double input value to its Float16 representation (represented as a
285     // short).  It assumes, but does not check, that the input is precisely representable in Float16
286     // precision.  No rounding is performed either.
convertDoubleToFloat16(double value)287     static short convertDoubleToFloat16(double value) {
288         if (value == 0.) {
289             if (Double.doubleToLongBits(value) == 0)
290                 return (short) 0x0;
291             else
292                 return (short) 0x8000;
293         } else if (Double.isNaN(value)) {
294             // return Quiet NaN irrespective of what kind of NaN 'value' is.
295             return (short) 0x7e00;
296         } else if (value == Double.POSITIVE_INFINITY) {
297             return (short) 0x7c00;
298         } else if (value == Double.NEGATIVE_INFINITY) {
299             return (short) 0xfc00;
300         }
301 
302         double positiveValue = Math.abs(value);
303         boolean isNegative = (value < 0.);
304         if (positiveValue < MIN_NORMAL) {
305             short quotient = (short) (positiveValue / MIN_VALUE);
306             return (isNegative) ? (short) (0x8000 | quotient) : quotient;
307         } else {
308             long valueBits = Double.doubleToLongBits(value);
309             long mantissa = valueBits & DOUBLE_MANTISSA_MASK; // 52-bit mantissa
310             long exponent = valueBits & DOUBLE_EXPONENT_MASK; // 11-bit exponent
311             long unbiasedExponent = (exponent >> 52) - 1023;
312 
313             short halfExponent = (short) ((unbiasedExponent + 15) << 10);
314             short halfMantissa = (short) (mantissa >> 42);
315             short halfValue = (short) (halfExponent | halfMantissa);
316             return (isNegative) ? (short) (0x8000 | halfValue) : halfValue;
317         }
318     }
319 
320 }
321