• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2   * Mesa 3-D graphics library
3   *
4   * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
5   * Copyright 2015 Philip Taylor <philip@zaynar.co.uk>
6   * Copyright 2018 Advanced Micro Devices, Inc.
7   * Copyright (C) 2018-2019 Intel Corporation
8   *
9   * Permission is hereby granted, free of charge, to any person obtaining a
10   * copy of this software and associated documentation files (the "Software"),
11   * to deal in the Software without restriction, including without limitation
12   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13   * and/or sell copies of the Software, and to permit persons to whom the
14   * Software is furnished to do so, subject to the following conditions:
15   *
16   * The above copyright notice and this permission notice shall be included
17   * in all copies or substantial portions of the Software.
18   *
19   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20   * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23   * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24   * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25   * OTHER DEALINGS IN THE SOFTWARE.
26   */
27  
28  #include <math.h>
29  #include <assert.h>
30  #include "half_float.h"
31  #include "rounding.h"
32  #include "softfloat.h"
33  #include "macros.h"
34  #include "u_math.h"
35  
36  typedef union { float f; int32_t i; uint32_t u; } fi_type;
37  
38  /**
39   * Convert a 4-byte float to a 2-byte half float.
40   *
41   * Not all float32 values can be represented exactly as a float16 value. We
42   * round such intermediate float32 values to the nearest float16. When the
43   * float32 lies exactly between to float16 values, we round to the one with
44   * an even mantissa.
45   *
46   * This rounding behavior has several benefits:
47   *   - It has no sign bias.
48   *
49   *   - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's
50   *     GPU ISA.
51   *
52   *   - By reproducing the behavior of the GPU (at least on Intel hardware),
53   *     compile-time evaluation of constant packHalf2x16 GLSL expressions will
54   *     result in the same value as if the expression were executed on the GPU.
55   */
56  uint16_t
_mesa_float_to_half_slow(float val)57  _mesa_float_to_half_slow(float val)
58  {
59     const fi_type fi = {val};
60     const int flt_m = fi.i & 0x7fffff;
61     const int flt_e = (fi.i >> 23) & 0xff;
62     const int flt_s = (fi.i >> 31) & 0x1;
63     int s, e, m = 0;
64     uint16_t result;
65  
66     /* sign bit */
67     s = flt_s;
68  
69     /* handle special cases */
70     if ((flt_e == 0) && (flt_m == 0)) {
71        /* zero */
72        /* m = 0; - already set */
73        e = 0;
74     }
75     else if ((flt_e == 0) && (flt_m != 0)) {
76        /* denorm -- denorm float maps to 0 half */
77        /* m = 0; - already set */
78        e = 0;
79     }
80     else if ((flt_e == 0xff) && (flt_m == 0)) {
81        /* infinity */
82        /* m = 0; - already set */
83        e = 31;
84     }
85     else if ((flt_e == 0xff) && (flt_m != 0)) {
86        /* Retain the top bits of a NaN to make sure that the quiet/signaling
87         * status stays the same.
88         */
89        m = flt_m >> 13;
90        if (!m)
91           m = 1;
92        e = 31;
93     }
94     else {
95        /* regular number */
96        const int new_exp = flt_e - 127;
97        if (new_exp < -14) {
98           /* The float32 lies in the range (0.0, min_normal16) and is rounded
99            * to a nearby float16 value. The result will be either zero, subnormal,
100            * or normal.
101            */
102           e = 0;
103           m = _mesa_lroundevenf((1 << 24) * fabsf(fi.f));
104        }
105        else if (new_exp > 15) {
106           /* map this value to infinity */
107           /* m = 0; - already set */
108           e = 31;
109        }
110        else {
111           /* The float32 lies in the range
112            *   [min_normal16, max_normal16 + max_step16)
113            * and is rounded to a nearby float16 value. The result will be
114            * either normal or infinite.
115            */
116           e = new_exp + 15;
117           m = _mesa_lroundevenf(flt_m / (float) (1 << 13));
118        }
119     }
120  
121     assert(0 <= m && m <= 1024);
122     if (m == 1024) {
123        /* The float32 was rounded upwards into the range of the next exponent,
124         * so bump the exponent. This correctly handles the case where f32
125         * should be rounded up to float16 infinity.
126         */
127        ++e;
128        m = 0;
129     }
130  
131     result = (s << 15) | (e << 10) | m;
132     return result;
133  }
134  
135  uint16_t
_mesa_float_to_float16_rtz_slow(float val)136  _mesa_float_to_float16_rtz_slow(float val)
137  {
138      return _mesa_float_to_half_rtz_slow(val);
139  }
140  
141  /**
142   * Convert a 2-byte half float to a 4-byte float.
143   * Based on code from:
144   * http://www.opengl.org/discussion_boards/ubb/Forum3/HTML/008786.html
145   */
146  float
_mesa_half_to_float_slow(uint16_t val)147  _mesa_half_to_float_slow(uint16_t val)
148  {
149     union fi infnan;
150     union fi magic;
151     union fi f32;
152  
153     infnan.ui = 0x8f << 23;
154     infnan.f = 65536.0f;
155     magic.ui  = 0xef << 23;
156  
157     /* Exponent / Mantissa */
158     f32.ui = (val & 0x7fff) << 13;
159  
160     /* Adjust */
161     f32.f *= magic.f;
162     /* XXX: The magic mul relies on denorms being available */
163  
164     /* Inf / NaN */
165     if (f32.f >= infnan.f)
166        f32.ui |= 0xff << 23;
167  
168     /* Sign */
169     f32.ui |= (uint32_t)(val & 0x8000) << 16;
170  
171     return f32.f;
172  }
173  
174  /**
175    * Convert 0.0 to 0x00, 1.0 to 0xff.
176    * Values outside the range [0.0, 1.0] will give undefined results.
177    */
_mesa_half_to_unorm8(uint16_t val)178  uint8_t _mesa_half_to_unorm8(uint16_t val)
179  {
180     const int m = val & 0x3ff;
181     const int e = (val >> 10) & 0x1f;
182     ASSERTED const int s = (val >> 15) & 0x1;
183  
184     /* v = round_to_nearest(1.mmmmmmmmmm * 2^(e-15) * 255)
185      *   = round_to_nearest((1.mmmmmmmmmm * 255) * 2^(e-15))
186      *   = round_to_nearest((1mmmmmmmmmm * 255) * 2^(e-25))
187      *   = round_to_zero((1mmmmmmmmmm * 255) * 2^(e-25) + 0.5)
188      *   = round_to_zero(((1mmmmmmmmmm * 255) * 2^(e-24) + 1) / 2)
189      *
190      * This happens to give the correct answer for zero/subnormals too
191      */
192     assert(s == 0 && val <= FP16_ONE); /* check 0 <= this <= 1 */
193     /* (implies e <= 15, which means the bit-shifts below are safe) */
194  
195     uint32_t v = ((1 << 10) | m) * 255;
196     v = ((v >> (24 - e)) + 1) >> 1;
197     return v;
198  }
199  
200  /**
201    * Takes a uint16_t, divides by 65536, converts the infinite-precision
202    * result to fp16 with round-to-zero. Used by the ASTC decoder.
203    */
_mesa_uint16_div_64k_to_half(uint16_t v)204  uint16_t _mesa_uint16_div_64k_to_half(uint16_t v)
205  {
206     /* Zero or subnormal. Set the mantissa to (v << 8) and return. */
207     if (v < 4)
208        return v << 8;
209  
210     /* Count the leading 0s in the uint16_t */
211  #ifdef HAVE___BUILTIN_CLZ
212     int n = __builtin_clz(v) - 16;
213  #else
214     int n = 16;
215     for (int i = 15; i >= 0; i--) {
216        if (v & (1 << i)) {
217           n = 15 - i;
218           break;
219        }
220     }
221  #endif
222  
223     /* Shift the mantissa up so bit 16 is the hidden 1 bit,
224      * mask it off, then shift back down to 10 bits
225      */
226     int m = ( ((uint32_t)v << (n + 1)) & 0xffff ) >> 6;
227  
228     /*  (0{n} 1 X{15-n}) * 2^-16
229      * = 1.X * 2^(15-n-16)
230      * = 1.X * 2^(14-n - 15)
231      * which is the FP16 form with e = 14 - n
232      */
233     int e = 14 - n;
234  
235     assert(e >= 1 && e <= 30);
236     assert(m >= 0 && m < 0x400);
237  
238     return (e << 10) | m;
239  }
240