• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef API_BASE_MATH_FLOAT_PACKER_H
17 #define API_BASE_MATH_FLOAT_PACKER_H
18 
19 #include <base/math/vector_util.h>
20 #include <base/namespace.h>
21 
BASE_BEGIN_NAMESPACE()22 BASE_BEGIN_NAMESPACE()
23 namespace Math {
24 constexpr const uint32_t F32_EXPONENT_BITS = 0xFF;
25 constexpr const uint32_t F32_EXPONENT_SHIFT = 23;
26 constexpr const uint32_t F32_SIGN_BIT = 31;
27 constexpr const uint32_t F32_INFINITY = (F32_EXPONENT_BITS << F32_EXPONENT_SHIFT);
28 
29 constexpr const uint32_t F16_EXPONENT_BITS = 0x1F;
30 constexpr const uint32_t F16_EXPONENT_SHIFT = 10;
31 constexpr const uint32_t F16_SIGN_BIT = 15;
32 constexpr const uint32_t F16_SIGN_SHIFT = (F32_SIGN_BIT - F16_SIGN_BIT);
33 constexpr const uint32_t F16_MANTISSA_SHIFT = (F32_EXPONENT_SHIFT - F16_EXPONENT_SHIFT);
34 constexpr const uint32_t F16_INFINITY = (F16_EXPONENT_BITS << F16_EXPONENT_SHIFT);
35 
36 /** \addtogroup group_math_floatpacker
37  *  @{
38  */
39 /** Converts 32 bit floating point number to 16 bit float value
40  */
41 inline uint16_t F32ToF16(float val)
42 {
43     union {
44         float f;
45         uint32_t ui;
46     } f32 { val };
47 
48     uint32_t noSign = f32.ui & 0x7fffffff;     // Non-sign bits
49     uint32_t sign = f32.ui & 0x80000000;       // Sign bit
50     uint32_t exponent = f32.ui & F32_INFINITY; // Exponent
51 
52     noSign >>= F16_MANTISSA_SHIFT; // Align mantissa on MSB
53     sign >>= F16_SIGN_SHIFT;       // Shift sign bit into position
54 
55     // 16bit bias = 15, 32bit bias = 127
56     // (-127 + 15) << 10 = (-112) << 10 = -0x1c000
57     noSign -= 0x1c000; // Adjust bias
58 
59     // 16bit min exponent = -14, 32bit bias 127
60     // (-14 + 127) << 23 = 0x38800000
61     noSign = (exponent < 0x38800000) ? 0 : noSign; // Flush-to-zero
62     // 16bit max exponent = 15, 32bit bias 127
63     // (15 + 127) << 23 = 0x47000000
64     noSign = (exponent > 0x47000000) ? F16_INFINITY : noSign; // Clamp-to-inf
65 
66     // Re-insert sign bit
67     return noSign | sign;
68 }
69 
70 /** Converts 16 bit floating point number to 32 bit float
71  */
72 inline constexpr float F16ToF32(uint16_t val)
73 {
74     union {
75         float f = 0.f;
76         uint32_t ui;
77     } f32;
78 
79     uint32_t noSign = val & 0x7fff;         // Non-sign bits
80     uint32_t sign = val & 0x8000;           // Sign bit
81     uint32_t exponent = val & F16_INFINITY; // Exponent
82 
83     noSign <<= F16_MANTISSA_SHIFT; // Align mantissa on MSB
84     sign <<= F16_SIGN_SHIFT;       // Shift sign bit into position
85 
86     // 16bit bias = 15, 32bit bias = 127
87     // (-15 + 127) << 23 = 0x38000000
88     noSign += 0x38000000; // Adjust bias
89 
90     noSign = (exponent == 0 ? 0 : noSign);                       // Denormals-as-zero
91     noSign = (exponent == F16_INFINITY ? F32_INFINITY : noSign); // Clamp-to-inf
92 
93     f32.ui = noSign | sign; // Re-insert sign bit
94 
95     return f32.f;
96 }
97 
98 /** Pack single vector2(32bit x 2) to 32 bit integer (unsigned packed values) */
99 inline uint32_t PackUnorm2X16(const Vec2& v)
100 {
101     union {
102         uint16_t in[2];
103         uint32_t out;
104     } u;
105 
106     u.in[0] = uint16_t(round(clamp(v[0], 0, +1) * 65535.0f));
107     u.in[1] = uint16_t(round(clamp(v[1], 0, +1) * 65535.0f));
108 
109     return u.out;
110 }
111 
112 /** Unpack 32 bit integer to default lume vector2
113  */
114 constexpr Vec2 UnpackUnorm2X16(uint32_t p)
115 {
116     const union {
117         uint32_t in;
118         uint16_t out[2];
119     } u { p };
120 
121     return Vec2(u.out[0] * 1.5259021896696421759365224689097e-5f, u.out[1] * 1.5259021896696421759365224689097e-5f);
122 }
123 
124 /** Pack single vector2(32bit x 2) to 32 bit integer (signed packed values)
125  */
126 inline uint32_t PackSnorm2X16(const Vec2& v)
127 {
128     union {
129         int16_t in[2];
130         uint32_t out;
131     } u;
132 
133     u.in[0] = (int16_t)(round(clamp(v.x, -1.0f, +1.0f) * 32767.0f));
134     u.in[1] = (int16_t)(round(clamp(v.y, -1.0f, +1.0f) * 32767.0f));
135 
136     return u.out;
137 }
138 
139 /** Unpack 32 bit integer to default lume vector2
140  */
141 constexpr Vec2 UnpackSnorm2X16(uint32_t p)
142 {
143     const union {
144         uint32_t in;
145         int16_t out[2];
146     } u { p };
147 
148     return Vec2(clamp(u.out[0] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f),
149         clamp(u.out[1] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f));
150 }
151 
152 /** Pack vector2 to 32 bit integer with half precision
153  */
154 inline uint32_t PackHalf2X16(const Vec2& v)
155 {
156     const union {
157         uint16_t in[2];
158         uint32_t out;
159     } u { { F32ToF16(v.x), F32ToF16(v.y) } };
160 
161     return u.out;
162 }
163 
164 /** Unpack 32 bit integer to normal lume vector2 and rise precision from 16 bit to 32 bits
165  */
166 constexpr Vec2 UnpackHalf2X16(uint32_t v)
167 {
168     const union {
169         uint32_t in;
170         uint16_t out[2];
171     } u { v };
172 
173     return Vec2(F16ToF32(u.out[0]), F16ToF32(u.out[1]));
174 }
175 /** @} */
176 } // namespace Math
177 BASE_END_NAMESPACE()
178 
179 #endif // API_BASE_MATH_FLOAT_PACKER_H
180