1 /*
2 * Copyright (C) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef API_BASE_MATH_FLOAT_PACKER_H
17 #define API_BASE_MATH_FLOAT_PACKER_H
18
19 #include <base/math/vector_util.h>
20 #include <base/namespace.h>
21
BASE_BEGIN_NAMESPACE()22 BASE_BEGIN_NAMESPACE()
23 namespace Math {
// Bit layout helpers for 32 bit floats.
constexpr uint32_t F32_EXPONENT_BITS = 0xFF; // Exponent field mask before shifting (8 bits)
constexpr uint32_t F32_EXPONENT_SHIFT = 23;  // Index of the lowest exponent bit
constexpr uint32_t F32_SIGN_BIT = 31;        // Index of the sign bit
// All exponent bits set and zero mantissa, also used as the exponent mask.
constexpr uint32_t F32_INFINITY = F32_EXPONENT_BITS << F32_EXPONENT_SHIFT;

// Bit layout helpers for 16 bit floats.
constexpr uint32_t F16_EXPONENT_BITS = 0x1F; // Exponent field mask before shifting (5 bits)
constexpr uint32_t F16_EXPONENT_SHIFT = 10;  // Index of the lowest exponent bit
constexpr uint32_t F16_SIGN_BIT = 15;        // Index of the sign bit
// Distance between the sign bit positions of the two formats.
constexpr uint32_t F16_SIGN_SHIFT = F32_SIGN_BIT - F16_SIGN_BIT;
// Distance between the exponent (and therefore mantissa MSB) positions of the two formats.
constexpr uint32_t F16_MANTISSA_SHIFT = F32_EXPONENT_SHIFT - F16_EXPONENT_SHIFT;
// All exponent bits set and zero mantissa, also used as the exponent mask.
constexpr uint32_t F16_INFINITY = F16_EXPONENT_BITS << F16_EXPONENT_SHIFT;
35
36 /** \addtogroup group_math_floatpacker
37 * @{
38 */
39 /** Converts 32 bit floating point number to 16 bit float value
40 */
41 inline uint16_t F32ToF16(float val)
42 {
43 union {
44 float f;
45 uint32_t ui;
46 } f32 { val };
47
48 uint32_t noSign = f32.ui & 0x7fffffff; // Non-sign bits
49 uint32_t sign = f32.ui & 0x80000000; // Sign bit
50 uint32_t exponent = f32.ui & F32_INFINITY; // Exponent
51
52 noSign >>= F16_MANTISSA_SHIFT; // Align mantissa on MSB
53 sign >>= F16_SIGN_SHIFT; // Shift sign bit into position
54
55 // 16bit bias = 15, 32bit bias = 127
56 // (-127 + 15) << 10 = (-112) << 10 = -0x1c000
57 noSign -= 0x1c000; // Adjust bias
58
59 // 16bit min exponent = -14, 32bit bias 127
60 // (-14 + 127) << 23 = 0x38800000
61 noSign = (exponent < 0x38800000) ? 0 : noSign; // Flush-to-zero
62 // 16bit max exponent = 15, 32bit bias 127
63 // (15 + 127) << 23 = 0x47000000
64 noSign = (exponent > 0x47000000) ? F16_INFINITY : noSign; // Clamp-to-inf
65
66 // Re-insert sign bit
67 return noSign | sign;
68 }
69
70 /** Converts 16 bit floating point number to 32 bit float
71 */
72 inline constexpr float F16ToF32(uint16_t val)
73 {
74 union {
75 float f = 0.f;
76 uint32_t ui;
77 } f32;
78
79 uint32_t noSign = val & 0x7fff; // Non-sign bits
80 uint32_t sign = val & 0x8000; // Sign bit
81 uint32_t exponent = val & F16_INFINITY; // Exponent
82
83 noSign <<= F16_MANTISSA_SHIFT; // Align mantissa on MSB
84 sign <<= F16_SIGN_SHIFT; // Shift sign bit into position
85
86 // 16bit bias = 15, 32bit bias = 127
87 // (-15 + 127) << 23 = 0x38000000
88 noSign += 0x38000000; // Adjust bias
89
90 noSign = (exponent == 0 ? 0 : noSign); // Denormals-as-zero
91 noSign = (exponent == F16_INFINITY ? F32_INFINITY : noSign); // Clamp-to-inf
92
93 f32.ui = noSign | sign; // Re-insert sign bit
94
95 return f32.f;
96 }
97
98 /** Pack single vector2(32bit x 2) to 32 bit integer (unsigned packed values) */
99 inline uint32_t PackUnorm2X16(const Vec2& v)
100 {
101 union {
102 uint16_t in[2];
103 uint32_t out;
104 } u;
105
106 u.in[0] = uint16_t(round(clamp(v[0], 0, +1) * 65535.0f));
107 u.in[1] = uint16_t(round(clamp(v[1], 0, +1) * 65535.0f));
108
109 return u.out;
110 }
111
112 /** Unpack 32 bit integer to default lume vector2
113 */
114 constexpr Vec2 UnpackUnorm2X16(uint32_t p)
115 {
116 const union {
117 uint32_t in;
118 uint16_t out[2];
119 } u { p };
120
121 return Vec2(u.out[0] * 1.5259021896696421759365224689097e-5f, u.out[1] * 1.5259021896696421759365224689097e-5f);
122 }
123
124 /** Pack single vector2(32bit x 2) to 32 bit integer (signed packed values)
125 */
126 inline uint32_t PackSnorm2X16(const Vec2& v)
127 {
128 union {
129 int16_t in[2];
130 uint32_t out;
131 } u;
132
133 u.in[0] = (int16_t)(round(clamp(v.x, -1.0f, +1.0f) * 32767.0f));
134 u.in[1] = (int16_t)(round(clamp(v.y, -1.0f, +1.0f) * 32767.0f));
135
136 return u.out;
137 }
138
139 /** Unpack 32 bit integer to default lume vector2
140 */
141 constexpr Vec2 UnpackSnorm2X16(uint32_t p)
142 {
143 const union {
144 uint32_t in;
145 int16_t out[2];
146 } u { p };
147
148 return Vec2(clamp(u.out[0] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f),
149 clamp(u.out[1] * 3.0518509475997192297128208258309e-5f, -1.0f, 1.0f));
150 }
151
152 /** Pack vector2 to 32 bit integer with half precision
153 */
154 inline uint32_t PackHalf2X16(const Vec2& v)
155 {
156 const union {
157 uint16_t in[2];
158 uint32_t out;
159 } u { { F32ToF16(v.x), F32ToF16(v.y) } };
160
161 return u.out;
162 }
163
164 /** Unpack 32 bit integer to normal lume vector2 and rise precision from 16 bit to 32 bits
165 */
166 constexpr Vec2 UnpackHalf2X16(uint32_t v)
167 {
168 const union {
169 uint32_t in;
170 uint16_t out[2];
171 } u { v };
172
173 return Vec2(F16ToF32(u.out[0]), F16ToF32(u.out[1]));
174 }
175 /** @} */
176 } // namespace Math
177 BASE_END_NAMESPACE()
178
179 #endif // API_BASE_MATH_FLOAT_PACKER_H
180