/*
* Copyright (C) 2023 Huawei Device Co., Ltd.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef API_BASE_MATH_FLOAT_PACKER_H
#define API_BASE_MATH_FLOAT_PACKER_H
#include
#include
BASE_BEGIN_NAMESPACE()
namespace Math {
// 32 bit float layout: sign in bit 31, 8 exponent bits starting at bit 23.
constexpr uint32_t F32_EXPONENT_BITS = 0xFFU; // exponent field mask, not yet shifted into place
constexpr uint32_t F32_EXPONENT_SHIFT = 23U;  // lowest bit of the exponent field
constexpr uint32_t F32_SIGN_BIT = 31U;        // index of the sign bit
// All exponent bits set, mantissa zero: the bit pattern of +infinity (also usable as exponent mask).
constexpr uint32_t F32_INFINITY = (F32_EXPONENT_BITS << F32_EXPONENT_SHIFT);

// 16 bit float layout: sign in bit 15, 5 exponent bits starting at bit 10.
constexpr uint32_t F16_EXPONENT_BITS = 0x1FU; // exponent field mask, not yet shifted into place
constexpr uint32_t F16_EXPONENT_SHIFT = 10U;  // lowest bit of the exponent field
constexpr uint32_t F16_SIGN_BIT = 15U;        // index of the sign bit
// Distance between the sign bit positions of the two formats.
constexpr uint32_t F16_SIGN_SHIFT = (F32_SIGN_BIT - F16_SIGN_BIT);
// Distance between the exponent/mantissa positions of the two formats.
constexpr uint32_t F16_MANTISSA_SHIFT = (F32_EXPONENT_SHIFT - F16_EXPONENT_SHIFT);
// All exponent bits set, mantissa zero: the bit pattern of +infinity (also usable as exponent mask).
constexpr uint32_t F16_INFINITY = (F16_EXPONENT_BITS << F16_EXPONENT_SHIFT);
/** \addtogroup group_math_floatpacker
* @{
*/
/** Converts 32 bit floating point number to 16 bit float value.
 *
 * This is a fast, approximate conversion:
 * - the mantissa is truncated to 10 bits (no rounding),
 * - values whose exponent is below the smallest normal 16 bit exponent (-14), which includes
 *   zero and 32 bit denormals, are flushed to (signed) zero,
 * - values whose exponent is above the largest 16 bit exponent (15), and infinities, become
 *   (signed) infinity,
 * - NaN stays NaN (the top mantissa bits are kept and the quiet bit is set).
 * @param val Value to convert.
 * @return Bit pattern of the 16 bit float.
 */
inline uint16_t F32ToF16(float val)
{
    // Reinterpret the float as its raw bits.
    union {
        float f;
        uint32_t ui;
    } f32 { val };

    const uint32_t sign = (f32.ui & 0x80000000) >> F16_SIGN_SHIFT; // Sign bit moved to bit 15
    const uint32_t exponent = f32.ui & F32_INFINITY;               // Exponent field, still at 32 bit position
    const uint32_t mantissa = f32.ui & 0x007fffff;                 // 23 mantissa bits

    uint32_t noSign = 0; // Default: flush-to-zero
    if (exponent == F32_INFINITY) {
        // Infinity or NaN.
        noSign = F16_INFINITY;
        if (mantissa != 0) {
            // NaN: truncating the mantissa could leave it zero, which would read back as
            // infinity, so always set the top mantissa bit (0x0200).
            noSign |= 0x0200 | (mantissa >> F16_MANTISSA_SHIFT);
        }
    } else if (exponent > 0x47000000) {
        // 16bit max exponent = 15, 32bit bias 127
        // (15 + 127) << 23 = 0x47000000
        noSign = F16_INFINITY; // Clamp-to-inf
    } else if (exponent >= 0x38800000) {
        // 16bit min exponent = -14, 32bit bias 127
        // (-14 + 127) << 23 = 0x38800000
        // Representable as a normal 16 bit float: align the mantissa, then adjust the bias.
        // 16bit bias = 15, 32bit bias = 127
        // (-127 + 15) << 10 = (-112) << 10 = -0x1c000
        noSign = ((f32.ui & 0x7fffffff) >> F16_MANTISSA_SHIFT) - 0x1c000;
    }
    // Re-insert sign bit
    return static_cast<uint16_t>(noSign | sign);
}
/** Converts 16 bit floating point number to 32 bit float.
 *
 * - 16 bit denormals (exponent field zero) are treated as (signed) zero,
 * - infinity stays infinity,
 * - NaN stays NaN (the mantissa bits are carried over).
 *
 * NOTE: the result is produced by writing one union member and reading another. The language
 * does not permit that during constant evaluation, so despite the constexpr specifier expect
 * this function to be evaluated at run time.
 * @param val Bit pattern of the 16 bit float.
 * @return Converted 32 bit float.
 */
inline constexpr float F16ToF32(uint16_t val)
{
    union {
        float f = 0.f;
        uint32_t ui;
    } f32;

    const uint32_t sign = static_cast<uint32_t>(val & 0x8000) << F16_SIGN_SHIFT;         // Sign bit moved to bit 31
    const uint32_t exponent = val & F16_INFINITY;                                        // Exponent field, 16 bit position
    const uint32_t mantissa = static_cast<uint32_t>(val & 0x03ff) << F16_MANTISSA_SHIFT; // Mantissa aligned on MSB

    uint32_t noSign = 0; // Default: denormals-as-zero
    if (exponent == F16_INFINITY) {
        // Infinity (mantissa == 0) or NaN (mantissa != 0): all exponent bits set in the result.
        noSign = F32_INFINITY | mantissa;
    } else if (exponent != 0) {
        // Normal number: align exponent and mantissa, then adjust the bias.
        // 16bit bias = 15, 32bit bias = 127
        // (-15 + 127) << 23 = 0x38000000
        noSign = (static_cast<uint32_t>(val & 0x7fff) << F16_MANTISSA_SHIFT) + 0x38000000;
    }
    f32.ui = noSign | sign; // Re-insert sign bit
    return f32.f;
}
/** Pack single vector2(32bit x 2) to 32 bit integer (unsigned packed values).
 *
 * Each component is clamped to [0, 1], scaled to [0, 65535] and passed through round().
 * The first component is stored in the low 16 bits of the result and the second component in
 * the high 16 bits, independent of the byte order of the host.
 * @param v Vector to pack.
 * @return Packed value.
 */
inline uint32_t PackUnorm2X16(const Vec2& v)
{
    const uint16_t first = static_cast<uint16_t>(round(clamp(v[0], 0.0f, 1.0f) * 65535.0f));
    const uint16_t second = static_cast<uint16_t>(round(clamp(v[1], 0.0f, 1.0f) * 65535.0f));
    // Combine with shifts instead of reading back through a union of differently typed members.
    return static_cast<uint32_t>(first) | (static_cast<uint32_t>(second) << 16);
}
/** Unpack 32 bit integer (two unsigned normalized 16 bit values) to Vec2.
 *
 * The low 16 bits become the first component and the high 16 bits the second component; each
 * is scaled from [0, 65535] to [0, 1].
 * @param p Packed value.
 * @return Unpacked vector.
 */
constexpr Vec2 UnpackUnorm2X16(uint32_t p)
{
    // 1 / 65535
    constexpr float scale = 1.5259021896696421759365224689097e-5f;
    // Mask and shift instead of reading through a union: a union read of the inactive member
    // is not allowed during constant evaluation and depends on host byte order.
    return Vec2(static_cast<float>(p & 0xFFFFU) * scale, static_cast<float>(p >> 16) * scale);
}
/** Pack single vector2(32bit x 2) to 32 bit integer (signed packed values).
 *
 * Each component is clamped to [-1, 1], scaled to [-32767, 32767] and passed through round().
 * x is stored (as a 16 bit two's complement value) in the low 16 bits of the result and y in
 * the high 16 bits, independent of the byte order of the host.
 * @param v Vector to pack.
 * @return Packed value.
 */
inline uint32_t PackSnorm2X16(const Vec2& v)
{
    const int16_t x = static_cast<int16_t>(round(clamp(v.x, -1.0f, +1.0f) * 32767.0f));
    const int16_t y = static_cast<int16_t>(round(clamp(v.y, -1.0f, +1.0f) * 32767.0f));
    // Convert through uint16_t first so that a negative value does not sign-extend into the
    // other half of the result.
    const uint32_t low = static_cast<uint16_t>(x);
    const uint32_t high = static_cast<uint16_t>(y);
    return low | (high << 16);
}
/** Unpack 32 bit integer (two signed normalized 16 bit values) to Vec2.
 *
 * The low 16 bits become x and the high 16 bits become y. Each half is read as a 16 bit two's
 * complement value, scaled by 1 / 32767 and clamped to [-1, 1] (so -32768 maps to -1).
 * @param p Packed value.
 * @return Unpacked vector.
 */
constexpr Vec2 UnpackSnorm2X16(uint32_t p)
{
    // 1 / 32767
    constexpr float scale = 3.0518509475997192297128208258309e-5f;
    // Mask and shift instead of reading through a union: a union read of the inactive member
    // is not allowed during constant evaluation and depends on host byte order.
    const int32_t rawX = static_cast<int32_t>(p & 0xFFFFU);
    const int32_t rawY = static_cast<int32_t>(p >> 16);
    // Sign-extend from 16 bits.
    const int32_t x = (rawX >= 0x8000) ? (rawX - 0x10000) : rawX;
    const int32_t y = (rawY >= 0x8000) ? (rawY - 0x10000) : rawY;
    return Vec2(clamp(static_cast<float>(x) * scale, -1.0f, 1.0f), clamp(static_cast<float>(y) * scale, -1.0f, 1.0f));
}
/** Pack vector2 to 32 bit integer with half precision.
 *
 * Both components are converted with F32ToF16. x is stored in the low 16 bits of the result
 * and y in the high 16 bits, independent of the byte order of the host.
 * @param v Vector to pack.
 * @return Packed value.
 */
inline uint32_t PackHalf2X16(const Vec2& v)
{
    const uint32_t low = F32ToF16(v.x);
    const uint32_t high = F32ToF16(v.y);
    // Combine with shifts instead of reading back through a union of differently typed members.
    return low | (high << 16);
}
/** Unpack 32 bit integer (two 16 bit floats) to Vec2, raising precision from 16 bits to 32 bits.
 *
 * The low 16 bits become the first component and the high 16 bits the second component; each
 * is converted with F16ToF32.
 * @param v Packed value.
 * @return Unpacked vector.
 */
constexpr Vec2 UnpackHalf2X16(uint32_t v)
{
    // Mask and shift instead of reading through a union of differently typed members.
    const uint16_t low = static_cast<uint16_t>(v & 0xFFFFU);
    const uint16_t high = static_cast<uint16_t>(v >> 16);
    return Vec2(F16ToF32(low), F16ToF32(high));
}
/** @} */
} // namespace Math
BASE_END_NAMESPACE()
#endif // API_BASE_MATH_FLOAT_PACKER_H