// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef sw_Half_hpp #define sw_Half_hpp #include "Math.hpp" #include #include namespace sw { class half { public: half() = default; explicit half(float f); operator float() const; half &operator=(float f); private: unsigned short fp16i; }; inline half shortAsHalf(short s) { union { half h; short s; } hs; hs.s = s; return hs.h; } class RGB9E5 { union { struct { unsigned int R : 9; unsigned int G : 9; unsigned int B : 9; unsigned int E : 5; }; uint32_t packed; }; public: RGB9E5(const float rgb[3]) : RGB9E5(rgb[0], rgb[1], rgb[2]) { } RGB9E5(float r, float g, float b) { // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion // B is the exponent bias (15) constexpr int g_sharedexp_bias = 15; // N is the number of mantissa bits per component (9) constexpr int g_sharedexp_mantissabits = 9; // Emax is the maximum allowed biased exponent value (31) constexpr int g_sharedexp_maxexponent = 31; constexpr float g_sharedexp_max = ((static_cast(1 << g_sharedexp_mantissabits) - 1) / static_cast(1 << g_sharedexp_mantissabits)) * static_cast(1 << (g_sharedexp_maxexponent - g_sharedexp_bias)); // Clamp components to valid range. NaN becomes 0. const float red_c = std::min(!(r > 0) ? 0 : r, g_sharedexp_max); const float green_c = std::min(!(g > 0) ? 0 : g, g_sharedexp_max); const float blue_c = std::min(!(b > 0) ? 0 : b, g_sharedexp_max); // We're reducing the mantissa to 9 bits, so we must round up if the next // bit is 1. In other words add 0.5 to the new mantissa's position and // allow overflow into the exponent so we can scale correctly. constexpr int half = 1 << (23 - g_sharedexp_mantissabits); const float red_r = bit_cast(bit_cast(red_c) + half); const float green_r = bit_cast(bit_cast(green_c) + half); const float blue_r = bit_cast(bit_cast(blue_c) + half); // The largest component determines the shared exponent. It can't be lower // than 0 (after bias subtraction) so also limit to the mimimum representable. constexpr float min_s = 0.5f / (1 << g_sharedexp_bias); float max_s = std::max(std::max(red_r, green_r), std::max(blue_r, min_s)); // Obtain the reciprocal of the shared exponent by inverting the bits, // and scale by the new mantissa's size. Note that the IEEE-754 single-precision // format has an implicit leading 1, but this shared component format does not. float scale = bit_cast((bit_cast(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (g_sharedexp_mantissabits - 2)); R = static_cast(round(red_c * scale)); G = static_cast(round(green_c * scale)); B = static_cast(round(blue_c * scale)); E = (bit_cast(max_s) >> 23) - 127 + 15 + 1; } operator unsigned int() const { return packed; } void toRGB16F(half rgb[3]) const { constexpr int offset = 24; // Exponent bias (15) + number of mantissa bits per component (9) = 24 const float factor = (1u << E) * (1.0f / (1 << offset)); rgb[0] = half(R * factor); rgb[1] = half(G * factor); rgb[2] = half(B * factor); } }; class R11G11B10F { union { struct { unsigned int R : 11; unsigned int G : 11; unsigned int B : 10; }; uint32_t packed; }; public: R11G11B10F(const float rgb[3]) { R = float32ToFloat11(rgb[0]); G = float32ToFloat11(rgb[1]); B = float32ToFloat10(rgb[2]); } operator unsigned int() const { return packed; } void toRGB16F(half rgb[3]) const { rgb[0] = float11ToFloat16(R); rgb[1] = float11ToFloat16(G); rgb[2] = float10ToFloat16(B); } static inline half float11ToFloat16(unsigned short fp11) { return shortAsHalf(fp11 << 4); // Sign bit 0 } static inline half float10ToFloat16(unsigned short fp10) { return shortAsHalf(fp10 << 5); // Sign bit 0 } static inline unsigned short float32ToFloat11(float fp32) { const unsigned int float32MantissaMask = 0x7FFFFF; const unsigned int float32ExponentMask = 0x7F800000; const unsigned int float32SignMask = 0x80000000; const unsigned int float32ValueMask = ~float32SignMask; const unsigned int float32ExponentFirstBit = 23; const unsigned int float32ExponentBias = 127; const unsigned short float11Max = 0x7BF; const unsigned short float11MantissaMask = 0x3F; const unsigned short float11ExponentMask = 0x7C0; const unsigned short float11BitMask = 0x7FF; const unsigned int float11ExponentBias = 14; const unsigned int float32Maxfloat11 = 0x477E0000; const unsigned int float32MinNormfloat11 = 0x38800000; const unsigned int float32MinDenormfloat11 = 0x35000080; const unsigned int float32Bits = bit_cast(fp32); const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask; unsigned int float32Val = float32Bits & float32ValueMask; if((float32Val & float32ExponentMask) == float32ExponentMask) { // INF or NAN if((float32Val & float32MantissaMask) != 0) { return float11ExponentMask | (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) & float11MantissaMask); } else if(float32Sign) { // -INF is clamped to 0 since float11 is positive only return 0; } else { return float11ExponentMask; } } else if(float32Sign) { // float11 is positive only, so clamp to zero return 0; } else if(float32Val > float32Maxfloat11) { // The number is too large to be represented as a float11, set to max return float11Max; } else if(float32Val < float32MinDenormfloat11) { // The number is too small to be represented as a denormalized float11, set to 0 return 0; } else { if(float32Val < float32MinNormfloat11) { // The number is too small to be represented as a normalized float11 // Convert it to a denormalized value. const unsigned int shift = (float32ExponentBias - float11ExponentBias) - (float32Val >> float32ExponentFirstBit); float32Val = ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift; } else { // Rebias the exponent to represent the value as a normalized float11 float32Val += 0xC8000000; } return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask; } } static inline unsigned short float32ToFloat10(float fp32) { const unsigned int float32MantissaMask = 0x7FFFFF; const unsigned int float32ExponentMask = 0x7F800000; const unsigned int float32SignMask = 0x80000000; const unsigned int float32ValueMask = ~float32SignMask; const unsigned int float32ExponentFirstBit = 23; const unsigned int float32ExponentBias = 127; const unsigned short float10Max = 0x3DF; const unsigned short float10MantissaMask = 0x1F; const unsigned short float10ExponentMask = 0x3E0; const unsigned short float10BitMask = 0x3FF; const unsigned int float10ExponentBias = 14; const unsigned int float32Maxfloat10 = 0x477C0000; const unsigned int float32MinNormfloat10 = 0x38800000; const unsigned int float32MinDenormfloat10 = 0x35800040; const unsigned int float32Bits = bit_cast(fp32); const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask; unsigned int float32Val = float32Bits & float32ValueMask; if((float32Val & float32ExponentMask) == float32ExponentMask) { // INF or NAN if((float32Val & float32MantissaMask) != 0) { return float10ExponentMask | (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) & float10MantissaMask); } else if(float32Sign) { // -INF is clamped to 0 since float10 is positive only return 0; } else { return float10ExponentMask; } } else if(float32Sign) { // float10 is positive only, so clamp to zero return 0; } else if(float32Val > float32Maxfloat10) { // The number is too large to be represented as a float10, set to max return float10Max; } else if(float32Val < float32MinDenormfloat10) { // The number is too small to be represented as a denormalized float10, set to 0 return 0; } else { if(float32Val < float32MinNormfloat10) { // The number is too small to be represented as a normalized float10 // Convert it to a denormalized value. const unsigned int shift = (float32ExponentBias - float10ExponentBias) - (float32Val >> float32ExponentFirstBit); float32Val = ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift; } else { // Rebias the exponent to represent the value as a normalized float10 float32Val += 0xC8000000; } return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask; } } }; } // namespace sw #endif // sw_Half_hpp