1 // Copyright 2019 The Amber Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "src/float16_helper.h" 16 17 #include <cassert> 18 19 // Float10 20 // | 9 8 7 6 5 | 4 3 2 1 0 | 21 // | exponent | mantissa | 22 // 23 // Float11 24 // | 10 9 8 7 6 | 5 4 3 2 1 0 | 25 // | exponent | mantissa | 26 // 27 // Float16 28 // | 15 | 14 13 12 11 10 | 9 8 7 6 5 4 3 2 1 0 | 29 // | s | exponent | mantissa | 30 // 31 // Float32 32 // | 31 | 30 ... 23 | 22 ... 0 | 33 // | s | exponent | mantissa | 34 35 namespace amber { 36 namespace float16 { 37 namespace { 38 39 // Return sign value of 32 bits float. FloatSign(const uint32_t hex_float)40uint16_t FloatSign(const uint32_t hex_float) { 41 return static_cast<uint16_t>(hex_float >> 31U); 42 } 43 44 // Return exponent value of 32 bits float. FloatExponent(const uint32_t hex_float)45uint16_t FloatExponent(const uint32_t hex_float) { 46 uint32_t exponent_bits = ((hex_float >> 23U) & ((1U << 8U) - 1U)); 47 // Handle zero and denormals. 48 if (exponent_bits == 0U) 49 return 0; 50 uint32_t exponent = exponent_bits - 112U; 51 const uint32_t half_exponent_mask = (1U << 5U) - 1U; 52 assert(((exponent & ~half_exponent_mask) == 0U) && "Float exponent overflow"); 53 return static_cast<uint16_t>(exponent & half_exponent_mask); 54 } 55 56 // Return mantissa value of 32 bits float. Note that mantissa for 32 57 // bits float is 23 bits and this method must return uint32_t. FloatMantissa(const uint32_t hex_float)58uint32_t FloatMantissa(const uint32_t hex_float) { 59 return static_cast<uint32_t>(hex_float & ((1U << 23U) - 1U)); 60 } 61 62 // Convert float |value| whose size is 16 bits to 32 bits float 63 // based on IEEE-754. HexFloat16ToFloat(const uint8_t * value)64float HexFloat16ToFloat(const uint8_t* value) { 65 uint32_t sign = (static_cast<uint32_t>(value[1]) & 0x80) << 24U; 66 uint32_t exponent_bits = (static_cast<uint32_t>(value[1]) & 0x7c) >> 2U; 67 uint32_t exponent = 0U; 68 uint32_t mantissa = 0U; 69 // Handle zero and flush denormals to zero. 70 if (exponent_bits != 0U) { 71 exponent = (exponent_bits + 112U) << 23U; 72 mantissa = ((static_cast<uint32_t>(value[1]) & 0x3) << 8U | 73 static_cast<uint32_t>(value[0])) 74 << 13U; 75 } 76 77 uint32_t hex = sign | exponent | mantissa; 78 float* hex_float = reinterpret_cast<float*>(&hex); 79 return *hex_float; 80 } 81 82 // Convert float |value| whose size is 11 bits to 32 bits float 83 // based on IEEE-754. HexFloat11ToFloat(const uint8_t * value)84float HexFloat11ToFloat(const uint8_t* value) { 85 uint32_t exponent = (((static_cast<uint32_t>(value[1]) << 2U) | 86 ((static_cast<uint32_t>(value[0]) & 0xc0) >> 6U)) + 87 112U) 88 << 23U; 89 uint32_t mantissa = (static_cast<uint32_t>(value[0]) & 0x3f) << 17U; 90 91 uint32_t hex = exponent | mantissa; 92 float* hex_float = reinterpret_cast<float*>(&hex); 93 return *hex_float; 94 } 95 96 // Convert float |value| whose size is 10 bits to 32 bits float 97 // based on IEEE-754. HexFloat10ToFloat(const uint8_t * value)98float HexFloat10ToFloat(const uint8_t* value) { 99 uint32_t exponent = (((static_cast<uint32_t>(value[1]) << 3U) | 100 ((static_cast<uint32_t>(value[0]) & 0xe0) >> 5U)) + 101 112U) 102 << 23U; 103 uint32_t mantissa = (static_cast<uint32_t>(value[0]) & 0x1f) << 18U; 104 105 uint32_t hex = exponent | mantissa; 106 float* hex_float = reinterpret_cast<float*>(&hex); 107 return *hex_float; 108 } 109 110 } // namespace 111 HexFloatToFloat(const uint8_t * value,uint8_t bits)112float HexFloatToFloat(const uint8_t* value, uint8_t bits) { 113 switch (bits) { 114 case 10: 115 return HexFloat10ToFloat(value); 116 case 11: 117 return HexFloat11ToFloat(value); 118 case 16: 119 return HexFloat16ToFloat(value); 120 } 121 122 assert(false && "Invalid bits"); 123 return 0; 124 } 125 FloatToHexFloat16(const float value)126uint16_t FloatToHexFloat16(const float value) { 127 const uint32_t* hex = reinterpret_cast<const uint32_t*>(&value); 128 uint16_t sign = FloatSign(*hex); 129 uint16_t exponent = FloatExponent(*hex); 130 // Flush denormals. 131 uint32_t mantissa = ((exponent == 0) ? 0U : FloatMantissa(*hex)); 132 return static_cast<uint16_t>(static_cast<uint16_t>(sign << 15U) | 133 static_cast<uint16_t>(exponent << 10U) | 134 static_cast<uint16_t>(mantissa >> 13U)); 135 } 136 137 } // namespace float16 138 } // namespace amber 139