• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The Amber Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "src/float16_helper.h"

#include <cassert>
#include <cstring>
18 
19 // Float10
20 // | 9 8 7 6 5 | 4 3 2 1 0 |
21 // | exponent  | mantissa  |
22 //
23 // Float11
24 // | 10 9 8 7 6 | 5 4 3 2 1 0 |
25 // | exponent   |  mantissa   |
26 //
27 // Float16
28 // | 15 | 14 13 12 11 10 | 9 8 7 6 5 4 3 2 1 0 |
29 // | s  |     exponent   |  mantissa           |
30 //
31 // Float32
32 // | 31 | 30 ... 23 | 22 ... 0 |
33 // | s  |  exponent | mantissa |
34 
35 namespace amber {
36 namespace float16 {
37 namespace {
38 
// Extracts the sign bit (bit 31) from |hex_float|, the bit pattern of a
// 32 bits float. The result is 1 when the sign bit is set and 0 otherwise.
uint16_t FloatSign(const uint32_t hex_float) {
  const uint32_t kSignShift = 31U;
  const uint32_t sign_bit = hex_float >> kSignShift;
  return static_cast<uint16_t>(sign_bit);
}
43 
// Return the 5 bits half float exponent for the 32 bits float whose bit
// pattern is |hex_float|.
//
// The 8 bits float exponent field is re-biased by subtracting 112
// (127 - 15). Inputs whose re-biased exponent would be zero or negative
// (float zero, float denormals, and values below the smallest normal half
// float) return 0 so the caller can flush them to zero. A re-biased exponent
// that does not fit in 5 bits trips the assertion.
uint16_t FloatExponent(const uint32_t hex_float) {
  const uint32_t kExponentBiasDelta = 112U;  // 127 (float) - 15 (half).
  const uint32_t exponent_bits = ((hex_float >> 23U) & ((1U << 8U) - 1U));
  // Handle zero, denormals and values that underflow the half float range.
  // Without this guard the unsigned subtraction below would wrap around and
  // either fire the overflow assertion or yield a bogus masked exponent.
  if (exponent_bits <= kExponentBiasDelta)
    return 0;
  const uint32_t exponent = exponent_bits - kExponentBiasDelta;
  const uint32_t half_exponent_mask = (1U << 5U) - 1U;
  assert(((exponent & ~half_exponent_mask) == 0U) && "Float exponent overflow");
  return static_cast<uint16_t>(exponent & half_exponent_mask);
}
55 
// Extracts the mantissa field (bits 22..0) from |hex_float|, the bit pattern
// of a 32 bits float. The field is 23 bits wide, so it is returned as
// uint32_t rather than a 16 bits type.
uint32_t FloatMantissa(const uint32_t hex_float) {
  const uint32_t kMantissaMask = 0x007FFFFFU;
  return hex_float & kMantissaMask;
}
61 
// Convert float |value| whose size is 16 bits to 32 bits float
// based on IEEE-754.
//
// |value| points at two bytes: value[0] holds bits 7..0 and value[1] holds
// bits 15..8 of the half float. Zero and denormal inputs produce a zero that
// keeps the input sign. An all-ones exponent (infinity or NaN) maps to the
// all-ones float exponent so those inputs stay infinity / NaN instead of
// turning into a large finite number.
float HexFloat16ToFloat(const uint8_t* value) {
  const uint32_t sign = (static_cast<uint32_t>(value[1]) & 0x80) << 24U;
  const uint32_t exponent_bits =
      (static_cast<uint32_t>(value[1]) & 0x7c) >> 2U;
  uint32_t exponent = 0U;
  uint32_t mantissa = 0U;
  // Handle zero and flush denormals to zero.
  if (exponent_bits != 0U) {
    // Re-bias the exponent from 15 to 127 (+112); the reserved all-ones
    // exponent is carried over as the reserved all-ones float exponent.
    const uint32_t float_exponent =
        (exponent_bits == 0x1fU) ? 0xffU : (exponent_bits + 112U);
    exponent = float_exponent << 23U;
    // 10 bits mantissa is left-aligned inside the 23 bits float mantissa.
    mantissa = ((static_cast<uint32_t>(value[1]) & 0x3) << 8U |
                static_cast<uint32_t>(value[0]))
               << 13U;
  }

  const uint32_t hex = sign | exponent | mantissa;
  // Copy the bit pattern with memcpy; reading |hex| through a float pointer
  // would break the strict aliasing rule.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be 32 bits wide");
  float result = 0.0f;
  std::memcpy(&result, &hex, sizeof(result));
  return result;
}
81 
// Convert float |value| whose size is 11 bits to 32 bits float
// based on IEEE-754.
//
// |value| points at two bytes: value[0] holds bits 7..0 and the low three
// bits of value[1] hold bits 10..8 of the unsigned 11 bits float (5 bits
// exponent, 6 bits mantissa, no sign). Bits above bit 10 are ignored. Zero
// and denormal inputs produce zero, and an all-ones exponent (infinity or
// NaN) maps to the all-ones float exponent.
float HexFloat11ToFloat(const uint8_t* value) {
  // Mask value[1] so bits outside the 11 bits format cannot leak into the
  // exponent.
  const uint32_t exponent_bits =
      ((static_cast<uint32_t>(value[1]) & 0x07) << 2U) |
      ((static_cast<uint32_t>(value[0]) & 0xc0) >> 6U);
  uint32_t exponent = 0U;
  uint32_t mantissa = 0U;
  // Handle zero and flush denormals to zero. Re-biasing a zero exponent
  // would otherwise turn the input 0 into 2^-15.
  if (exponent_bits != 0U) {
    // Re-bias the exponent from 15 to 127 (+112); the reserved all-ones
    // exponent is carried over as the reserved all-ones float exponent.
    const uint32_t float_exponent =
        (exponent_bits == 0x1fU) ? 0xffU : (exponent_bits + 112U);
    exponent = float_exponent << 23U;
    // 6 bits mantissa is left-aligned inside the 23 bits float mantissa.
    mantissa = (static_cast<uint32_t>(value[0]) & 0x3f) << 17U;
  }

  const uint32_t hex = exponent | mantissa;
  // Copy the bit pattern with memcpy; reading |hex| through a float pointer
  // would break the strict aliasing rule.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be 32 bits wide");
  float result = 0.0f;
  std::memcpy(&result, &hex, sizeof(result));
  return result;
}
95 
// Convert float |value| whose size is 10 bits to 32 bits float
// based on IEEE-754.
//
// |value| points at two bytes: value[0] holds bits 7..0 and the low two bits
// of value[1] hold bits 9..8 of the unsigned 10 bits float (5 bits exponent,
// 5 bits mantissa, no sign). Bits above bit 9 are ignored. Zero and denormal
// inputs produce zero, and an all-ones exponent (infinity or NaN) maps to the
// all-ones float exponent.
float HexFloat10ToFloat(const uint8_t* value) {
  // Mask value[1] so bits outside the 10 bits format cannot leak into the
  // exponent.
  const uint32_t exponent_bits =
      ((static_cast<uint32_t>(value[1]) & 0x03) << 3U) |
      ((static_cast<uint32_t>(value[0]) & 0xe0) >> 5U);
  uint32_t exponent = 0U;
  uint32_t mantissa = 0U;
  // Handle zero and flush denormals to zero. Re-biasing a zero exponent
  // would otherwise turn the input 0 into 2^-15.
  if (exponent_bits != 0U) {
    // Re-bias the exponent from 15 to 127 (+112); the reserved all-ones
    // exponent is carried over as the reserved all-ones float exponent.
    const uint32_t float_exponent =
        (exponent_bits == 0x1fU) ? 0xffU : (exponent_bits + 112U);
    exponent = float_exponent << 23U;
    // 5 bits mantissa is left-aligned inside the 23 bits float mantissa.
    mantissa = (static_cast<uint32_t>(value[0]) & 0x1f) << 18U;
  }

  const uint32_t hex = exponent | mantissa;
  // Copy the bit pattern with memcpy; reading |hex| through a float pointer
  // would break the strict aliasing rule.
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float must be 32 bits wide");
  float result = 0.0f;
  std::memcpy(&result, &hex, sizeof(result));
  return result;
}
109 
110 }  // namespace
111 
HexFloatToFloat(const uint8_t * value,uint8_t bits)112 float HexFloatToFloat(const uint8_t* value, uint8_t bits) {
113   switch (bits) {
114     case 10:
115       return HexFloat10ToFloat(value);
116     case 11:
117       return HexFloat11ToFloat(value);
118     case 16:
119       return HexFloat16ToFloat(value);
120   }
121 
122   assert(false && "Invalid bits");
123   return 0;
124 }
125 
FloatToHexFloat16(const float value)126 uint16_t FloatToHexFloat16(const float value) {
127   const uint32_t* hex = reinterpret_cast<const uint32_t*>(&value);
128   uint16_t sign = FloatSign(*hex);
129   uint16_t exponent = FloatExponent(*hex);
130   // Flush denormals.
131   uint32_t mantissa = ((exponent == 0) ? 0U : FloatMantissa(*hex));
132   return static_cast<uint16_t>(static_cast<uint16_t>(sign << 15U) |
133                                static_cast<uint16_t>(exponent << 10U) |
134                                static_cast<uint16_t>(mantissa >> 13U));
135 }
136 
137 }  // namespace float16
138 }  // namespace amber
139