1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
#include <math.h>
#include <stdint.h>
#include <string.h>
#include "fplib.h"
19
// Fallback for builds where FLT_MANT_DIG is not already defined
// (it is normally supplied by <float.h>). 24 is the significand width,
// including the implicit leading bit, that the code below assumes.
#if !defined(FLT_MANT_DIG)
#define FLT_MANT_DIG 24
#endif

// Reinterpret the storage of the lvalue x as a float / int64_t.
// x must be an lvalue, because its address is taken. The argument is
// parenthesised so that compound lvalue expressions bind correctly to '&'.
// NOTE(review): this is a pointer-cast type pun; when the object's real type
// is not compatible with the target type it violates the strict-aliasing
// rule. Prefer memcpy in new code.
#define as_float(x) (*((float *)(&(x))))
#define as_long(x) (*((int64_t *)(&(x))))
25
// Count the zero bits above the most significant set bit of a 64-bit value.
// Returns 64 when value is 0 (no bit is ever found), otherwise 0..63.
static uint32_t clz(uint64_t value)
{
    const uint32_t bit_width = (uint32_t)(sizeof(uint64_t) * 8);
    uint32_t leading = 0;

    // Walk from bit 63 downwards by shifting the input left one more
    // position each round and testing the top bit.
    while (leading < bit_width)
    {
        // The temporary stays volatile as in the previous implementation;
        // presumably this keeps the compiler from transforming the loop --
        // confirm before removing it.
        volatile uint64_t top_bit = (value << leading) & 0x8000000000000000ull;
        if (top_bit)
            return leading;
        ++leading;
    }
    return leading;
}
37
// Convert a signed 64-bit integer to a 32-bit float under an explicit
// rounding mode.
//
//   data  value to convert
//   sat   accepted for interface compatibility; not consulted
//   rnd   qcomRTZ: truncate toward zero
//         qcomRTE: delegated to the C cast (float)data, which rounds per the
//                  current floating-point environment (presumably
//                  round-to-nearest-even -- confirm)
//         qcomRTP: round toward +infinity
//         qcomRTN: round toward -infinity
//
// Returns the converted value, or 0.0f for any other rnd value.
float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd)
{
    (void)sat;

    switch (rnd) {
    case qcomRTE:
        return (float)data;
    case qcomRTZ:
    case qcomRTP:
    case qcomRTN:
        break;
    default: // qcomRoundingModeCount or any out-of-range value
        return 0.0f;
    }

    if (data == 0)
        return 0.0f;

    // Take the magnitude in unsigned arithmetic: negating INT64_MIN as a
    // signed value overflows, while 0 - (uint64_t)data is well defined and
    // yields 2^63 for that input.
    const int negative = (data < 0);
    const uint64_t magnitude =
        negative ? (uint64_t)0 - (uint64_t)data : (uint64_t)data;

    // magnitude != 0, so lead is in 0..63 and the top set bit is 63 - lead.
    const int lead = (int)clz(magnitude);

    // Biased exponent: 127 + position of the leading one.
    uint32_t bits = (uint32_t)(127 + 63 - lead) << (FLT_MANT_DIG - 1);

    // Align the leading one with bit 23 (40 == 64 - 24). A right shift may
    // discard low bits; remember whether any of them were set.
    const int shift = 40 - lead;
    int inexact = 0;
    uint32_t mantissa;
    if (shift >= 0) {
        const uint64_t kept = magnitude >> shift;
        inexact = ((kept << shift) != magnitude);
        mantissa = (uint32_t)kept;
    } else {
        mantissa = (uint32_t)(magnitude << -shift);
    }
    bits |= mantissa & 0x7fffffu; // drop the implicit leading one
    if (negative)
        bits |= 0x80000000u;

    // memcpy is the aliasing-safe way to reinterpret the bit pattern.
    float truncated;
    memcpy(&truncated, &bits, sizeof truncated);

    // 'truncated' is the round-toward-zero result. RTP on a positive input
    // and RTN on a negative input must instead move one representable step
    // away from zero whenever bits were discarded.
    const int away_from_zero = (rnd == qcomRTP && !negative)
                            || (rnd == qcomRTN && negative);
    if (inexact && away_from_zero) {
        // Largest-magnitude finite float of the same sign, used only as the
        // direction argument for nextafterf.
        const uint32_t limit_bits = negative ? 0xff7fffffu : 0x7f7fffffu;
        float limit;
        memcpy(&limit, &limit_bits, sizeof limit);
        return nextafterf(truncated, limit);
    }
    return truncated;
}
156
// Convert an unsigned 64-bit integer to a 32-bit float under an explicit
// rounding mode.
//
//   data  value to convert
//   sat   accepted for interface compatibility; not consulted
//   rnd   qcomRTZ: truncate toward zero
//         qcomRTE: delegated to the C cast (float)data, which rounds per the
//                  current floating-point environment (presumably
//                  round-to-nearest-even -- confirm)
//         qcomRTP: round toward +infinity
//         qcomRTN: round toward -infinity (same as truncation, since the
//                  input is never negative)
//
// Returns the converted value, or 0.0f for any other rnd value.
float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd)
{
    (void)sat;

    switch (rnd) {
    case qcomRTE:
        return (float)data;
    case qcomRTZ:
    case qcomRTP:
    case qcomRTN:
        break;
    default: // qcomRoundingModeCount or any out-of-range value
        return 0.0f;
    }

    if (data == 0)
        return 0.0f;

    // data != 0, so lead is in 0..63 and the top set bit is 63 - lead.
    const int lead = (int)clz(data);

    // Biased exponent: 127 + position of the leading one.
    uint32_t bits = (uint32_t)(127 + 63 - lead) << (FLT_MANT_DIG - 1);

    // Align the leading one with bit 23 (40 == 64 - 24). A right shift may
    // discard low bits; remember whether any of them were set.
    const int shift = 40 - lead;
    int inexact = 0;
    uint32_t mantissa;
    if (shift >= 0) {
        const uint64_t kept = data >> shift;
        inexact = ((kept << shift) != data);
        mantissa = (uint32_t)kept;
    } else {
        mantissa = (uint32_t)(data << -shift);
    }
    bits |= mantissa & 0x7fffffu; // drop the implicit leading one

    // memcpy is the aliasing-safe way to reinterpret the bit pattern.
    float truncated;
    memcpy(&truncated, &bits, sizeof truncated);

    // Only RTP differs from truncation for a non-negative input: step up to
    // the next representable float when bits were discarded.
    if (rnd == qcomRTP && inexact) {
        // Largest finite float, used only as the direction for nextafterf.
        const uint32_t limit_bits = 0x7f7fffffu;
        float limit;
        memcpy(&limit, &limit_bits, sizeof limit);
        return nextafterf(truncated, limit);
    }
    return truncated;
}
230