• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef sw_Half_hpp
16 #define sw_Half_hpp
17 
#include "Math.hpp"

#include <algorithm>
#include <cmath>
#include <cstring>
22 
23 namespace sw {
24 
// Value type that stores a number in a single unsigned short (fp16i).
// Only the interface is declared here; the conversion and assignment
// members are defined out of line.
// NOTE(review): fp16i presumably holds an IEEE 754 binary16 bit pattern —
// confirm against the out-of-line definitions.
class half
{
public:
	// Defaulted: fp16i is left uninitialized by default-initialization.
	half() = default;

	// Construction from float must be spelled out explicitly.
	explicit half(float f);

	// Assignment from another half or directly from a float.
	half &operator=(half h);
	half &operator=(float f);

	// Implicit conversion back to float.
	operator float() const;

private:
	unsigned short fp16i;
};
39 
shortAsHalf(short s)40 inline half shortAsHalf(short s)
41 {
42 	union
43 	{
44 		half h;
45 		short s;
46 	} hs;
47 
48 	hs.s = s;
49 
50 	return hs.h;
51 }
52 
53 class RGB9E5
54 {
55 	unsigned int R : 9;
56 	unsigned int G : 9;
57 	unsigned int B : 9;
58 	unsigned int E : 5;
59 
60 public:
RGB9E5(float rgb[3])61 	RGB9E5(float rgb[3])
62 	    : RGB9E5(rgb[0], rgb[1], rgb[2])
63 	{
64 	}
65 
RGB9E5(float r,float g,float b)66 	RGB9E5(float r, float g, float b)
67 	{
68 		// Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
69 
70 		// B is the exponent bias (15)
71 		constexpr int g_sharedexp_bias = 15;
72 
73 		// N is the number of mantissa bits per component (9)
74 		constexpr int g_sharedexp_mantissabits = 9;
75 
76 		// Emax is the maximum allowed biased exponent value (31)
77 		constexpr int g_sharedexp_maxexponent = 31;
78 
79 		constexpr float g_sharedexp_max =
80 		    ((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
81 		     static_cast<float>(1 << g_sharedexp_mantissabits)) *
82 		    static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));
83 
84 		// Clamp components to valid range. NaN becomes 0.
85 		const float red_c = std::min(!(r > 0) ? 0 : r, g_sharedexp_max);
86 		const float green_c = std::min(!(g > 0) ? 0 : g, g_sharedexp_max);
87 		const float blue_c = std::min(!(b > 0) ? 0 : b, g_sharedexp_max);
88 
89 		// We're reducing the mantissa to 9 bits, so we must round up if the next
90 		// bit is 1. In other words add 0.5 to the new mantissa's position and
91 		// allow overflow into the exponent so we can scale correctly.
92 		constexpr int half = 1 << (23 - g_sharedexp_mantissabits);
93 		const float red_r = bit_cast<float>(bit_cast<int>(red_c) + half);
94 		const float green_r = bit_cast<float>(bit_cast<int>(green_c) + half);
95 		const float blue_r = bit_cast<float>(bit_cast<int>(blue_c) + half);
96 
97 		// The largest component determines the shared exponent. It can't be lower
98 		// than 0 (after bias subtraction) so also limit to the mimimum representable.
99 		constexpr float min_s = 0.5f / (1 << g_sharedexp_bias);
100 		float max_s = std::max(std::max(red_r, green_r), std::max(blue_r, min_s));
101 
102 		// Obtain the reciprocal of the shared exponent by inverting the bits,
103 		// and scale by the new mantissa's size. Note that the IEEE-754 single-precision
104 		// format has an implicit leading 1, but this shared component format does not.
105 		float scale = bit_cast<float>((bit_cast<int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (g_sharedexp_mantissabits - 2));
106 
107 		R = static_cast<unsigned int>(round(red_c * scale));
108 		G = static_cast<unsigned int>(round(green_c * scale));
109 		B = static_cast<unsigned int>(round(blue_c * scale));
110 		E = (bit_cast<unsigned int>(max_s) >> 23) - 127 + 15 + 1;
111 	}
112 
operator unsigned int() const113 	operator unsigned int() const
114 	{
115 		return *reinterpret_cast<const unsigned int *>(this);
116 	}
117 
toRGB16F(half rgb[3]) const118 	void toRGB16F(half rgb[3]) const
119 	{
120 		constexpr int offset = 24;  // Exponent bias (15) + number of mantissa bits per component (9) = 24
121 
122 		const float factor = (1u << E) * (1.0f / (1 << offset));
123 		rgb[0] = half(R * factor);
124 		rgb[1] = half(G * factor);
125 		rgb[2] = half(B * factor);
126 	}
127 };
128 
129 class R11G11B10F
130 {
131 public:
R11G11B10F(float rgb[3])132 	R11G11B10F(float rgb[3])
133 	{
134 		R = float32ToFloat11(rgb[0]);
135 		G = float32ToFloat11(rgb[1]);
136 		B = float32ToFloat10(rgb[2]);
137 	}
138 
operator unsigned int() const139 	operator unsigned int() const
140 	{
141 		return *reinterpret_cast<const unsigned int *>(this);
142 	}
143 
toRGB16F(half rgb[3]) const144 	void toRGB16F(half rgb[3]) const
145 	{
146 		rgb[0] = float11ToFloat16(R);
147 		rgb[1] = float11ToFloat16(G);
148 		rgb[2] = float10ToFloat16(B);
149 	}
150 
float11ToFloat16(unsigned short fp11)151 	static inline half float11ToFloat16(unsigned short fp11)
152 	{
153 		return shortAsHalf(fp11 << 4);  // Sign bit 0
154 	}
155 
float10ToFloat16(unsigned short fp10)156 	static inline half float10ToFloat16(unsigned short fp10)
157 	{
158 		return shortAsHalf(fp10 << 5);  // Sign bit 0
159 	}
160 
float32ToFloat11(float fp32)161 	static inline unsigned short float32ToFloat11(float fp32)
162 	{
163 		const unsigned int float32MantissaMask = 0x7FFFFF;
164 		const unsigned int float32ExponentMask = 0x7F800000;
165 		const unsigned int float32SignMask = 0x80000000;
166 		const unsigned int float32ValueMask = ~float32SignMask;
167 		const unsigned int float32ExponentFirstBit = 23;
168 		const unsigned int float32ExponentBias = 127;
169 
170 		const unsigned short float11Max = 0x7BF;
171 		const unsigned short float11MantissaMask = 0x3F;
172 		const unsigned short float11ExponentMask = 0x7C0;
173 		const unsigned short float11BitMask = 0x7FF;
174 		const unsigned int float11ExponentBias = 14;
175 
176 		const unsigned int float32Maxfloat11 = 0x477E0000;
177 		const unsigned int float32MinNormfloat11 = 0x38800000;
178 		const unsigned int float32MinDenormfloat11 = 0x35000080;
179 
180 		const unsigned int float32Bits = *reinterpret_cast<unsigned int *>(&fp32);
181 		const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
182 
183 		unsigned int float32Val = float32Bits & float32ValueMask;
184 
185 		if((float32Val & float32ExponentMask) == float32ExponentMask)
186 		{
187 			// INF or NAN
188 			if((float32Val & float32MantissaMask) != 0)
189 			{
190 				return float11ExponentMask |
191 				       (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
192 				        float11MantissaMask);
193 			}
194 			else if(float32Sign)
195 			{
196 				// -INF is clamped to 0 since float11 is positive only
197 				return 0;
198 			}
199 			else
200 			{
201 				return float11ExponentMask;
202 			}
203 		}
204 		else if(float32Sign)
205 		{
206 			// float11 is positive only, so clamp to zero
207 			return 0;
208 		}
209 		else if(float32Val > float32Maxfloat11)
210 		{
211 			// The number is too large to be represented as a float11, set to max
212 			return float11Max;
213 		}
214 		else if(float32Val < float32MinDenormfloat11)
215 		{
216 			// The number is too small to be represented as a denormalized float11, set to 0
217 			return 0;
218 		}
219 		else
220 		{
221 			if(float32Val < float32MinNormfloat11)
222 			{
223 				// The number is too small to be represented as a normalized float11
224 				// Convert it to a denormalized value.
225 				const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
226 				                           (float32Val >> float32ExponentFirstBit);
227 				float32Val =
228 				    ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
229 			}
230 			else
231 			{
232 				// Rebias the exponent to represent the value as a normalized float11
233 				float32Val += 0xC8000000;
234 			}
235 
236 			return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
237 		}
238 	}
239 
float32ToFloat10(float fp32)240 	static inline unsigned short float32ToFloat10(float fp32)
241 	{
242 		const unsigned int float32MantissaMask = 0x7FFFFF;
243 		const unsigned int float32ExponentMask = 0x7F800000;
244 		const unsigned int float32SignMask = 0x80000000;
245 		const unsigned int float32ValueMask = ~float32SignMask;
246 		const unsigned int float32ExponentFirstBit = 23;
247 		const unsigned int float32ExponentBias = 127;
248 
249 		const unsigned short float10Max = 0x3DF;
250 		const unsigned short float10MantissaMask = 0x1F;
251 		const unsigned short float10ExponentMask = 0x3E0;
252 		const unsigned short float10BitMask = 0x3FF;
253 		const unsigned int float10ExponentBias = 14;
254 
255 		const unsigned int float32Maxfloat10 = 0x477C0000;
256 		const unsigned int float32MinNormfloat10 = 0x38800000;
257 		const unsigned int float32MinDenormfloat10 = 0x35800040;
258 
259 		const unsigned int float32Bits = *reinterpret_cast<unsigned int *>(&fp32);
260 		const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
261 
262 		unsigned int float32Val = float32Bits & float32ValueMask;
263 
264 		if((float32Val & float32ExponentMask) == float32ExponentMask)
265 		{
266 			// INF or NAN
267 			if((float32Val & float32MantissaMask) != 0)
268 			{
269 				return float10ExponentMask |
270 				       (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
271 				        float10MantissaMask);
272 			}
273 			else if(float32Sign)
274 			{
275 				// -INF is clamped to 0 since float10 is positive only
276 				return 0;
277 			}
278 			else
279 			{
280 				return float10ExponentMask;
281 			}
282 		}
283 		else if(float32Sign)
284 		{
285 			// float10 is positive only, so clamp to zero
286 			return 0;
287 		}
288 		else if(float32Val > float32Maxfloat10)
289 		{
290 			// The number is too large to be represented as a float10, set to max
291 			return float10Max;
292 		}
293 		else if(float32Val < float32MinDenormfloat10)
294 		{
295 			// The number is too small to be represented as a denormalized float10, set to 0
296 			return 0;
297 		}
298 		else
299 		{
300 			if(float32Val < float32MinNormfloat10)
301 			{
302 				// The number is too small to be represented as a normalized float10
303 				// Convert it to a denormalized value.
304 				const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
305 				                           (float32Val >> float32ExponentFirstBit);
306 				float32Val =
307 				    ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
308 			}
309 			else
310 			{
311 				// Rebias the exponent to represent the value as a normalized float10
312 				float32Val += 0xC8000000;
313 			}
314 
315 			return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
316 		}
317 	}
318 
319 private:
320 	unsigned int R : 11;
321 	unsigned int G : 11;
322 	unsigned int B : 10;
323 };
324 
325 }  // namespace sw
326 
327 #endif  // sw_Half_hpp
328