// SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2021 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /*
19  * This module implements a variety of mathematical data types and library
20  * functions used by the codec.
21  */
22 
23 #ifndef ASTC_MATHLIB_H_INCLUDED
24 #define ASTC_MATHLIB_H_INCLUDED
25 
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29 
// ----------------------------------------------------------------------------
// Instruction set selection.
//
// Every ASTCENC_* switch below can be pre-defined by the build; when it is not,
// a value is derived from the macros the compiler provides for the target.
// ----------------------------------------------------------------------------

// POPCNT: 1 when the compiler advertises __POPCNT__, otherwise 0.
#if !defined(ASTCENC_POPCNT)
  #ifdef __POPCNT__
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif

// F16C: 1 when the compiler advertises __F16C__, otherwise 0.
#if !defined(ASTCENC_F16C)
  #ifdef __F16C__
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif

// SSE: highest advertised level encoded as 42, 41, 30 or 20; 0 when none.
#if !defined(ASTCENC_SSE)
  #ifdef __SSE4_2__
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE3__)
    #define ASTCENC_SSE 30
  #elif defined(__SSE2__)
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif

// AVX: 2 for __AVX2__, 1 for __AVX__, otherwise 0.
#if !defined(ASTCENC_AVX)
  #ifdef __AVX2__
    #define ASTCENC_AVX 2
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif

// NEON: 1 when building for __aarch64__, otherwise 0.
#if !defined(ASTCENC_NEON)
  #ifdef __aarch64__
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif

// Vector alignment constant: 32 whenever any AVX level is enabled, else 16.
#if ASTCENC_AVX == 0
  #define ASTCENC_VECALIGN 16
#else
  #define ASTCENC_VECALIGN 32
#endif

// The x86 intrinsics header is only needed if an x86 extension is in use.
#if ASTCENC_SSE || ASTCENC_AVX || ASTCENC_POPCNT
	#include <immintrin.h>
#endif
87 
88 /* ============================================================================
89   Fast math library; note that many of the higher-order functions in this set
90   use approximations which are less accurate, but faster, than <cmath> standard
91   library equivalents.
92 
93   Note: Many of these are not necessarily faster than simple C versions when
94   used on a single scalar value, but are included for testing purposes as most
95   have an option based on SSE intrinsics and therefore provide an obvious route
96   to future vectorization.
97 ============================================================================ */
98 
/**
 * @brief Overlay used to manipulate the bit pattern of a float.
 *
 * The three members occupy the same storage, so a value stored through one
 * member can be inspected through another view of the same bits.
 */
union if32
{
	/** @brief The bits viewed as an unsigned 32-bit integer. */
	uint32_t u;

	/** @brief The bits viewed as a signed 32-bit integer. */
	int32_t s;

	/** @brief The bits viewed as a single-precision float. */
	float f;
};
106 
107 // These are namespaced to avoid colliding with C standard library functions.
108 namespace astc
109 {
110 
// Single-precision pi and pi/2. Declared constexpr (rather than plain const)
// so that they are also usable inside constant expressions.
static constexpr float PI          = 3.14159265358979323846f;
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;
113 
/**
 * @brief Absolute value of a single-precision float.
 *
 * Thin wrapper that forwards to @c std::fabs.
 *
 * @param v   The value to make absolute.
 *
 * @return The magnitude of @c v.
 */
static inline float fabs(float v)
{
	const float magnitude = std::fabs(v);
	return magnitude;
}
125 
/**
 * @brief Test whether a float value is a NaN.
 *
 * Relies on NaN being the only value that does not compare equal to itself.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	return !(v == v);
}
137 
/**
 * @brief Return the smaller of two values.
 *
 * @c p is returned only when @c p < q holds; in every other case, including a
 * comparison that involves a NaN, the result is @c q.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}
153 
/**
 * @brief Return the smallest of three values.
 *
 * Evaluated as the two-value minimum of (the minimum of @c p and @c q) and
 * @c r, so the NaN behavior is that of the two-value form applied twice.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	const T smallest_pq = min(p, q);
	return min(smallest_pq, r);
}
170 
/**
 * @brief Return the smallest of four values.
 *
 * Evaluated pairwise: the minimum of @c p and @c q is compared against the
 * minimum of @c r and @c s using the two-value form.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	const T smallest_pq = min(p, q);
	const T smallest_rs = min(r, s);
	return min(smallest_pq, smallest_rs);
}
188 
/**
 * @brief Return the larger of two values.
 *
 * @c p is returned only when @c p > q holds; in every other case, including a
 * comparison that involves a NaN, the result is @c q.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}
204 
/**
 * @brief Return the largest of three values.
 *
 * Evaluated as the two-value maximum of (the maximum of @c p and @c q) and
 * @c r, so the NaN behavior is that of the two-value form applied twice.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	const T largest_pq = max(p, q);
	return max(largest_pq, r);
}
221 
/**
 * @brief Return the largest of four values.
 *
 * Evaluated pairwise: the maximum of @c p and @c q is compared against the
 * maximum of @c r and @c s using the two-value form.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	const T largest_pq = max(p, q);
	const T largest_rs = max(r, s);
	return max(largest_pq, largest_rs);
}
239 
/**
 * @brief Clamp a value to the range between @c mn and @c mx.
 *
 * For floats, a NaN input is turned into @c mn.
 *
 * @param v      The value to clamp.
 * @param mn     The min value (inclusive).
 * @param mx     The max value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// The order of the two tests must be kept: any comparison against a NaN is
	// false, so a NaN input fails both and falls through to the "min" value.
	return (v > mx) ? mx : ((v > mn) ? v : mn);
}
260 
261 /**
262  * @brief Clamp a float value between 0.0f and 1.0f.
263  *
264  * NaNs are turned into 0.0f.
265  *
266  * @param v   The value to clamp.
267  *
268  * @return The clamped value.
269  */
clamp1f(float v)270 static inline float clamp1f(float v)
271 {
272 	return astc::clamp(v, 0.0f, 1.0f);
273 }
274 
275 /**
276  * @brief Clamp a float value between 0.0f and 255.0f.
277  *
278  * NaNs are turned into 0.0f.
279  *
280  * @param v  The value to clamp.
281  *
282  * @return The clamped value.
283  */
clamp255f(float v)284 static inline float clamp255f(float v)
285 {
286 	return astc::clamp(v, 0.0f, 255.0f);
287 }
288 
/**
 * @brief Round a single-precision float down to an integral value.
 *
 * Thin wrapper that forwards to @c std::floor; the result is still a float.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline float flt_rd(float v)
{
	const float floored = std::floor(v);
	return floored;
}
300 
/**
 * @brief SP float round-to-nearest and convert to integer.
 *
 * Half-way cases are rounded away from zero. The float-to-int cast truncates
 * toward zero, so the half-unit bias is given the sign of the input; using a
 * fixed +0.5f bias would round negative inputs incorrectly.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	// Bias away from zero so the truncating cast lands on the nearest integer
	// for both positive and negative inputs.
	const float bias = (v < 0.0f) ? -0.5f : 0.5f;
	return static_cast<int>(v + bias);
}
313 
/**
 * @brief Convert a single-precision float to an integer, dropping the fraction.
 *
 * The conversion is a plain float-to-int cast, which truncates toward zero.
 * That matches rounding down for non-negative inputs only; negative inputs
 * with a fractional part are rounded up toward zero.
 *
 * @param v   The value to convert.
 *
 * @return The truncated value.
 */
static inline int flt2int_rd(float v)
{
	const int truncated = static_cast<int>(v);
	return truncated;
}
325 
/**
 * @brief SP float bit-interpreted as an integer.
 *
 * The bits are copied with @c std::memcpy rather than read back through a
 * union, because reading a union member other than the one last written is
 * undefined behavior in C++.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must have the same size");

	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}
339 
/**
 * @brief Integer bit-interpreted as an SP float.
 *
 * The bits are copied with @c std::memcpy rather than read back through a
 * union, because reading a union member other than the one last written is
 * undefined behavior in C++.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must have the same size");

	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}
353 
/**
 * @brief Reciprocal square root, 1.0 / sqrt(val).
 *
 * Computed directly as a division by @c std::sqrt; no approximation is
 * applied by this implementation.
 *
 * @param v   The input value.
 *
 * @return The reciprocal of the square root of @c v.
 */
static inline float rsqrt(float v)
{
	const float root = std::sqrt(v);
	return 1.0f / root;
}
365 
/**
 * @brief Square root of a single-precision float.
 *
 * Thin wrapper that forwards to @c std::sqrt; no approximation is applied by
 * this implementation.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
	const float root = std::sqrt(v);
	return root;
}
377 
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * Works directly on the IEEE 754 single-precision bit pattern: the 8-bit
 * exponent field is read out and rebased, and the returned mantissa keeps the
 * sign and fraction bits of @c v with its exponent field forced to 126, giving
 * a magnitude in the range [0.5, 1.0).
 *
 * Zero, denormals, infinities and NaNs are not special-cased; for example an
 * input of 0.0f yields a mantissa of 0.5f and an exponent of -126.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float), "float must be 32 bits wide");

	// Copy the bits out with memcpy; reading them back through a union member
	// that was not the last one written is undefined behavior in C++.
	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Convert the biased exponent to int before rebasing so that exponents
	// below the bias become negative without relying on unsigned wraparound.
	*expo = static_cast<int>((bits >> 23) & 0xFFu) - 126;

	// Keep sign and fraction bits, replace the exponent field with 126.
	bits = (bits & 0x807fffffu) | 0x3f000000u;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}
394 
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use by the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * Only the declaration lives in this header; the definition is provided
 * elsewhere.
 *
 * @param state   The state structure to initialize (two 64-bit words).
 */
void rand_init(uint64_t state[2]);
407 
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * Only the declaration lives in this header; the definition is provided
 * elsewhere.
 *
 * @param state   The state structure to use/update (two 64-bit words).
 *
 * @return The next random number in the sequence.
 */
uint64_t rand(uint64_t state[2]);
418 
419 }
420 
421 /* ============================================================================
422   Softfloat library with fp32 and fp16 conversion functionality.
423 ============================================================================ */
// The software conversion routines are only declared when neither the F16C
// nor the NEON configuration switch is enabled.
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
	/* Narrowing conversion: fp32 value to fp16, with the result carried in a uint16_t. */
	uint16_t float_to_sf16(float val);
	/* Widening conversion: fp16 value carried in a uint16_t back to fp32. */
	float sf16_to_float(uint16_t val);
#endif
429 
430 /*********************************
431   Vector library
432 *********************************/
433 #include "astcenc_vecmathlib.h"
434 
435 /*********************************
436   Declaration of line types
437 *********************************/
438 // parametric line, 2D: The line is given by line = a + b * t.
439 
440 struct line2
441 {
442 	vfloat4 a;
443 	vfloat4 b;
444 };
445 
446 // parametric line, 3D
447 struct line3
448 {
449 	vfloat4 a;
450 	vfloat4 b;
451 };
452 
453 struct line4
454 {
455 	vfloat4 a;
456 	vfloat4 b;
457 };
458 
459 
460 struct processed_line2
461 {
462 	vfloat4 amod;
463 	vfloat4 bs;
464 };
465 
466 struct processed_line3
467 {
468 	vfloat4 amod;
469 	vfloat4 bs;
470 };
471 
472 struct processed_line4
473 {
474 	vfloat4 amod;
475 	vfloat4 bs;
476 };
477 
478 #endif
479