• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2024 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /*
19  * This module implements a variety of mathematical data types and library
20  * functions used by the codec.
21  */
22 
23 #ifndef ASTC_MATHLIB_H_INCLUDED
24 #define ASTC_MATHLIB_H_INCLUDED
25 
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29 
// Population count support level. A value supplied by the build system is
// respected; otherwise it is derived from the compiler's __POPCNT__ define.
#if !defined(ASTCENC_POPCNT)
  #ifdef __POPCNT__
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif
37 
// Half-float conversion (F16C) support level. A value supplied by the build
// system is respected; otherwise it is derived from the compiler's __F16C__
// define.
#if !defined(ASTCENC_F16C)
  #ifdef __F16C__
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif
45 
// SSE support level: 42 for SSE4.2, 41 for SSE4.1, 20 for SSE2, 0 for none.
// A value supplied by the build system is respected; otherwise the highest
// level advertised by the compiler is selected.
#if !defined(ASTCENC_SSE)
  #ifdef __SSE4_2__
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE2__)
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif
57 
// AVX support level: 2 for AVX2, 1 for AVX, 0 for none. A value supplied by
// the build system is respected; otherwise the highest level advertised by
// the compiler is selected.
#if !defined(ASTCENC_AVX)
  #ifdef __AVX2__
    #define ASTCENC_AVX 2
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif
67 
// NEON support level. A value supplied by the build system is respected;
// otherwise NEON is enabled whenever the compiler targets AArch64.
#if !defined(ASTCENC_NEON)
  #ifdef __aarch64__
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif
75 
// Alignment, in bytes, matching the widest enabled SIMD vector: 32 for any
// AVX level, 16 for SSE or NEON. Builds without SIMD get 0, which means
// "use the default alignment".
#if ASTCENC_AVX != 0
  #define ASTCENC_VECALIGN 32
#elif (ASTCENC_SSE != 0) || (ASTCENC_NEON != 0)
  #define ASTCENC_VECALIGN 16
#else
  #define ASTCENC_VECALIGN 0
#endif
85 
// C++11 says alignas(0) must be ignored, but some GCC versions do not honor
// that. Expand to nothing when no vector alignment is requested so that
// alignas(0) is never emitted.
#if ASTCENC_VECALIGN <= 0
	#define ASTCENC_ALIGNAS
#else
	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
#endif
93 
94 #if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
95 	#include <immintrin.h>
96 #endif
97 
98 /* ============================================================================
99   Fast math library; note that many of the higher-order functions in this set
100   use approximations which are less accurate, but faster, than <cmath> standard
101   library equivalents.
102 
103   Note: Many of these are not necessarily faster than simple C versions when
104   used on a single scalar value, but are included for testing purposes as most
105   have an option based on SSE intrinsics and therefore provide an obvious route
106   to future vectorization.
107 ============================================================================ */
108 
/**
 * @brief Overlay of an unsigned 32-bit integer, a signed 32-bit integer and a
 * single-precision float, used to manipulate float bit patterns.
 */
union if32
{
	uint32_t u;
	int32_t s;
	float f;
};
116 
// These helpers are namespaced to avoid colliding with the identically named
// C standard library functions.
namespace astc
{

static constexpr float PI          = 3.14159265358979323846f;
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;

/**
 * @brief SP float absolute value.
 *
 * @param v   The value to make absolute.
 *
 * @return The absolute value.
 */
static inline float fabs(float v)
{
	return std::fabs(v);
}

/**
 * @brief Test if a float value is a NaN.
 *
 * Relies on NaN being the only value that compares unequal to itself.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	return v != v;
}

/**
 * @brief Return the minimum of two values.
 *
 * For floats, any comparison involving a NaN is false so @c q is returned:
 * a NaN in @c p is replaced by @c q, while a NaN in @c q is returned as-is.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q)
{
	return p < q ? p : q;
}

/**
 * @brief Return the minimum of three values.
 *
 * Evaluated as min(min(p, q), r), so for floats the NaN behavior of the
 * two-value min applies at each step; a NaN from the inner min yields @c r.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	return min(min(p, q), r);
}

/**
 * @brief Return the minimum of four values.
 *
 * Evaluated as min(min(p, q), min(r, s)), so for floats the NaN behavior of
 * the two-value min applies to each pair and then to the two partial results.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	return min(min(p, q), min(r, s));
}

/**
 * @brief Return the maximum of two values.
 *
 * For floats, any comparison involving a NaN is false so @c q is returned:
 * a NaN in @c p is replaced by @c q, while a NaN in @c q is returned as-is.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q)
{
	return p > q ? p : q;
}

/**
 * @brief Return the maximum of three values.
 *
 * Evaluated as max(max(p, q), r), so for floats the NaN behavior of the
 * two-value max applies at each step; a NaN from the inner max yields @c r.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	return max(max(p, q), r);
}

/**
 * @brief Return the maximum of four values.
 *
 * Evaluated as max(max(p, q), max(r, s)), so for floats the NaN behavior of
 * the two-value max applies to each pair and then to the two partial results.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	return max(max(p, q), max(r, s));
}

/**
 * @brief Clamp a value between @c mn and @c mx.
 *
 * For floats, a NaN in @c v is turned into @c mn.
 *
 * @param v      The value to clamp.
 * @param mn     The min value (inclusive).
 * @param mx     The max value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// Do not reorder; correct NaN handling relies on the fact that comparison
	// with NaN returns false and will fall through to the "min" value.
	if (v > mx) return mx;
	if (v > mn) return v;
	return mn;
}

/**
 * @brief Clamp a float value between 0.0f and 1.0f.
 *
 * NaNs are turned into 0.0f.
 *
 * @param v   The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp1f(float v)
{
	return astc::clamp(v, 0.0f, 1.0f);
}

/**
 * @brief Clamp a float value between 0.0f and 255.0f.
 *
 * NaNs are turned into 0.0f.
 *
 * @param v   The value to clamp.
 *
 * @return The clamped value.
 */
static inline float clamp255f(float v)
{
	return astc::clamp(v, 0.0f, 255.0f);
}

/**
 * @brief SP float round-down.
 *
 * @param v   The value to round.
 *
 * @return The largest integral value not greater than @c v, as a float.
 */
static inline float flt_rd(float v)
{
	return std::floor(v);
}

/**
 * @brief SP float round-to-nearest and convert to integer.
 *
 * Implemented as "add 0.5 then truncate toward zero", so the result is only
 * a true round-to-nearest for non-negative inputs.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	return static_cast<int>(v + 0.5f);
}

/**
 * @brief SP float round down and convert to integer.
 *
 * Implemented as a truncation toward zero, so the result is only a true
 * round-down for non-negative inputs.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rd(float v)
{
	return static_cast<int>(v);
}

/**
 * @brief SP float bit-interpreted as an integer.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must have the same size");

	// Copy the object representation rather than reading an inactive union
	// member, which is undefined behavior in C++.
	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}

/**
 * @brief Integer bit-interpreted as an SP float.
 *
 * @param v   The value to bitcast.
 *
 * @return The converted value.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must have the same size");

	// Copy the object representation rather than reading an inactive union
	// member, which is undefined behavior in C++.
	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}

/**
 * @brief Compute 1.0 / sqrt(val).
 *
 * This implementation uses the standard library square root followed by a
 * division; it is not an approximation.
 *
 * @param v   The input value.
 *
 * @return The reciprocal square root.
 */
static inline float rsqrt(float v)
{
	return 1.0f / std::sqrt(v);
}

/**
 * @brief Compute sqrt(val).
 *
 * This implementation forwards to the standard library square root; it is
 * not an approximation.
 *
 * @param v   The input value.
 *
 * @return The square root.
 */
static inline float sqrt(float v)
{
	return std::sqrt(v);
}

/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * Works directly on the IEEE 754 binary32 bit pattern: the stored exponent
 * field is returned with a bias of 126 removed, and the returned value keeps
 * the sign and mantissa bits of @c v with the exponent field of 0.5. The
 * exponent field is used exactly as stored; zero, subnormal, infinite and NaN
 * inputs receive no special handling.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float), "float must be 32 bits wide");

	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Convert the biased exponent to int before removing the bias so that
	// small exponents do not depend on unsigned wrap-around
	*expo = static_cast<int>((bits >> 23) & 0xFFu) - 126;

	// Keep sign and mantissa bits, replace the exponent field with that of 0.5
	bits = (bits & 0x807FFFFFu) | 0x3F000000u;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}

/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use by the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values ...
 *
 * @param state The state structure to initialize.
 */
void rand_init(uint64_t state[2]);

/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state The state structure to use/update.
 *
 * @return The next random number in the sequence.
 */
uint64_t rand(uint64_t state[2]);

}
430 
/* ============================================================================
  Soft-float conversions between fp32 values and fp16 bit patterns. These are
  only declared when neither F16C nor NEON support is enabled.
============================================================================ */
#if !ASTCENC_F16C && !ASTCENC_NEON
	/** @brief Narrow an fp32 value to an fp16 bit pattern. */
	uint16_t float_to_sf16(float val);

	/** @brief Widen an fp16 bit pattern to an fp32 value. */
	float sf16_to_float(uint16_t val);
#endif
439 
440 /*********************************
441   Vector library
442 *********************************/
443 #include "astcenc_vecmathlib.h"
444 
445 /*********************************
446   Declaration of line types
447 *********************************/
// Parametric line in 2D; points on the line are given by a + b * t.

struct line2
{
	vfloat4 a;  // Constant term of the line equation (the point at t = 0)
	vfloat4 b;  // Term scaled by the parameter t
};
455 
// Parametric line in 3D.
// NOTE(review): presumably uses the same a + b * t form documented for
// line2 - confirm against the code that uses it.
struct line3
{
	vfloat4 a;
	vfloat4 b;
};
462 
// NOTE(review): undocumented in the original; presumably the 4D counterpart
// of line2 and line3 (a + b * t) - confirm against the code that uses it.
struct line4
{
	vfloat4 a;
	vfloat4 b;
};
468 
469 
// NOTE(review): the meaning of amod and bs is not visible in this header;
// presumably a precomputed form of a line2 - confirm against the code that
// fills this structure.
struct processed_line2
{
	vfloat4 amod;
	vfloat4 bs;
};
475 
// NOTE(review): the meaning of amod and bs is not visible in this header;
// presumably a precomputed form of a line3 - confirm against the code that
// fills this structure.
struct processed_line3
{
	vfloat4 amod;
	vfloat4 bs;
};
481 
// NOTE(review): the meaning of amod and bs is not visible in this header;
// presumably a precomputed form of a line4 - confirm against the code that
// fills this structure.
struct processed_line4
{
	vfloat4 amod;
	vfloat4 bs;
};
487 
488 #endif
489