1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2021 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /*
19 * This module implements a variety of mathematical data types and library
20 * functions used by the codec.
21 */
22
23 #ifndef ASTC_MATHLIB_H_INCLUDED
24 #define ASTC_MATHLIB_H_INCLUDED
25
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29
// ASTCENC_POPCNT: if the build has not already defined it, default to 1 when
// the compiler predefines __POPCNT__ and to 0 otherwise.
#ifndef ASTCENC_POPCNT
#if defined(__POPCNT__)
#define ASTCENC_POPCNT 1
#else
#define ASTCENC_POPCNT 0
#endif
#endif
37
// ASTCENC_F16C: if the build has not already defined it, default to 1 when
// the compiler predefines __F16C__ and to 0 otherwise.
#ifndef ASTCENC_F16C
#if defined(__F16C__)
#define ASTCENC_F16C 1
#else
#define ASTCENC_F16C 0
#endif
#endif
45
// ASTCENC_SSE: if the build has not already defined it, pick the highest SSE
// level the compiler advertises, encoded as 42 (SSE4.2), 41 (SSE4.1),
// 30 (SSE3), 20 (SSE2), or 0 when none of these macros is predefined.
#ifndef ASTCENC_SSE
#if defined(__SSE4_2__)
#define ASTCENC_SSE 42
#elif defined(__SSE4_1__)
#define ASTCENC_SSE 41
#elif defined(__SSE3__)
#define ASTCENC_SSE 30
#elif defined(__SSE2__)
#define ASTCENC_SSE 20
#else
#define ASTCENC_SSE 0
#endif
#endif
59
// ASTCENC_AVX: if the build has not already defined it, default to 2 when
// __AVX2__ is predefined, 1 when only __AVX__ is predefined, and 0 otherwise.
#ifndef ASTCENC_AVX
#if defined(__AVX2__)
#define ASTCENC_AVX 2
#elif defined(__AVX__)
#define ASTCENC_AVX 1
#else
#define ASTCENC_AVX 0
#endif
#endif
69
// ASTCENC_NEON: if the build has not already defined it, default to 1 when
// compiling for AArch64 (__aarch64__ predefined) and to 0 otherwise.
#ifndef ASTCENC_NEON
#if defined(__aarch64__)
#define ASTCENC_NEON 1
#else
#define ASTCENC_NEON 0
#endif
#endif
77
// ASTCENC_VECALIGN is 32 when any AVX level is enabled and 16 otherwise.
// NOTE(review): presumably the byte alignment required for vector data
// (256-bit vs 128-bit registers) - confirm against its users.
#if ASTCENC_AVX
#define ASTCENC_VECALIGN 32
#else
#define ASTCENC_VECALIGN 16
#endif
83
// Pull in the x86 intrinsics header only when at least one of the SSE, AVX
// or POPCNT options is enabled.
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
#include <immintrin.h>
#endif
87
88 /* ============================================================================
89 Fast math library; note that many of the higher-order functions in this set
90 use approximations which are less accurate, but faster, than <cmath> standard
91 library equivalents.
92
93 Note: Many of these are not necessarily faster than simple C versions when
94 used on a single scalar value, but are included for testing purposes as most
95 have an option based on SSE intrinsics and therefore provide an obvious route
96 to future vectorization.
97 ============================================================================ */
98
/**
 * @brief Overlay of a 32-bit unsigned integer, a 32-bit signed integer and a
 * single-precision float, used to manipulate float bit patterns.
 */
union if32
{
	uint32_t u;  // Unsigned view
	int32_t s;   // Signed view
	float f;     // Floating-point view
};
106
107 // These are namespaced to avoid colliding with C standard library functions.
108 namespace astc
109 {
110
/** @brief Single-precision value of pi; usable in constant expressions. */
static constexpr float PI = 3.14159265358979323846f;

/** @brief Single-precision value of pi / 2; usable in constant expressions. */
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;
113
/**
 * @brief Compute the absolute value of a single-precision float.
 *
 * Thin wrapper around @c std::fabs.
 *
 * @param v   The input value.
 *
 * @return The magnitude of @c v.
 */
static inline float fabs(float v)
{
	const float magnitude = std::fabs(v);
	return magnitude;
}
125
/**
 * @brief Test whether a float value is a NaN.
 *
 * Relies on NaN being the only value that does not compare equal to itself.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
	const bool equals_itself = (v == v);
	return !equals_itself;
}
137
/**
 * @brief Return the smaller of two values.
 *
 * For floats, a NaN in @c p makes the comparison false, so @c q is returned.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return @c p if it is strictly less than @c q, otherwise @c q.
 */
template<typename T>
static inline T min(T p, T q)
{
	if (p < q)
	{
		return p;
	}

	return q;
}
153
/**
 * @brief Return the smallest of three values.
 *
 * Evaluated as the two-value minimum of @c p and @c q, then the two-value
 * minimum of that result and @c r. For floats, a NaN intermediate result is
 * turned into @c r.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
	const T lowest_pq = min(p, q);
	return min(lowest_pq, r);
}
170
/**
 * @brief Return the smallest of four values.
 *
 * Evaluated pairwise: the minimum of @c p and @c q is compared against the
 * minimum of @c r and @c s. For floats, a NaN on the left-hand side of a
 * comparison is replaced by the right-hand value.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
	const T lowest_pq = min(p, q);
	const T lowest_rs = min(r, s);
	return min(lowest_pq, lowest_rs);
}
188
/**
 * @brief Return the larger of two values.
 *
 * For floats, a NaN in @c p makes the comparison false, so @c q is returned.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return @c p if it is strictly greater than @c q, otherwise @c q.
 */
template<typename T>
static inline T max(T p, T q)
{
	if (p > q)
	{
		return p;
	}

	return q;
}
204
/**
 * @brief Return the largest of three values.
 *
 * Evaluated as the two-value maximum of @c p and @c q, then the two-value
 * maximum of that result and @c r. For floats, a NaN intermediate result is
 * turned into @c r.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
	const T highest_pq = max(p, q);
	return max(highest_pq, r);
}
221
/**
 * @brief Return the largest of four values.
 *
 * Evaluated pairwise: the maximum of @c p and @c q is compared against the
 * maximum of @c r and @c s. For floats, a NaN on the left-hand side of a
 * comparison is replaced by the right-hand value.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
	const T highest_pq = max(p, q);
	const T highest_rs = max(r, s);
	return max(highest_pq, highest_rs);
}
239
/**
 * @brief Clamp a value to the inclusive range [@c mn, @c mx].
 *
 * For floats, a NaN input is turned into @c mn.
 *
 * @param v    The value to clamp.
 * @param mn   The minimum value (inclusive).
 * @param mx   The maximum value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
	// The order of the two tests matters: any comparison against a NaN is
	// false, so a NaN input fails both tests and falls through to mn.
	return (v > mx) ? mx
	     : (v > mn) ? v
	     : mn;
}
260
261 /**
262 * @brief Clamp a float value between 0.0f and 1.0f.
263 *
264 * NaNs are turned into 0.0f.
265 *
266 * @param v The value to clamp.
267 *
268 * @return The clamped value.
269 */
clamp1f(float v)270 static inline float clamp1f(float v)
271 {
272 return astc::clamp(v, 0.0f, 1.0f);
273 }
274
275 /**
276 * @brief Clamp a float value between 0.0f and 255.0f.
277 *
278 * NaNs are turned into 0.0f.
279 *
280 * @param v The value to clamp.
281 *
282 * @return The clamped value.
283 */
clamp255f(float v)284 static inline float clamp255f(float v)
285 {
286 return astc::clamp(v, 0.0f, 255.0f);
287 }
288
/**
 * @brief Round a single-precision float down to an integral value.
 *
 * Thin wrapper around @c std::floor; the result is still a float.
 *
 * @param v   The value to round.
 *
 * @return The largest integral value not greater than @c v.
 */
static inline float flt_rd(float v)
{
	const float floored = std::floor(v);
	return floored;
}
300
/**
 * @brief Round a single-precision float to nearest and convert to integer.
 *
 * Implemented as "add 0.5, then truncate toward zero", so the result is a
 * true round-to-nearest only for non-negative inputs.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
	const float biased = v + 0.5f;
	return static_cast<int>(biased);
}
313
/**
 * @brief Round a single-precision float down and convert to integer.
 *
 * Implemented as a plain float-to-int conversion, which truncates toward
 * zero; this equals rounding down only for non-negative inputs.
 *
 * @param v   The value to round.
 *
 * @return The converted value.
 */
static inline int flt2int_rd(float v)
{
	const int truncated = static_cast<int>(v);
	return truncated;
}
325
/**
 * @brief Reinterpret the bits of a single-precision float as an integer.
 *
 * The bit pattern is copied with @c std::memcpy rather than read back through
 * a union, because reading a union member other than the one last written is
 * undefined behavior in C++.
 *
 * @param v   The value to bitcast.
 *
 * @return The integer with the same bit pattern as @c v.
 */
static inline int float_as_int(float v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must be the same size");

	int bits;
	std::memcpy(&bits, &v, sizeof(bits));
	return bits;
}
339
/**
 * @brief Reinterpret the bits of an integer as a single-precision float.
 *
 * The bit pattern is copied with @c std::memcpy rather than read back through
 * a union, because reading a union member other than the one last written is
 * undefined behavior in C++.
 *
 * @param v   The value to bitcast.
 *
 * @return The float with the same bit pattern as @c v.
 */
static inline float int_as_float(int v)
{
	static_assert(sizeof(int) == sizeof(float), "int and float must be the same size");

	float value;
	std::memcpy(&value, &v, sizeof(value));
	return value;
}
353
/**
 * @brief Compute 1.0 / sqrt(val).
 *
 * This implementation divides 1.0f by @c std::sqrt; it does not use an
 * approximation.
 *
 * @param v   The input value.
 *
 * @return The reciprocal square root of @c v.
 */
static inline float rsqrt(float v)
{
	const float root = std::sqrt(v);
	return 1.0f / root;
}
365
/**
 * @brief Compute sqrt(val).
 *
 * Thin wrapper around @c std::sqrt; it does not use an approximation.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
	const float root = std::sqrt(v);
	return root;
}
377
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * This is a pure bit-manipulation variant: the exponent field is read as-is,
 * with no special handling of zero, denormal, infinite or NaN inputs.
 *
 * The bit pattern is accessed with @c std::memcpy rather than through a
 * union, because reading a union member other than the one last written is
 * undefined behavior in C++.
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent (biased exponent field minus 126).
 *
 * @return The mantissa: the sign and fraction bits of @c v with the exponent
 *         field forced so that the magnitude lies in [0.5, 1.0).
 */
static inline float frexp(float v, int* expo)
{
	static_assert(sizeof(uint32_t) == sizeof(float), "float must be 32 bits wide");

	uint32_t bits;
	std::memcpy(&bits, &v, sizeof(bits));

	// Convert the 8-bit exponent field to int before subtracting, so small
	// exponents give a negative result without relying on unsigned wrap-around.
	*expo = static_cast<int>((bits >> 23) & 0xFFu) - 126;

	// Keep the sign and the 23 fraction bits, and force the exponent field to
	// 126 (i.e. 2^-1).
	bits = (bits & 0x807fffffu) | 0x3f000000u;

	float mantissa;
	std::memcpy(&mantissa, &bits, sizeof(mantissa));
	return mantissa;
}
394
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers to
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * @param state   The state structure to initialize (two 64-bit words).
 */
void rand_init(uint64_t state[2]);
407
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro-128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * @param state   The state structure to use/update (two 64-bit words).
 *
 * @return The next random number in the sequence.
 */
uint64_t rand(uint64_t state[2]);
418
419 }
420
/* ============================================================================
  Softfloat library with fp32 and fp16 conversion functionality.
============================================================================ */
// These conversions are only declared when neither the F16C nor the NEON
// option is enabled.
// NOTE(review): presumably hardware conversions are used otherwise - confirm.
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
	/* narrowing float->float conversions */

	// Convert an fp32 value to an fp16 value held in a 16-bit integer.
	uint16_t float_to_sf16(float val);

	// Convert an fp16 value held in a 16-bit integer to an fp32 value.
	float sf16_to_float(uint16_t val);
#endif
429
430 /*********************************
431 Vector library
432 *********************************/
433 #include "astcenc_vecmathlib.h"
434
435 /*********************************
436 Declaration of line types
437 *********************************/
// Parametric line, 2D: the line is given by line = a + b * t.
// Both terms are stored in full vfloat4 vectors.
struct line2
{
	vfloat4 a;  // Constant term: the point on the line at t = 0
	vfloat4 b;  // Term scaled by the parameter t
};
445
// Parametric line, 3D.
// NOTE(review): presumably the same line = a + b * t form as the 2D line,
// stored in full vfloat4 vectors - confirm against users.
struct line3
{
	vfloat4 a;
	vfloat4 b;
};
452
// NOTE(review): presumably a 4D parametric line of the form a + b * t, by
// analogy with the 2D and 3D line types - confirm against users.
struct line4
{
	vfloat4 a;
	vfloat4 b;
};
458
459
// NOTE(review): presumably a preprocessed form of a 2D line; the meaning of
// amod and bs is not visible in this header - confirm where it is populated.
struct processed_line2
{
	vfloat4 amod;
	vfloat4 bs;
};
465
// NOTE(review): presumably a preprocessed form of a 3D line; the meaning of
// amod and bs is not visible in this header - confirm where it is populated.
struct processed_line3
{
	vfloat4 amod;
	vfloat4 bs;
};
471
// NOTE(review): presumably a preprocessed form of a 4D line; the meaning of
// amod and bs is not visible in this header - confirm where it is populated.
struct processed_line4
{
	vfloat4 amod;
	vfloat4 bs;
};
477
478 #endif
479