1 /**************************************************************************
2 *
3 * Copyright 2008 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * Math utilities and approximations for common math functions.
31 * Reduced precision is usually acceptable in shaders...
32 *
33 * "fast" is used in the names of functions which are low-precision,
34 * or at least lower-precision than the normal C lib functions.
35 */
36
37
38 #ifndef U_MATH_H
39 #define U_MATH_H
40
41
42 #include "c99_math.h"
43 #include <assert.h>
44 #include <float.h>
45 #include <stdarg.h>
46
47 #include "bitscan.h"
48 #include "u_endian.h" /* for UTIL_ARCH_BIG_ENDIAN */
49
50 #ifdef __cplusplus
51 extern "C" {
52 #endif
53
54
/* Square root of 2, for C libraries whose <math.h> does not provide it. */
#if !defined(M_SQRT2)
#define M_SQRT2 1.41421356237309504880
#endif
58
/*
 * Lookup table used by util_fast_exp2().  It is indexed with
 * POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE), so the middle entry
 * corresponds to a fractional part of zero.  The table itself is defined
 * in another translation unit.
 */
#define POW2_TABLE_SIZE_LOG2  9
#define POW2_TABLE_SIZE       (1 << POW2_TABLE_SIZE_LOG2)
#define POW2_TABLE_OFFSET     (POW2_TABLE_SIZE / 2)
#define POW2_TABLE_SCALE      ((float) (POW2_TABLE_SIZE / 2))
extern float pow2_table[POW2_TABLE_SIZE];
64
65
/**
 * Initialize the math module.
 *
 * Call this once before using any other function declared in this header.
 */
extern void util_init_math(void);
72
73
/**
 * View of the same 32 bits as a float, a signed integer or an unsigned
 * integer.  Used throughout this header to inspect or build float bit
 * patterns.
 */
union fi {
   float    f;   /* value as a float */
   int32_t  i;   /* same bits, signed */
   uint32_t ui;  /* same bits, unsigned */
};
79
80
/**
 * View of the same 64 bits as a double, a signed integer or an unsigned
 * integer.  Double-precision counterpart of union fi.
 */
union di {
   double   d;   /* value as a double */
   int64_t  i;   /* same bits, signed */
   uint64_t ui;  /* same bits, unsigned */
};
86
87
88 /**
89 * Extract the IEEE float32 exponent.
90 */
91 static inline signed
util_get_float32_exponent(float x)92 util_get_float32_exponent(float x)
93 {
94 union fi f;
95
96 f.f = x;
97
98 return ((f.ui >> 23) & 0xff) - 127;
99 }
100
101
102 /**
103 * Fast version of 2^x
104 * Identity: exp2(a + b) = exp2(a) * exp2(b)
105 * Let ipart = int(x)
106 * Let fpart = x - ipart;
107 * So, exp2(x) = exp2(ipart) * exp2(fpart)
108 * Compute exp2(ipart) with i << ipart
109 * Compute exp2(fpart) with lookup table.
110 */
111 static inline float
util_fast_exp2(float x)112 util_fast_exp2(float x)
113 {
114 int32_t ipart;
115 float fpart, mpart;
116 union fi epart;
117
118 if(x > 129.00000f)
119 return 3.402823466e+38f;
120
121 if (x < -126.99999f)
122 return 0.0f;
123
124 ipart = (int32_t) x;
125 fpart = x - (float) ipart;
126
127 /* same as
128 * epart.f = (float) (1 << ipart)
129 * but faster and without integer overflow for ipart > 31
130 */
131 epart.i = (ipart + 127 ) << 23;
132
133 mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];
134
135 return epart.f * mpart;
136 }
137
138
/**
 * Fast, reduced-precision exp(x), computed as 2^(x * log2(e)) through
 * util_fast_exp2().
 */
static inline float
util_fast_exp(float x)
{
   /* 1.44269f = log2(e) */
   return util_fast_exp2(1.44269f * x);
}
148
149
/*
 * Lookup table used by util_fast_log2().  It is indexed by the mantissa
 * rounded to LOG2_TABLE_SIZE_LOG2 bits, hence the extra entry for the
 * case where rounding carries out of the mantissa.  The table itself is
 * defined in another translation unit.
 */
#define LOG2_TABLE_SIZE_LOG2  16
#define LOG2_TABLE_SCALE      (1 << LOG2_TABLE_SIZE_LOG2)
#define LOG2_TABLE_SIZE       (LOG2_TABLE_SCALE + 1)
extern float log2_table[LOG2_TABLE_SIZE];
154
155
156 /**
157 * Fast approximation to log2(x).
158 */
159 static inline float
util_fast_log2(float x)160 util_fast_log2(float x)
161 {
162 union fi num;
163 float epart, mpart;
164 num.f = x;
165 epart = (float)(((num.i & 0x7f800000) >> 23) - 127);
166 /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */
167 mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)];
168 return epart + mpart;
169 }
170
171
/**
 * Fast, reduced-precision x^y, computed as 2^(log2(x) * y) with
 * util_fast_log2() and util_fast_exp2().
 */
static inline float
util_fast_pow(float x, float y)
{
   const float scaled_log = util_fast_log2(x) * y;

   return util_fast_exp2(scaled_log);
}
180
181
182 /**
183 * Floor(x), returned as int.
184 */
185 static inline int
util_ifloor(float f)186 util_ifloor(float f)
187 {
188 #if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
189 /*
190 * IEEE floor for computers that round to nearest or even.
191 * 'f' must be between -4194304 and 4194303.
192 * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
193 * but uses some IEEE specific tricks for better speed.
194 * Contributed by Josh Vanderhoof
195 */
196 int ai, bi;
197 double af, bf;
198 af = (3 << 22) + 0.5 + (double)f;
199 bf = (3 << 22) + 0.5 - (double)f;
200 /* GCC generates an extra fstp/fld without this. */
201 __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
202 __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
203 return (ai - bi) >> 1;
204 #else
205 int ai, bi;
206 double af, bf;
207 union fi u;
208 af = (3 << 22) + 0.5 + (double) f;
209 bf = (3 << 22) + 0.5 - (double) f;
210 u.f = (float) af; ai = u.i;
211 u.f = (float) bf; bi = u.i;
212 return (ai - bi) >> 1;
213 #endif
214 }
215
216
/**
 * Round a float to the nearest int.
 *
 * On x86 with GCC or MSVC the x87 "fistp" instruction is used.  Everywhere
 * else 0.5 is added to (or subtracted from, for negative inputs) the value
 * before truncating, so halves round away from zero.
 */
static inline int
util_iround(float f)
{
#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86)
   int rounded;
   __asm__ ("fistpl %0" : "=m" (rounded) : "t" (f) : "st");
   return rounded;
#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
   int r;
   _asm {
      fld f
      fistp r
   }
   return r;
#else
   return (int) ((f >= 0.0f) ? (f + 0.5f) : (f - 0.5f));
#endif
}
241
242
/**
 * Approximate float comparison: true when a and b differ by at most tol.
 */
static inline bool
util_is_approx(float a, float b, float tol)
{
   const float distance = fabsf(b - a);

   return distance <= tol;
}
251
252
253 /**
254 * util_is_X_inf_or_nan = test if x is NaN or +/- Inf
255 * util_is_X_nan = test if x is NaN
256 * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf
257 *
258 * NaN can be checked with x != x, however this fails with the fast math flag
259 **/
260
261
262 /**
263 * Single-float
264 */
265 static inline bool
util_is_inf_or_nan(float x)266 util_is_inf_or_nan(float x)
267 {
268 union fi tmp;
269 tmp.f = x;
270 return (tmp.ui & 0x7f800000) == 0x7f800000;
271 }
272
273
274 static inline bool
util_is_nan(float x)275 util_is_nan(float x)
276 {
277 union fi tmp;
278 tmp.f = x;
279 return (tmp.ui & 0x7fffffff) > 0x7f800000;
280 }
281
282
283 static inline int
util_inf_sign(float x)284 util_inf_sign(float x)
285 {
286 union fi tmp;
287 tmp.f = x;
288 if ((tmp.ui & 0x7fffffff) != 0x7f800000) {
289 return 0;
290 }
291
292 return (x < 0) ? -1 : 1;
293 }
294
295
296 /**
297 * Double-float
298 */
299 static inline bool
util_is_double_inf_or_nan(double x)300 util_is_double_inf_or_nan(double x)
301 {
302 union di tmp;
303 tmp.d = x;
304 return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL;
305 }
306
307
308 static inline bool
util_is_double_nan(double x)309 util_is_double_nan(double x)
310 {
311 union di tmp;
312 tmp.d = x;
313 return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL;
314 }
315
316
317 static inline int
util_double_inf_sign(double x)318 util_double_inf_sign(double x)
319 {
320 union di tmp;
321 tmp.d = x;
322 if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) {
323 return 0;
324 }
325
326 return (x < 0) ? -1 : 1;
327 }
328
329
330 /**
331 * Half-float
332 */
/*
 * True when the half-float bit pattern x is +/-Inf or NaN, i.e. all five
 * exponent bits (0x7c00) are set.
 */
static inline bool
util_is_half_inf_or_nan(int16_t x)
{
   const int exponent_bits = x & 0x7c00;

   return exponent_bits == 0x7c00;
}
338
339
/*
 * True when the half-float bit pattern x is NaN: with the sign bit
 * cleared it is above the +Inf pattern 0x7c00.
 */
static inline bool
util_is_half_nan(int16_t x)
{
   const int magnitude = x & 0x7fff;

   return magnitude > 0x7c00;
}
345
346
/*
 * For a half-float bit pattern x, return +1 for +Inf, -1 for -Inf and 0
 * for anything else (including NaN).
 */
static inline int
util_half_inf_sign(int16_t x)
{
   const int magnitude = x & 0x7fff;

   if (magnitude == 0x7c00)
      return (x < 0) ? -1 : 1;

   return 0;
}
356
357
358 /**
359 * Return float bits.
360 */
361 static inline unsigned
fui(float f)362 fui( float f )
363 {
364 union fi fi;
365 fi.f = f;
366 return fi.ui;
367 }
368
369 static inline float
uif(uint32_t ui)370 uif(uint32_t ui)
371 {
372 union fi fi;
373 fi.ui = ui;
374 return fi.f;
375 }
376
377
/**
 * Convert a uint8_t in [0, 255] to a float in [0, 1].
 */
static inline float
ubyte_to_float(uint8_t ub)
{
   const float scale = 1.0f / 255.0f;

   return (float) ub * scale;
}
386
387
388 /**
389 * Convert float in [0,1] to uint8_t in [0,255] with clamping.
390 */
391 static inline uint8_t
float_to_ubyte(float f)392 float_to_ubyte(float f)
393 {
394 /* return 0 for NaN too */
395 if (!(f > 0.0f)) {
396 return (uint8_t) 0;
397 }
398 else if (f >= 1.0f) {
399 return (uint8_t) 255;
400 }
401 else {
402 union fi tmp;
403 tmp.f = f;
404 tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
405 return (uint8_t) tmp.i;
406 }
407 }
408
/**
 * Convert a uint16_t in [0, 65535] to a float in [0, 1].
 */
static inline float
ushort_to_float(uint16_t us)
{
   const float scale = 1.0f / 65535.0f;

   return (float) us * scale;
}
417
418
419 /**
420 * Convert float in [0,1] to uint16_t in [0,65535] with clamping.
421 */
422 static inline uint16_t
float_to_ushort(float f)423 float_to_ushort(float f)
424 {
425 /* return 0 for NaN too */
426 if (!(f > 0.0f)) {
427 return (uint16_t) 0;
428 }
429 else if (f >= 1.0f) {
430 return (uint16_t) 65535;
431 }
432 else {
433 union fi tmp;
434 tmp.f = f;
435 tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f;
436 return (uint16_t) tmp.i;
437 }
438 }
439
/**
 * Convert an int8_t to a float in [-1, 1].  Both -128 and -127 map to
 * -1.0; every other value is divided by 127.
 */
static inline float
byte_to_float_tex(int8_t b)
{
   if (b == -128)
      return -1.0F;

   return b * 1.0F / 127.0F;
}
445
/**
 * Convert a float to an int8_t by scaling with 127 and truncating toward
 * zero.  No clamping is done here.
 */
static inline int8_t
float_to_byte_tex(float f)
{
   const float scaled = 127.0F * f;

   return (int8_t) scaled;
}
451
/**
 * Integer log base 2: the index of the highest set bit of n.
 * Returns 0 for n == 0 as well as for n == 1.
 */
static inline unsigned
util_logbase2(unsigned n)
{
#if defined(HAVE___BUILTIN_CLZ)
   const unsigned top_bit = sizeof(unsigned) * 8 - 1;

   /* "| 1" keeps the argument non-zero without changing the result for
    * n >= 1.
    */
   return top_bit - __builtin_clz(n | 1);
#else
   unsigned result = 0;
   unsigned step;

   /* Binary search for the highest set bit: 16, 8, 4, 2, 1. */
   for (step = 16; step != 0; step >>= 1) {
      if (n >> step) {
         n >>= step;
         result += step;
      }
   }
   return result;
#endif
}
470
/**
 * 64-bit integer log base 2: the index of the highest set bit of n.
 * Returns 0 for n == 0 as well as for n == 1.
 */
static inline uint64_t
util_logbase2_64(uint64_t n)
{
#if defined(HAVE___BUILTIN_CLZLL)
   const uint64_t top_bit = sizeof(uint64_t) * 8 - 1;

   /* "| 1" keeps the argument non-zero without changing the result for
    * n >= 1.
    */
   return top_bit - __builtin_clzll(n | 1);
#else
   uint64_t result = 0ull;
   unsigned step;

   /* Binary search for the highest set bit: 32, 16, 8, 4, 2, 1. */
   for (step = 32; step != 0; step >>= 1) {
      if (n >> step) {
         n >>= step;
         result += step;
      }
   }
   return result;
#endif
}
487
/**
 * Ceiling of log2(n), with 0 returned for n == 0.  Equivalently, the
 * smallest x such that n <= 2**x.
 */
static inline unsigned
util_logbase2_ceil(unsigned n)
{
   return (n <= 1) ? 0 : 1 + util_logbase2(n - 1);
}
500
/**
 * 64-bit ceiling of log2(n), with 0 returned for n == 0.  Equivalently,
 * the smallest x such that n <= 2**x.
 */
static inline uint64_t
util_logbase2_ceil64(uint64_t n)
{
   return (n <= 1) ? 0 : 1ull + util_logbase2_64(n - 1);
}
509
/**
 * Return the smallest power of two >= x.  0 and 1 both yield 1.
 *
 * x must not exceed the largest power of two representable in an
 * unsigned (2^31 for a 32-bit unsigned): beyond that there is no valid
 * answer; the portable path wraps around to 0 and the builtin path would
 * shift by the full type width.
 */
static inline unsigned
util_next_power_of_two(unsigned x)
{
#if defined(HAVE___BUILTIN_CLZ)
   if (x <= 1)
      return 1;

   /* Shift an unsigned 1: the result may occupy the top bit, which a
    * signed "1 << 31" cannot represent.
    */
   return 1u << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1));
#else
   unsigned val;

   if (x <= 1)
      return 1;

   /* Smear the highest set bit of (x - 1) into every lower position, then
    * add one.  A power of two maps to itself, so no separate check is
    * needed.
    */
   val = x - 1;
   val |= val >> 1;
   val |= val >> 2;
   val |= val >> 4;
   val |= val >> 8;
   val |= val >> 16;
   return val + 1;
#endif
}
540
/**
 * Return the smallest power of two >= x, 64-bit variant.  0 and 1 both
 * yield 1.
 *
 * x must not exceed 2^63: beyond that there is no valid answer; the
 * portable path wraps around to 0 and the builtin path would shift by the
 * full type width.
 */
static inline uint64_t
util_next_power_of_two64(uint64_t x)
{
#if defined(HAVE___BUILTIN_CLZLL)
   if (x <= 1)
      return 1;

   return 1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1));
#else
   uint64_t val;

   if (x <= 1)
      return 1;

   /* Smear the highest set bit of (x - 1) into every lower position, then
    * add one.  A power of two maps to itself, so no separate check is
    * needed.
    */
   val = x - 1;
   val |= val >> 1;
   val |= val >> 2;
   val |= val >> 4;
   val |= val >> 8;
   val |= val >> 16;
   val |= val >> 32;
   return val + 1;
#endif
}
569
/**
 * Reverse the order of the 32 bits in n.
 *
 * Swaps neighbouring bits, then bit pairs, nibbles, bytes and finally the
 * two 16-bit halves.  Algorithm taken from:
 * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
 */
static inline unsigned
util_bitreverse(unsigned n)
{
   n = ((n & 0xaaaaaaaau) >> 1)  | ((n & 0x55555555u) << 1);
   n = ((n & 0xccccccccu) >> 2)  | ((n & 0x33333333u) << 2);
   n = ((n & 0xf0f0f0f0u) >> 4)  | ((n & 0x0f0f0f0fu) << 4);
   n = ((n & 0xff00ff00u) >> 8)  | ((n & 0x00ff00ffu) << 8);
   n = ((n & 0xffff0000u) >> 16) | ((n & 0x0000ffffu) << 16);
   return n;
}
585
/**
 * Conversion between little-endian and CPU byte order.
 *
 * On big-endian builds (UTIL_ARCH_BIG_ENDIAN non-zero) the value is
 * byte-swapped; otherwise it is passed through unchanged.  A byte swap is
 * its own inverse, so the cpu_to_le macros reuse the le_to_cpu ones.
 */

#if UTIL_ARCH_BIG_ENDIAN
#define util_le64_to_cpu(x)  util_bswap64(x)
#define util_le32_to_cpu(x)  util_bswap32(x)
#define util_le16_to_cpu(x)  util_bswap16(x)
#else
#define util_le64_to_cpu(x)  (x)
#define util_le32_to_cpu(x)  (x)
#define util_le16_to_cpu(x)  (x)
#endif

#define util_cpu_to_le64(x)  util_le64_to_cpu(x)
#define util_cpu_to_le32(x)  util_le32_to_cpu(x)
#define util_cpu_to_le16(x)  util_le16_to_cpu(x)
603
/**
 * Reverse the byte order of a 32-bit word.
 */
static inline uint32_t
util_bswap32(uint32_t n)
{
#if defined(HAVE___BUILTIN_BSWAP32)
   return __builtin_bswap32(n);
#else
   /* Move each byte to its mirrored position, lowest byte first. */
   const uint32_t to_byte3 = n << 24;
   const uint32_t to_byte2 = (n << 8) & 0x00ff0000;
   const uint32_t to_byte1 = (n >> 8) & 0x0000ff00;
   const uint32_t to_byte0 = n >> 24;

   return to_byte3 | to_byte2 | to_byte1 | to_byte0;
#endif
}
619
/**
 * Reverse the byte order of a 64-bit word.
 */
static inline uint64_t
util_bswap64(uint64_t n)
{
#if defined(HAVE___BUILTIN_BSWAP64)
   return __builtin_bswap64(n);
#else
   /* Swap each 32-bit half on its own, then exchange the halves. */
   const uint32_t low_half = (uint32_t)n;
   const uint32_t high_half = (uint32_t)(n >> 32);

   return ((uint64_t)util_bswap32(low_half) << 32) |
          util_bswap32(high_half);
#endif
}
633
634
/**
 * Reverse the byte order of a 16-bit word.
 */
static inline uint16_t
util_bswap16(uint16_t n)
{
   /* The operands are promoted to int; the cast drops the bits that the
    * left shift pushed above bit 15.
    */
   return (uint16_t) ((n << 8) | (n >> 8));
}
644
/**
 * Copy n bytes of 32-bit words from src to dest, converting each word
 * from CPU byte order to little-endian.
 *
 * On big-endian builds every word is byte-swapped and n must be a
 * multiple of 4; otherwise this is a plain memcpy().  Returns dest.
 */
static inline void*
util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
{
#if UTIL_ARCH_BIG_ENDIAN
   uint32_t * restrict out = (uint32_t* restrict)dest;
   const uint32_t * restrict in = (const uint32_t* restrict)src;
   const size_t word_count = n / 4;
   size_t w;

   assert(n % 4 == 0);

   for (w = 0; w < word_count; w++)
      out[w] = util_bswap32(in[w]);

   return dest;
#else
   return memcpy(dest, src, n);
#endif
}
662
/**
 * Clamp X to [MIN, MAX].
 * This is a macro so that it works for float, int, uint, etc.
 * NaN is arbitrarily turned into MIN.
 *
 * Like the MIN/MAX macros below, the arguments are evaluated more than
 * once, so they must not have side effects.
 */
#define CLAMP(X, MIN, MAX)  ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN))

/* Syntax sugar occurring frequently in graphics code */
#define SATURATE(X)  CLAMP(X, 0.0f, 1.0f)

/* Minimum / maximum of two, three or four values. */
#define MIN2(A, B)  ((A) < (B) ? (A) : (B))
#define MAX2(A, B)  ((A) > (B) ? (A) : (B))

#define MIN3(A, B, C)  ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
#define MAX3(A, B, C)  ((A) > (B) ? MAX2(A, C) : MAX2(B, C))

#define MIN4(A, B, C, D)  ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D))
#define MAX4(A, B, C, D)  ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))
681
682
/**
 * Align a value up to an alignment value
 *
 * If \c value is not already a multiple of \c alignment, it is rounded up
 * to the next multiple.
 *
 * \param value      Value to be rounded
 * \param alignment  Alignment value to be used.  This must be a power of
 *                   two (asserted).
 *
 * \sa ROUND_DOWN_TO()
 */
static inline uintptr_t
ALIGN(uintptr_t value, int32_t alignment)
{
   /* For a power of two, alignment - 1 has exactly the low bits set. */
   const uintptr_t low_bits = (uintptr_t)alignment - 1;

   assert(util_is_power_of_two_nonzero(alignment));
   return (value + low_bits) & ~low_bits;
}
700
/**
 * Like ALIGN(), but also works when the alignment is not a power of two.
 * The alignment must be positive (asserted).
 */
static inline uintptr_t
ALIGN_NPOT(uintptr_t value, int32_t alignment)
{
   uintptr_t step;

   assert(alignment > 0);
   step = (uintptr_t)alignment;

   /* Round up to the next multiple using integer division. */
   return ((value + step - 1) / step) * step;
}
710
/**
 * Align a value down to an alignment value
 *
 * If \c value is not already a multiple of \c alignment, it is rounded
 * down to the previous multiple.
 *
 * \param value      Value to be rounded
 * \param alignment  Alignment value to be used.  This must be a power of
 *                   two (asserted).
 *
 * \sa ALIGN()
 */
static inline uintptr_t
ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
{
   /* For a power of two, alignment - 1 has exactly the low bits set. */
   const uintptr_t low_bits = (uintptr_t)alignment - 1;

   assert(util_is_power_of_two_nonzero(alignment));
   return value & ~low_bits;
}
728
/**
 * Align a value up; only works for power-of-two alignments.
 */
static inline int
align(int value, int alignment)
{
   const int low_bits = alignment - 1;

   return (value + low_bits) & ~low_bits;
}
737
/**
 * 64-bit variant of align(): round value up to a multiple of alignment,
 * which must be a power of two for the bit mask to be valid.
 */
static inline uint64_t
align64(uint64_t value, unsigned alignment)
{
   const uint64_t low_bits = (uint64_t)alignment - 1;

   return (value + low_bits) & ~low_bits;
}
743
/**
 * Works like align() but also for alignments that are not a power of two.
 * The alignment must be non-zero because it is used as a divisor.
 */
static inline size_t
util_align_npot(size_t value, size_t alignment)
{
   const size_t remainder = value % alignment;

   return remainder ? value + (alignment - remainder) : value;
}
754
/**
 * Shift value right by levels, never returning less than 1.
 */
static inline unsigned
u_minify(unsigned value, unsigned levels)
{
   const unsigned shrunk = value >> levels;

   return (shrunk > 1) ? shrunk : 1;
}
760
/* Copy the first four elements of SRC into DST, element by element. */
#if !defined(COPY_4V)
#define COPY_4V(DST, SRC)      \
   do {                        \
      (DST)[0] = (SRC)[0];     \
      (DST)[1] = (SRC)[1];     \
      (DST)[2] = (SRC)[2];     \
      (DST)[3] = (SRC)[3];     \
   } while (0)
#endif


/* Alias of COPY_4V. */
#if !defined(COPY_4FV)
#define COPY_4FV(DST, SRC)  COPY_4V(DST, SRC)
#endif
775
776
/* Store V0..V3 into the first four elements of DST. */
#if !defined(ASSIGN_4V)
#define ASSIGN_4V(DST, V0, V1, V2, V3)  \
   do {                                 \
      (DST)[0] = (V0);                  \
      (DST)[1] = (V1);                  \
      (DST)[2] = (V2);                  \
      (DST)[3] = (V3);                  \
   } while (0)
#endif
786
787
/**
 * Convert a float to unsigned fixed point with frac_bits fractional bits.
 * Negative inputs return 0; the scaled value is truncated toward zero.
 */
static inline uint32_t
util_unsigned_fixed(float value, unsigned frac_bits)
{
   if (value < 0)
      return 0;

   return (uint32_t)(value * (1<<frac_bits));
}
793
/**
 * Convert a float to signed fixed point with frac_bits fractional bits.
 * The scaled value is truncated toward zero.
 */
static inline int32_t
util_signed_fixed(float value, unsigned frac_bits)
{
   const float scaled = value * (1<<frac_bits);

   return (int32_t)scaled;
}
799
/*
 * Floating-point state helpers, defined in another translation unit.
 * NOTE(review): judging by the names, these read and modify the CPU's
 * floating-point control state (e.g. flushing denormals to zero) — confirm
 * against the implementation.
 */
unsigned util_fpstate_get(void);
unsigned util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
void util_fpstate_set(unsigned fpstate);
806
/**
 * For indexed draw calls, return true if the vertex count to be drawn is
 * much lower than the vertex count that has to be uploaded, meaning
 * that the driver should flatten indices instead of trying to upload
 * a too big range.
 *
 * The tolerated upload/draw ratio shrinks as the draw gets larger:
 * 16x for up to 32 vertices, 8x for up to 1024 vertices, 4x beyond that.
 *
 * This is used by vertex upload code in u_vbuf and glthread.
 */
static inline bool
util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count,
                                   unsigned upload_vertex_count)
{
   unsigned max_ratio;

   if (draw_vertex_count > 1024)
      max_ratio = 4;
   else if (draw_vertex_count > 32)
      max_ratio = 8;
   else
      max_ratio = 16;

   /* Widen before multiplying: draw_vertex_count * max_ratio can exceed
    * the range of unsigned, and a wrapped product would make large draws
    * look as if their upload range were too big.
    */
   return upload_vertex_count > (uint64_t)draw_vertex_count * max_ratio;
}
826
827 #ifdef __cplusplus
828 }
829 #endif
830
831 #endif /* U_MATH_H */
832