• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2002 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // mathutil.h: Math and bit manipulation functions.
8 
9 #ifndef COMMON_MATHUTIL_H_
10 #define COMMON_MATHUTIL_H_
11 
12 #include <math.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <algorithm>
17 #include <limits>
18 
19 #include <anglebase/numerics/safe_math.h>
20 
21 #include "common/debug.h"
22 #include "common/platform.h"
23 
// Re-export two names from namespace base so that they can be referred to as
// angle::CheckedNumeric and angle::IsValueInRangeForNumericType.
24 namespace angle
25 {
26 using base::CheckedNumeric;
27 using base::IsValueInRangeForNumericType;
28 }  // namespace angle
29 
30 namespace gl
31 {
32 
// Bit pattern of 1.0 in the IEEE-754 binary32 encoding.
constexpr unsigned int Float32One = 0x3F800000;
// Bit pattern of 1.0 in the IEEE-754 binary16 (half-float) encoding.
constexpr unsigned short Float16One = 0x3C00;
35 
// Returns true when |x| is a positive power of two, i.e. exactly one bit is set.
// Zero and negative values are never reported as powers of two.
template <typename T>
inline constexpr bool isPow2(T x)
{
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    // Check the sign first: for the most negative signed value, x - 1 would overflow (undefined
    // behavior, and not a constant expression), so the bit test must only run for x > 0.
    return x > 0 && (x & (x - 1)) == 0;
}
42 
// Integer base-2 logarithm rounded down: the largest shift that still leaves a value greater
// than one, plus nothing else. Both 0 and 1 yield 0.
template <typename T>
inline int log2(T x)
{
    static_assert(std::is_integral<T>::value, "log2 must be called on an integer type.");
    int exponent = 0;
    for (; (x >> exponent) > 1; ++exponent)
    {
        // Keep widening the shift until only the top set bit (or nothing) remains.
    }
    return exponent;
}
52 
// Rounds |x| up to the nearest power of two. Exact powers of two are returned unchanged and
// 0 maps to 1. Inputs above 2^31 wrap around to 0 because the result does not fit in 32 bits.
inline unsigned int ceilPow2(unsigned int x)
{
    // Start from x - 1 so that an exact power of two is not bumped to the next one.
    unsigned int smeared = (x != 0) ? x - 1 : x;

    // Copy the highest set bit into every lower bit position.
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        smeared |= smeared >> shift;
    }

    return smeared + 1;
}
66 
// Converts |value| to DestT, saturating at the destination type's limits instead of overflowing.
// A bound is only checked when the destination range is narrower than the source range on that
// side; otherwise the plain static_cast is already safe.
template <typename DestT, typename SrcT>
inline DestT clampCast(SrcT value)
{
    using DestLimits = std::numeric_limits<DestT>;
    using SrcLimits  = std::numeric_limits<SrcT>;

    // For floating-point types min() is the smallest positive normalized value, so lowest() is
    // used to obtain the true lower bound of each type.
    constexpr long double kDestLowest  = static_cast<long double>(DestLimits::lowest());
    constexpr long double kDestHighest = static_cast<long double>(DestLimits::max());
    constexpr long double kSrcLowest   = static_cast<long double>(SrcLimits::lowest());
    constexpr long double kSrcHighest  = static_cast<long double>(SrcLimits::max());

    constexpr bool kMayExceedTop    = kDestHighest < kSrcHighest;
    constexpr bool kMayExceedBottom = kDestLowest > kSrcLowest;

    if (kMayExceedTop)
    {
        const DestT ceiling = DestLimits::max();
        if (value >= static_cast<SrcT>(ceiling))
        {
            return ceiling;
        }
    }

    if (kMayExceedBottom)
    {
        const DestT floorValue = DestLimits::lowest();
        if (value <= static_cast<SrcT>(floorValue))
        {
            return floorValue;
        }
    }

    return static_cast<DestT>(value);
}
100 
101 // Specialize clampCast for bool->int conversion to avoid MSVS 2015 performance warning when the max
102 // value is casted to the source type.
103 template <>
clampCast(bool value)104 inline unsigned int clampCast(bool value)
105 {
106     return static_cast<unsigned int>(value);
107 }
108 
109 template <>
clampCast(bool value)110 inline int clampCast(bool value)
111 {
112     return static_cast<int>(value);
113 }
114 
// Restricts |x| to the interval [min, max] and returns the result as T.
// The selection is a single conditional expression, so with mixed argument types the chosen
// value passes through the operands' common type before being converted to T.
template <typename T, typename MIN, typename MAX>
inline T clamp(T x, MIN min, MAX max)
{
    // Every comparison against NaN is false, so a NaN |x| fails "x > min" and yields |min|.
    return !(x > min) ? min : (!(x > max) ? x : max);
}
121 
122 template <typename T>
clampForBitCount(T value,size_t bitCount)123 T clampForBitCount(T value, size_t bitCount)
124 {
125     static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
126 
127     if (bitCount == 0)
128     {
129         constexpr T kZero = 0;
130         return kZero;
131     }
132     ASSERT(bitCount <= sizeof(T) * 8);
133 
134     constexpr bool kIsSigned = std::numeric_limits<T>::is_signed;
135     ASSERT((bitCount > 1) || !kIsSigned);
136 
137     T min = 0;
138     T max = 0;
139     if (bitCount == sizeof(T) * 8)
140     {
141         min = std::numeric_limits<T>::min();
142         max = std::numeric_limits<T>::max();
143     }
144     else
145     {
146         constexpr T kOne = 1;
147         min              = (kIsSigned) ? -1 * (kOne << (bitCount - 1)) : 0;
148         max              = (kIsSigned) ? (kOne << (bitCount - 1)) - 1 : (kOne << bitCount) - 1;
149     }
150 
151     return gl::clamp(value, min, max);
152 }
153 
clamp01(float x)154 inline float clamp01(float x)
155 {
156     return clamp(x, 0.0f, 1.0f);
157 }
158 
// Converts |x| to an n-bit unsigned normalized integer: values above 1 map to the all-ones
// n-bit value, values below 0 map to 0, and everything in between is scaled and rounded by
// adding 0.5 before truncation.
template <const int n>
inline unsigned int unorm(float x)
{
    // All-ones value with n bits set.
    const unsigned int fullScale = 0xFFFFFFFF >> (32 - n);

    if (x > 1)
    {
        return fullScale;
    }
    if (x < 0)
    {
        return 0;
    }
    return static_cast<unsigned int>(fullScale * x + 0.5f);
}
177 
// Reinterprets the object representation of |source| as a destType by copying bytes.
// Only min(sizeof(destType), sizeof(sourceType)) bytes are copied. When destType is larger than
// sourceType the remaining bytes come from value-initialization (zero for scalar types) instead
// of being left indeterminate.
template <typename destType, typename sourceType>
destType bitCast(const sourceType &source)
{
    const size_t copySize = std::min(sizeof(destType), sizeof(sourceType));
    // Value-initialize so that no uninitialized bytes are ever returned to the caller.
    destType output{};
    memcpy(&output, &source, copySize);
    return output;
}
186 
// Converts an integer to a pointer type by first widening or narrowing it to uintptr_t.
// No validity checking of the resulting address is performed.
template <typename DestT, typename SrcT>
DestT unsafe_int_to_pointer_cast(SrcT src)
{
    const uintptr_t address = static_cast<uintptr_t>(src);
    return reinterpret_cast<DestT>(address);
}
192 
// Converts a pointer to an integer type by going through uintptr_t. The value is truncated if
// DestT is narrower than a pointer.
template <typename DestT, typename SrcT>
DestT unsafe_pointer_to_int_cast(SrcT src)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(src);
    return static_cast<DestT>(address);
}
198 
// Scales an integer onto [-1, 1]: negative values are divided by the magnitude of the type's
// minimum, non-negative values by the type's maximum.
// Approach taken from https://stackoverflow.com/a/37581284
template <typename T>
static constexpr double normalize(T value)
{
    using Limits = std::numeric_limits<T>;

    const double asDouble = static_cast<double>(value);
    if (value < 0)
    {
        // min() is negative here, so negate the numerator to keep the result negative.
        return -asDouble / Limits::min();
    }
    return asDouble / Limits::max();
}
206 
float32ToFloat16(float fp32)207 inline unsigned short float32ToFloat16(float fp32)
208 {
209     unsigned int fp32i = bitCast<unsigned int>(fp32);
210     unsigned int sign  = (fp32i & 0x80000000) >> 16;
211     unsigned int abs   = fp32i & 0x7FFFFFFF;
212 
213     if (abs > 0x7F800000)
214     {  // NaN
215         return 0x7FFF;
216     }
217     else if (abs > 0x47FFEFFF)
218     {  // Infinity
219         return static_cast<uint16_t>(sign | 0x7C00);
220     }
221     else if (abs < 0x38800000)  // Denormal
222     {
223         unsigned int mantissa = (abs & 0x007FFFFF) | 0x00800000;
224         int e                 = 113 - (abs >> 23);
225 
226         if (e < 24)
227         {
228             abs = mantissa >> e;
229         }
230         else
231         {
232             abs = 0;
233         }
234 
235         return static_cast<unsigned short>(sign | (abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
236     }
237     else
238     {
239         return static_cast<unsigned short>(
240             sign | (abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
241     }
242 }
243 
// Declarations only; the definitions are not part of this section of the file.
// NOTE(review): going by its name and its use in averageHalfFloat, float16ToFloat32 presumably
// decodes a 16-bit half-float bit pattern into a float -- confirm against the definition.
244 float float16ToFloat32(unsigned short h);
245 
// NOTE(review): the names suggest conversion between three floats and a packed 32-bit 9:9:9:5
// shared-exponent value -- confirm against the definitions.
246 unsigned int convertRGBFloatsTo999E5(float red, float green, float blue);
247 void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float *blue);
248 
float32ToFloat11(float fp32)249 inline unsigned short float32ToFloat11(float fp32)
250 {
251     const unsigned int float32MantissaMask     = 0x7FFFFF;
252     const unsigned int float32ExponentMask     = 0x7F800000;
253     const unsigned int float32SignMask         = 0x80000000;
254     const unsigned int float32ValueMask        = ~float32SignMask;
255     const unsigned int float32ExponentFirstBit = 23;
256     const unsigned int float32ExponentBias     = 127;
257 
258     const unsigned short float11Max          = 0x7BF;
259     const unsigned short float11MantissaMask = 0x3F;
260     const unsigned short float11ExponentMask = 0x7C0;
261     const unsigned short float11BitMask      = 0x7FF;
262     const unsigned int float11ExponentBias   = 14;
263 
264     const unsigned int float32Maxfloat11       = 0x477E0000;
265     const unsigned int float32MinNormfloat11   = 0x38800000;
266     const unsigned int float32MinDenormfloat11 = 0x35000080;
267 
268     const unsigned int float32Bits = bitCast<unsigned int>(fp32);
269     const bool float32Sign         = (float32Bits & float32SignMask) == float32SignMask;
270 
271     unsigned int float32Val = float32Bits & float32ValueMask;
272 
273     if ((float32Val & float32ExponentMask) == float32ExponentMask)
274     {
275         // INF or NAN
276         if ((float32Val & float32MantissaMask) != 0)
277         {
278             return float11ExponentMask |
279                    (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
280                     float11MantissaMask);
281         }
282         else if (float32Sign)
283         {
284             // -INF is clamped to 0 since float11 is positive only
285             return 0;
286         }
287         else
288         {
289             return float11ExponentMask;
290         }
291     }
292     else if (float32Sign)
293     {
294         // float11 is positive only, so clamp to zero
295         return 0;
296     }
297     else if (float32Val > float32Maxfloat11)
298     {
299         // The number is too large to be represented as a float11, set to max
300         return float11Max;
301     }
302     else if (float32Val < float32MinDenormfloat11)
303     {
304         // The number is too small to be represented as a denormalized float11, set to 0
305         return 0;
306     }
307     else
308     {
309         if (float32Val < float32MinNormfloat11)
310         {
311             // The number is too small to be represented as a normalized float11
312             // Convert it to a denormalized value.
313             const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
314                                        (float32Val >> float32ExponentFirstBit);
315             ASSERT(shift < 32);
316             float32Val =
317                 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
318         }
319         else
320         {
321             // Rebias the exponent to represent the value as a normalized float11
322             float32Val += 0xC8000000;
323         }
324 
325         return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
326     }
327 }
328 
float32ToFloat10(float fp32)329 inline unsigned short float32ToFloat10(float fp32)
330 {
331     const unsigned int float32MantissaMask     = 0x7FFFFF;
332     const unsigned int float32ExponentMask     = 0x7F800000;
333     const unsigned int float32SignMask         = 0x80000000;
334     const unsigned int float32ValueMask        = ~float32SignMask;
335     const unsigned int float32ExponentFirstBit = 23;
336     const unsigned int float32ExponentBias     = 127;
337 
338     const unsigned short float10Max          = 0x3DF;
339     const unsigned short float10MantissaMask = 0x1F;
340     const unsigned short float10ExponentMask = 0x3E0;
341     const unsigned short float10BitMask      = 0x3FF;
342     const unsigned int float10ExponentBias   = 14;
343 
344     const unsigned int float32Maxfloat10       = 0x477C0000;
345     const unsigned int float32MinNormfloat10   = 0x38800000;
346     const unsigned int float32MinDenormfloat10 = 0x35800040;
347 
348     const unsigned int float32Bits = bitCast<unsigned int>(fp32);
349     const bool float32Sign         = (float32Bits & float32SignMask) == float32SignMask;
350 
351     unsigned int float32Val = float32Bits & float32ValueMask;
352 
353     if ((float32Val & float32ExponentMask) == float32ExponentMask)
354     {
355         // INF or NAN
356         if ((float32Val & float32MantissaMask) != 0)
357         {
358             return float10ExponentMask |
359                    (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
360                     float10MantissaMask);
361         }
362         else if (float32Sign)
363         {
364             // -INF is clamped to 0 since float10 is positive only
365             return 0;
366         }
367         else
368         {
369             return float10ExponentMask;
370         }
371     }
372     else if (float32Sign)
373     {
374         // float10 is positive only, so clamp to zero
375         return 0;
376     }
377     else if (float32Val > float32Maxfloat10)
378     {
379         // The number is too large to be represented as a float10, set to max
380         return float10Max;
381     }
382     else if (float32Val < float32MinDenormfloat10)
383     {
384         // The number is too small to be represented as a denormalized float10, set to 0
385         return 0;
386     }
387     else
388     {
389         if (float32Val < float32MinNormfloat10)
390         {
391             // The number is too small to be represented as a normalized float10
392             // Convert it to a denormalized value.
393             const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
394                                        (float32Val >> float32ExponentFirstBit);
395             ASSERT(shift < 32);
396             float32Val =
397                 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
398         }
399         else
400         {
401             // Rebias the exponent to represent the value as a normalized float10
402             float32Val += 0xC8000000;
403         }
404 
405         return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
406     }
407 }
408 
float11ToFloat32(unsigned short fp11)409 inline float float11ToFloat32(unsigned short fp11)
410 {
411     unsigned short exponent = (fp11 >> 6) & 0x1F;
412     unsigned short mantissa = fp11 & 0x3F;
413 
414     if (exponent == 0x1F)
415     {
416         // INF or NAN
417         return bitCast<float>(0x7f800000 | (mantissa << 17));
418     }
419     else
420     {
421         if (exponent != 0)
422         {
423             // normalized
424         }
425         else if (mantissa != 0)
426         {
427             // The value is denormalized
428             exponent = 1;
429 
430             do
431             {
432                 exponent--;
433                 mantissa <<= 1;
434             } while ((mantissa & 0x40) == 0);
435 
436             mantissa = mantissa & 0x3F;
437         }
438         else  // The value is zero
439         {
440             exponent = static_cast<unsigned short>(-112);
441         }
442 
443         return bitCast<float>(((exponent + 112) << 23) | (mantissa << 17));
444     }
445 }
446 
float10ToFloat32(unsigned short fp10)447 inline float float10ToFloat32(unsigned short fp10)
448 {
449     unsigned short exponent = (fp10 >> 5) & 0x1F;
450     unsigned short mantissa = fp10 & 0x1F;
451 
452     if (exponent == 0x1F)
453     {
454         // INF or NAN
455         return bitCast<float>(0x7f800000 | (mantissa << 17));
456     }
457     else
458     {
459         if (exponent != 0)
460         {
461             // normalized
462         }
463         else if (mantissa != 0)
464         {
465             // The value is denormalized
466             exponent = 1;
467 
468             do
469             {
470                 exponent--;
471                 mantissa <<= 1;
472             } while ((mantissa & 0x20) == 0);
473 
474             mantissa = mantissa & 0x1F;
475         }
476         else  // The value is zero
477         {
478             exponent = static_cast<unsigned short>(-112);
479         }
480 
481         return bitCast<float>(((exponent + 112) << 23) | (mantissa << 18));
482     }
483 }
484 
// Conversions between float and the 16.16 fixed point format.
// ConvertFixedToFloat divides the raw fixed-point integer by 2^16.
inline float ConvertFixedToFloat(int32_t fixedInput)
{
    constexpr float kFixedOne = 65536.0f;
    const float asFloat       = static_cast<float>(fixedInput);
    return asFloat / kFixedOne;
}
490 
// Converts a float to 16.16 fixed point, returned as the raw 32-bit two's-complement pattern.
// Inputs above 32767.65535 return kHighest, inputs below -32768.65535 return kLowest, and NaN
// returns 0. Everything else is scaled by 2^16 and truncated toward zero.
inline uint32_t ConvertFloatToFixed(float floatInput)
{
    static constexpr uint32_t kHighest = 32767 * 65536 + 65535;
    // NOTE(review): this evaluates to 0x8000FFFF rather than 0x80000000 (the most negative
    // 16.16 value); kept as-is to preserve existing results -- confirm whether it is intended.
    static constexpr uint32_t kLowest = static_cast<uint32_t>(-32768 * 65536 + 65535);

    if (floatInput != floatInput)
    {
        // NaN fails both range checks below, and converting NaN to an integer is undefined.
        return 0;
    }
    if (floatInput > 32767.65535)
    {
        return kHighest;
    }
    if (floatInput < -32768.65535)
    {
        return kLowest;
    }

    // Converting a negative floating-point value directly to an unsigned integer is undefined
    // behavior (and saturates to 0 on some targets). Convert to a signed 64-bit integer first,
    // which can hold every value that passed the range checks, then wrap to 32 bits.
    const int64_t fixedValue = static_cast<int64_t>(floatInput * 65536);
    return static_cast<uint32_t>(fixedValue);
}
509 
// Converts a normalized integer that uses the full range of T to a float: unsigned types map
// to [0, 1] and signed types to [-1, 1].
template <typename T>
inline float normalizedToFloat(T input)
{
    using Limits = std::numeric_limits<T>;
    static_assert(Limits::is_integer, "T must be an integer.");

    constexpr bool kIsSigned = std::is_signed<T>::value;

    if constexpr (sizeof(T) <= 2)
    {
        constexpr float kInverseMax = 1.0f / Limits::max();
        if constexpr (kIsSigned)
        {
            // For a signed input equal to the type's minimum the product is slightly below -1,
            // so it is clamped. std::max with the constant as its first argument is used for
            // the best codegen.
            return std::max(-1.0f, input * kInverseMax);
        }
        else
        {
            return input * kInverseMax;
        }
    }
    else
    {
        // float has only a 23 bit mantissa, so the calculation is done in double precision.
        constexpr double kInverseMax = 1.0 / Limits::max();
        if constexpr (kIsSigned)
        {
            // No clamp is needed: at this width the excess below -1 is not representable in a
            // single-precision float, which is verified at compile time.
            static_assert(static_cast<float>(Limits::min() * kInverseMax) == -1.0f);
        }
        return static_cast<float>(input * kInverseMax);
    }
}
539 
// Converts a normalized integer that occupies only |inputBitCount| bits of T to a float:
// unsigned types map to [0, 1] and signed types to [-1, 1]. For signed T the bit count includes
// the sign bit. The input must already fit in the declared number of bits.
template <unsigned int inputBitCount, typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
    static_assert(inputBitCount > 0u && inputBitCount < 32u);

    constexpr bool kIsSigned = std::is_signed<T>::value;
    if constexpr (kIsSigned)
    {
        static_assert(inputBitCount > 1 && inputBitCount < sizeof(T) * 8 - 1);
    }
    else
    {
        static_assert(inputBitCount < sizeof(T) * 8);
    }

    // Number of magnitude bits, i.e. the bit count without the sign bit.
    constexpr uint32_t kMagnitudeBits = kIsSigned ? inputBitCount - 1u : inputBitCount;

    constexpr T kMaxValue = static_cast<T>((1u << kMagnitudeBits) - 1u);

    // Ensure that the input value fits in the declared number of bits.
    ASSERT(input <= kMaxValue);
    if constexpr (kIsSigned)
    {
        ASSERT(input >= -kMaxValue - 1);
    }

    if constexpr (kMagnitudeBits <= 23)
    {
        constexpr float kInverseMax = 1.0f / kMaxValue;
        if constexpr (kIsSigned)
        {
            // The most negative input scales to slightly below -1; clamp it.
            return std::max(-1.0f, input * kInverseMax);
        }
        else
        {
            return input * kInverseMax;
        }
    }
    else
    {
        // float has only a 23 bit mantissa, so the calculation is done in double precision.
        constexpr double kInverseMax = 1.0 / kMaxValue;
        if constexpr (kIsSigned)
        {
            if constexpr (kMagnitudeBits < 25)
            {
                return std::max(-1.0f, static_cast<float>(input * kInverseMax));
            }
            else
            {
                // At this width the most negative input already converts to exactly -1.0f.
                static_assert(static_cast<float>((-kMaxValue - 1) * kInverseMax) == -1.0f);
            }
        }
        return static_cast<float>(input * kInverseMax);
    }
}
594 
// Rounds the floating-point |input| to the nearest integer of type R.
// On 64-bit ARM std::round is used directly; elsewhere a bias just below one half is added
// (with the sign of the input when R is signed) before the truncating conversion.
template <typename T, typename R>
inline R roundToNearest(T input)
{
    static_assert(std::is_floating_point<T>::value);
    static_assert(std::numeric_limits<R>::is_integer);
#if defined(__aarch64__) || defined(_M_ARM64)
    // On armv8, this expression is compiled to a dedicated round-to-nearest instruction
    return static_cast<R>(std::round(input));
#else
    // The bias is the largest value below 0.5: it is still less than one half, yet adding it to
    // 0.5 already reaches 1.0 in the respective precision.
    static_assert(0.49999997f < 0.5f);
    static_assert(0.49999997f + 0.5f == 1.0f);
    static_assert(0.49999999999999994 < 0.5);
    static_assert(0.49999999999999994 + 0.5 == 1.0);
    constexpr T kBias = sizeof(T) == 8 ? 0.49999999999999994 : 0.49999997f;

    if constexpr (std::is_signed<R>::value)
    {
        // Push the value away from zero so that truncation rounds to nearest on both sides.
        return static_cast<R>(input + std::copysign(kBias, input));
    }
    else
    {
        return static_cast<R>(input + kBias);
    }
#endif
}
612 
613 template <typename T>
floatToNormalized(float input)614 inline T floatToNormalized(float input)
615 {
616     if constexpr (sizeof(T) > 2)
617     {
618         // float has only a 23 bit mantissa, so we need to do the calculation in double precision
619         return roundToNearest<double, T>(std::numeric_limits<T>::max() *
620                                          static_cast<double>(input));
621     }
622     else
623     {
624         return roundToNearest<float, T>(std::numeric_limits<T>::max() * input);
625     }
626 }
627 
628 template <unsigned int outputBitCount, typename T>
floatToNormalized(float input)629 inline T floatToNormalized(float input)
630 {
631     static_assert(outputBitCount < (sizeof(T) * 8), "T must have more bits than outputBitCount.");
632     static_assert(outputBitCount > (std::is_unsigned<T>::value ? 0 : 1),
633                   "outputBitCount must be at least 1 not counting the sign bit.");
634     constexpr unsigned int bits = std::is_unsigned<T>::value ? outputBitCount : outputBitCount - 1;
635 
636     if (bits > 23)
637     {
638         // float has only a 23 bit mantissa, so we need to do the calculation in double precision
639         return roundToNearest<double, T>(((1 << bits) - 1) * static_cast<double>(input));
640     }
641     else
642     {
643         return roundToNearest<float, T>(((1 << bits) - 1) * input);
644     }
645 }
646 
// Extracts the |inputBitCount|-bit field that starts at bit |inputBitStart| of |input| and
// returns it right-aligned.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");
    // Build the mask in 64 bits: "(1 << inputBitCount) - 1" evaluated in int is undefined or
    // wrong once the field is 31 bits or wider, including the full-width case permitted above.
    constexpr uint64_t kMask64 =
        inputBitCount >= 64u ? ~uint64_t{0} : (uint64_t{1} << (inputBitCount % 64u)) - 1u;
    const T mask = static_cast<T>(kMask64);
    return (input >> inputBitStart) & mask;
}
655 
// Keeps the low |inputBitCount| bits of |input| and moves them up so that the field starts at
// bit |inputBitStart|.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");
    // Build the mask in 64 bits: "(1 << inputBitCount) - 1" evaluated in int is undefined or
    // wrong once the field is 31 bits or wider, including the full-width case permitted above.
    constexpr uint64_t kMask64 =
        inputBitCount >= 64u ? ~uint64_t{0} : (uint64_t{1} << (inputBitCount % 64u)) - 1u;
    const T mask = static_cast<T>(kMask64);
    return (input & mask) << inputBitStart;
}
664 
// Returns the number of zero bits above the highest set bit of |x|; 32 when |x| is zero.
inline unsigned int CountLeadingZeros(uint32_t x)
{
    // Binary search: whenever the upper part of the remaining value is non-zero, discard the
    // lower part and subtract its width from the running count.
    unsigned int zeros = 32u;
    for (unsigned int shift = 16u; shift >= 2u; shift >>= 1)
    {
        const uint32_t upper = x >> shift;
        if (upper != 0)
        {
            zeros -= shift;
            x = upper;
        }
    }

    // Only the two lowest bits can still be set at this point.
    if ((x >> 1u) != 0)
    {
        return zeros - 2u;
    }
    return zeros - x;
}
702 
// Average of two unsigned bytes, rounded down.
inline unsigned char average(unsigned char a, unsigned char b)
{
    // Bits set in both operands contribute fully; bits set in only one contribute half.
    const auto commonBits       = a & b;
    const auto halfOfDifference = (a ^ b) >> 1;
    return static_cast<unsigned char>(halfOfDifference + commonBits);
}
707 
// Average of two signed bytes, truncated toward zero.
inline signed char average(signed char a, signed char b)
{
    // Sum in a wider type so that the addition cannot overflow.
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed char>(sum / 2);
}
712 
// Average of two unsigned 16-bit values, rounded down.
inline unsigned short average(unsigned short a, unsigned short b)
{
    // Bits set in both operands contribute fully; bits set in only one contribute half.
    const auto commonBits       = a & b;
    const auto halfOfDifference = (a ^ b) >> 1;
    return static_cast<unsigned short>(halfOfDifference + commonBits);
}
717 
// Average of two signed 16-bit values, truncated toward zero.
inline signed short average(signed short a, signed short b)
{
    // Sum in a wider type so that the addition cannot overflow.
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed short>(sum / 2);
}
722 
// Average of two unsigned integers, rounded down, computed without overflowing.
inline unsigned int average(unsigned int a, unsigned int b)
{
    // Bits set in both operands contribute fully; bits set in only one contribute half.
    const unsigned int commonBits       = a & b;
    const unsigned int halfOfDifference = (a ^ b) >> 1;
    return halfOfDifference + commonBits;
}
727 
// Average of two signed integers, truncated toward zero.
inline int average(int a, int b)
{
    // Sum in 64 bits so that the addition cannot overflow.
    const long long sum = static_cast<long long>(a) + static_cast<long long>(b);
    return static_cast<int>(sum / 2LL);
}
733 
// Arithmetic mean of two floats.
inline float average(float a, float b)
{
    return 0.5f * (a + b);
}
738 
averageHalfFloat(unsigned short a,unsigned short b)739 inline unsigned short averageHalfFloat(unsigned short a, unsigned short b)
740 {
741     return float32ToFloat16((float16ToFloat32(a) + float16ToFloat32(b)) * 0.5f);
742 }
743 
averageFloat11(unsigned int a,unsigned int b)744 inline unsigned int averageFloat11(unsigned int a, unsigned int b)
745 {
746     return float32ToFloat11((float11ToFloat32(static_cast<unsigned short>(a)) +
747                              float11ToFloat32(static_cast<unsigned short>(b))) *
748                             0.5f);
749 }
750 
averageFloat10(unsigned int a,unsigned int b)751 inline unsigned int averageFloat10(unsigned int a, unsigned int b)
752 {
753     return float32ToFloat10((float10ToFloat32(static_cast<unsigned short>(a)) +
754                              float10ToFloat32(static_cast<unsigned short>(b))) *
755                             0.5f);
756 }
757 
// Half-open interval [low, high) over an arithmetic type T. A range whose high bound does not
// exceed its low bound is empty.
template <typename T>
class Range
{
  public:
    // Creates an empty range; both bounds are value-initialized (zero for arithmetic T) so that
    // a default-constructed range can be queried safely.
    Range() {}
    Range(T lo, T hi) : mLow(lo), mHigh(hi) {}

    bool operator==(const Range<T> &other) const
    {
        return mLow == other.mLow && mHigh == other.mHigh;
    }

    // Distance between the bounds, or 0 for an empty range.
    T length() const { return (empty() ? 0 : (mHigh - mLow)); }

    // True when the two half-open ranges share at least one value.
    bool intersects(const Range<T> &other) const
    {
        if (mLow <= other.mLow)
        {
            return other.mLow < mHigh;
        }
        else
        {
            return mLow < other.mHigh;
        }
    }

    // True when the ranges overlap or touch end-to-start. Both ranges must be non-empty.
    bool intersectsOrContinuous(const Range<T> &other) const
    {
        ASSERT(!empty());
        ASSERT(!other.empty());
        if (mLow <= other.mLow)
        {
            return mHigh >= other.mLow;
        }
        else
        {
            return mLow <= other.mHigh;
        }
    }

    // Grows this range to the smallest range covering both itself and |other|.
    void merge(const Range<T> &other)
    {
        if (mLow > other.mLow)
        {
            mLow = other.mLow;
        }

        if (mHigh < other.mHigh)
        {
            mHigh = other.mHigh;
        }
    }

    // Assumes that end is non-inclusive.. for example, extending to 5 will make "end" 6.
    void extend(T value)
    {
        mLow  = value < mLow ? value : mLow;
        mHigh = value >= mHigh ? (value + 1) : mHigh;
    }

    bool empty() const { return mHigh <= mLow; }

    bool contains(T value) const { return value >= mLow && value < mHigh; }

    // Minimal forward iterator that yields each value in [low, high).
    class Iterator final
    {
      public:
        Iterator(T value) : mCurrent(value) {}

        Iterator &operator++()
        {
            ++mCurrent;
            return *this;
        }
        bool operator==(const Iterator &other) const { return mCurrent == other.mCurrent; }
        bool operator!=(const Iterator &other) const { return mCurrent != other.mCurrent; }
        T operator*() const { return mCurrent; }

      private:
        T mCurrent;
    };

    Iterator begin() const { return Iterator(mLow); }

    Iterator end() const { return Iterator(mHigh); }

    T low() const { return mLow; }
    T high() const { return mHigh; }

    // Resets to an inverted (empty) range so that the next extend() or merge() defines both
    // bounds. lowest() is used for the high bound because min() is the smallest positive value
    // for floating-point T; for integer T the two are identical.
    void invalidate()
    {
        mLow  = std::numeric_limits<T>::max();
        mHigh = std::numeric_limits<T>::lowest();
    }

  private:
    T mLow{};
    T mHigh{};
};
857 
858 typedef Range<int> RangeI;
859 typedef Range<unsigned int> RangeUI;
860 static_assert(std::is_trivially_copyable<RangeUI>(),
861               "RangeUI should be trivial copyable so that we can memcpy");
862 
863 struct IndexRange
864 {
865     struct Undefined
866     {};
IndexRangeIndexRange867     IndexRange(Undefined) {}
IndexRangeIndexRange868     IndexRange() : IndexRange(0, 0, 0) {}
IndexRangeIndexRange869     IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
870         : start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
871     {
872         ASSERT(start <= end);
873     }
874 
875     // Number of vertices in the range.
vertexCountIndexRange876     size_t vertexCount() const { return (end - start) + 1; }
877 
878     // Inclusive range of indices that are not primitive restart
879     size_t start;
880     size_t end;
881 
882     // Number of non-primitive restart indices
883     size_t vertexIndexCount;
884 };
885 
// Builds a float from a mantissa |x| and an integer exponent |exp| as x * 2^exp, as in the GLSL
// ldexp() built-in. Exponents above 128 return positive infinity and exponents below -126
// return zero, regardless of |x|.
inline float Ldexp(float x, int exp)
{
    const bool exponentTooLarge = exp > 128;
    const bool exponentTooSmall = exp < -126;

    if (exponentTooLarge)
    {
        return std::numeric_limits<float>::infinity();
    }
    if (exponentTooSmall)
    {
        return 0.0f;
    }

    // Evaluate in double precision and narrow only the final product.
    const double scale = std::pow(2.0, static_cast<double>(exp));
    return static_cast<float>(static_cast<double>(x) * scale);
}
901 
902 // First, both normalized floating-point values are converted into 16-bit integer values.
903 // Then, the results are packed into the returned 32-bit unsigned integer.
904 // The first float value will be written to the least significant bits of the output;
905 // the last float value will be written to the most significant bits.
906 // The conversion of each value to fixed point is done as follows :
907 // packSnorm2x16 : round(clamp(c, -1, +1) * 32767.0)
packSnorm2x16(float f1,float f2)908 inline uint32_t packSnorm2x16(float f1, float f2)
909 {
910     int16_t leastSignificantBits = static_cast<int16_t>(roundf(clamp(f1, -1.0f, 1.0f) * 32767.0f));
911     int16_t mostSignificantBits  = static_cast<int16_t>(roundf(clamp(f2, -1.0f, 1.0f) * 32767.0f));
912     return static_cast<uint32_t>(mostSignificantBits) << 16 |
913            (static_cast<uint32_t>(leastSignificantBits) & 0xFFFF);
914 }
915 
916 // First, unpacks a single 32-bit unsigned integer u into a pair of 16-bit unsigned integers. Then,
917 // each component is converted to a normalized floating-point value to generate the returned two
918 // float values. The first float value will be extracted from the least significant bits of the
919 // input; the last float value will be extracted from the most-significant bits. The conversion for
920 // unpacked fixed-point value to floating point is done as follows: unpackSnorm2x16 : clamp(f /
921 // 32767.0, -1, +1)
unpackSnorm2x16(uint32_t u,float * f1,float * f2)922 inline void unpackSnorm2x16(uint32_t u, float *f1, float *f2)
923 {
924     int16_t leastSignificantBits = static_cast<int16_t>(u & 0xFFFF);
925     int16_t mostSignificantBits  = static_cast<int16_t>(u >> 16);
926     *f1 = clamp(static_cast<float>(leastSignificantBits) / 32767.0f, -1.0f, 1.0f);
927     *f2 = clamp(static_cast<float>(mostSignificantBits) / 32767.0f, -1.0f, 1.0f);
928 }
929 
930 // First, both normalized floating-point values are converted into 16-bit integer values.
931 // Then, the results are packed into the returned 32-bit unsigned integer.
932 // The first float value will be written to the least significant bits of the output;
933 // the last float value will be written to the most significant bits.
934 // The conversion of each value to fixed point is done as follows:
935 // packUnorm2x16 : round(clamp(c, 0, +1) * 65535.0)
packUnorm2x16(float f1,float f2)936 inline uint32_t packUnorm2x16(float f1, float f2)
937 {
938     uint16_t leastSignificantBits = static_cast<uint16_t>(roundf(clamp(f1, 0.0f, 1.0f) * 65535.0f));
939     uint16_t mostSignificantBits  = static_cast<uint16_t>(roundf(clamp(f2, 0.0f, 1.0f) * 65535.0f));
940     return static_cast<uint32_t>(mostSignificantBits) << 16 |
941            static_cast<uint32_t>(leastSignificantBits);
942 }
943 
// Splits the 32-bit word u into two 16-bit unsigned integers and converts each to a normalized
// float. *f1 comes from the 16 least significant bits, *f2 from the 16 most significant bits.
// The fixed-point to float conversion is:
//   unpackUnorm2x16 : f / 65535.0
inline void unpackUnorm2x16(uint32_t u, float *f1, float *f2)
{
    constexpr float kMaxUnorm16 = 65535.0f;

    const uint16_t lowHalf  = static_cast<uint16_t>(u & 0xFFFF);
    const uint16_t highHalf = static_cast<uint16_t>(u >> 16);

    *f1 = static_cast<float>(lowHalf) / kMaxUnorm16;
    *f2 = static_cast<float>(highHalf) / kMaxUnorm16;
}
956 
957 // Helper functions intended to be used only here.
958 namespace priv
959 {
960 
ToPackedUnorm8(float f)961 inline uint8_t ToPackedUnorm8(float f)
962 {
963     return static_cast<uint8_t>(roundf(clamp(f, 0.0f, 1.0f) * 255.0f));
964 }
965 
ToPackedSnorm8(float f)966 inline int8_t ToPackedSnorm8(float f)
967 {
968     return static_cast<int8_t>(roundf(clamp(f, -1.0f, 1.0f) * 127.0f));
969 }
970 
971 }  // namespace priv
972 
973 // Packs 4 normalized unsigned floating-point values to a single 32-bit unsigned integer. Works
974 // similarly to packUnorm2x16. The floats are clamped to the range 0.0 to 1.0, and written to the
975 // unsigned integer starting from the least significant bits.
PackUnorm4x8(float f1,float f2,float f3,float f4)976 inline uint32_t PackUnorm4x8(float f1, float f2, float f3, float f4)
977 {
978     uint8_t bits[4];
979     bits[0]         = priv::ToPackedUnorm8(f1);
980     bits[1]         = priv::ToPackedUnorm8(f2);
981     bits[2]         = priv::ToPackedUnorm8(f3);
982     bits[3]         = priv::ToPackedUnorm8(f4);
983     uint32_t result = 0u;
984     for (int i = 0; i < 4; ++i)
985     {
986         int shift = i * 8;
987         result |= (static_cast<uint32_t>(bits[i]) << shift);
988     }
989     return result;
990 }
991 
// Unpacks 4 normalized unsigned floating-point values from a single 32-bit unsigned integer into
// f[0..3]. Works similarly to unpackUnorm2x16: each byte is divided by 255.0, starting from the
// least significant byte.
inline void UnpackUnorm4x8(uint32_t u, float *f)
{
    uint32_t remaining = u;
    for (int component = 0; component < 4; ++component)
    {
        const uint8_t byteValue = static_cast<uint8_t>(remaining & 0xFF);
        f[component]            = static_cast<float>(byteValue) / 255.0f;
        // Move the next byte into the low position.
        remaining >>= 8;
    }
}
1004 
1005 // Packs 4 normalized signed floating-point values to a single 32-bit unsigned integer. The floats
1006 // are clamped to the range -1.0 to 1.0, and written to the unsigned integer starting from the least
1007 // significant bits.
PackSnorm4x8(float f1,float f2,float f3,float f4)1008 inline uint32_t PackSnorm4x8(float f1, float f2, float f3, float f4)
1009 {
1010     int8_t bits[4];
1011     bits[0]         = priv::ToPackedSnorm8(f1);
1012     bits[1]         = priv::ToPackedSnorm8(f2);
1013     bits[2]         = priv::ToPackedSnorm8(f3);
1014     bits[3]         = priv::ToPackedSnorm8(f4);
1015     uint32_t result = 0u;
1016     for (int i = 0; i < 4; ++i)
1017     {
1018         int shift = i * 8;
1019         result |= ((static_cast<uint32_t>(bits[i]) & 0xFF) << shift);
1020     }
1021     return result;
1022 }
1023 
1024 // Unpacks 4 normalized signed floating-point values from a single 32-bit unsigned integer into f.
1025 // Works similarly to unpackSnorm2x16. The floats are unpacked starting from the least significant
1026 // bits, and clamped to the range -1.0 to 1.0.
UnpackSnorm4x8(uint32_t u,float * f)1027 inline void UnpackSnorm4x8(uint32_t u, float *f)
1028 {
1029     for (int i = 0; i < 4; ++i)
1030     {
1031         int shift   = i * 8;
1032         int8_t bits = static_cast<int8_t>((u >> shift) & 0xFF);
1033         f[i]        = clamp(static_cast<float>(bits) / 127.0f, -1.0f, 1.0f);
1034     }
1035 }
1036 
1037 // Returns an unsigned integer obtained by converting the two floating-point values to the 16-bit
1038 // floating-point representation found in the OpenGL ES Specification, and then packing these
1039 // two 16-bit integers into a 32-bit unsigned integer.
1040 // f1: The 16 least-significant bits of the result;
1041 // f2: The 16 most-significant bits.
packHalf2x16(float f1,float f2)1042 inline uint32_t packHalf2x16(float f1, float f2)
1043 {
1044     uint16_t leastSignificantBits = static_cast<uint16_t>(float32ToFloat16(f1));
1045     uint16_t mostSignificantBits  = static_cast<uint16_t>(float32ToFloat16(f2));
1046     return static_cast<uint32_t>(mostSignificantBits) << 16 |
1047            static_cast<uint32_t>(leastSignificantBits);
1048 }
1049 
1050 // Returns two floating-point values obtained by unpacking a 32-bit unsigned integer into a pair of
1051 // 16-bit values, interpreting those values as 16-bit floating-point numbers according to the OpenGL
1052 // ES Specification, and converting them to 32-bit floating-point values. The first float value is
1053 // obtained from the 16 least-significant bits of u; the second component is obtained from the 16
1054 // most-significant bits of u.
unpackHalf2x16(uint32_t u,float * f1,float * f2)1055 inline void unpackHalf2x16(uint32_t u, float *f1, float *f2)
1056 {
1057     uint16_t leastSignificantBits = static_cast<uint16_t>(u & 0xFFFF);
1058     uint16_t mostSignificantBits  = static_cast<uint16_t>(u >> 16);
1059 
1060     *f1 = float16ToFloat32(leastSignificantBits);
1061     *f2 = float16ToFloat32(mostSignificantBits);
1062 }
1063 
sRGBToLinear(uint8_t srgbValue)1064 inline float sRGBToLinear(uint8_t srgbValue)
1065 {
1066     float value = srgbValue / 255.0f;
1067     if (value <= 0.04045f)
1068     {
1069         value = value / 12.92f;
1070     }
1071     else
1072     {
1073         value = std::pow((value + 0.055f) / 1.055f, 2.4f);
1074     }
1075     ASSERT(value >= 0.0f && value <= 1.0f);
1076     return value;
1077 }
1078 
linearToSRGB(float value)1079 inline uint8_t linearToSRGB(float value)
1080 {
1081     ASSERT(value >= 0.0f && value <= 1.0f);
1082     if (value < 0.0031308f)
1083     {
1084         value = value * 12.92f;
1085     }
1086     else
1087     {
1088         value = std::pow(value, 0.41666f) * 1.055f - 0.055f;
1089     }
1090     return static_cast<uint8_t>(value * 255.0f + 0.5f);
1091 }
1092 
// Reverse the order of the bits: bit 0 of the input becomes bit 31 of the result, and so on.
inline uint32_t BitfieldReverse(uint32_t value)
{
    // TODO(oetuaho@nvidia.com): Optimize this if needed. There don't seem to be compiler intrinsics
    // for this, and right now it's not used in performance-critical paths.
    uint32_t reversed  = 0u;
    uint32_t remaining = value;
    for (unsigned int step = 0u; step < 32u; ++step)
    {
        // Shift the accumulated bits up and append the current lowest input bit.
        reversed = (reversed << 1) | (remaining & 1u);
        remaining >>= 1;
    }
    return reversed;
}
1105 
1106 // Count the 1 bits.
1107 #if defined(_MSC_VER) && !defined(__clang__)
1108 #    if defined(_M_IX86) || defined(_M_X64)
1109 namespace priv
1110 {
1111 // Check POPCNT instruction support and cache the result.
1112 // https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64#remarks
1113 static const bool kHasPopcnt = [] {
1114     int info[4];
1115     __cpuid(&info[0], 1);
1116     return static_cast<bool>(info[2] & 0x800000);
1117 }();
1118 }  // namespace priv
1119 
// Polyfills for x86/x64 CPUs without POPCNT.
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Counts the set bits of a 32-bit value by summing adjacent bit fields in parallel.
inline int BitCountPolyfill(uint32_t bits)
{
    // Per-pair counts (2-bit fields).
    const uint32_t pairs = bits - ((bits >> 1) & 0x55555555);
    // Per-nibble counts (4-bit fields).
    const uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    // Per-byte counts; the addition is performed before the mask is applied.
    const uint32_t bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
    // The multiplication accumulates all four byte counts into the top byte.
    const uint32_t total = (bytes * 0x01010101) >> 24;
    return static_cast<int>(total);
}
1129 
// 64-bit variant of the parallel bit count: sums adjacent bit fields of growing width and
// collects the per-byte counts into the top byte with a multiplication.
inline int BitCountPolyfill(uint64_t bits)
{
    // Per-pair counts (2-bit fields).
    const uint64_t pairs = bits - ((bits >> 1) & 0x5555555555555555ull);
    // Per-nibble counts (4-bit fields).
    const uint64_t nibbles =
        (pairs & 0x3333333333333333ull) + ((pairs >> 2) & 0x3333333333333333ull);
    // Per-byte counts; the addition is performed before the mask is applied.
    const uint64_t bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0Full;
    // The multiplication accumulates all eight byte counts into the top byte.
    const uint64_t total = (bytes * 0x0101010101010101ull) >> 56;
    return static_cast<int>(total);
}
1137 
BitCount(uint32_t bits)1138 inline int BitCount(uint32_t bits)
1139 {
1140     if (priv::kHasPopcnt)
1141     {
1142         return static_cast<int>(__popcnt(bits));
1143     }
1144     return BitCountPolyfill(bits);
1145 }
1146 
BitCount(uint64_t bits)1147 inline int BitCount(uint64_t bits)
1148 {
1149     if (priv::kHasPopcnt)
1150     {
1151 #        if defined(_M_X64)
1152         return static_cast<int>(__popcnt64(bits));
1153 #        else   // x86
1154         return static_cast<int>(__popcnt(static_cast<uint32_t>(bits >> 32)) +
1155                                 __popcnt(static_cast<uint32_t>(bits)));
1156 #        endif  // defined(_M_X64)
1157     }
1158     return BitCountPolyfill(bits);
1159 }
1160 
1161 #    elif defined(_M_ARM) || defined(_M_ARM64)
1162 
1163 // MSVC's _CountOneBits* intrinsics are not defined for ARM64, moreover they do not use dedicated
1164 // NEON instructions.
1165 
BitCount(uint32_t bits)1166 inline int BitCount(uint32_t bits)
1167 {
1168     // cast bits to 8x8 datatype and use VCNT on it
1169     const uint8x8_t vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(bits)));
1170 
1171     // pairwise sums: 8x8 -> 16x4 -> 32x2
1172     return static_cast<int>(vget_lane_u32(vpaddl_u16(vpaddl_u8(vsum)), 0));
1173 }
1174 
BitCount(uint64_t bits)1175 inline int BitCount(uint64_t bits)
1176 {
1177     // cast bits to 8x8 datatype and use VCNT on it
1178     const uint8x8_t vsum = vcnt_u8(vcreate_u8(bits));
1179 
1180     // pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1
1181     return static_cast<int>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vsum))), 0));
1182 }
1183 #    endif  // defined(_M_IX86) || defined(_M_X64)
1184 #endif      // defined(_MSC_VER) && !defined(__clang__)
1185 
1186 #if defined(ANGLE_PLATFORM_POSIX) || defined(__clang__) || defined(__GNUC__)
// Counts the set bits of a 32-bit value using the compiler's popcount builtin.
inline int BitCount(uint32_t bits)
{
    const int setBits = __builtin_popcount(bits);
    return setBits;
}
1191 
// Counts the set bits of a 64-bit value using the compiler's long long popcount builtin.
inline int BitCount(uint64_t bits)
{
    const int setBits = __builtin_popcountll(bits);
    return setBits;
}
1196 #endif  // defined(ANGLE_PLATFORM_POSIX) || defined(__clang__) || defined(__GNUC__)
1197 
BitCount(uint8_t bits)1198 inline int BitCount(uint8_t bits)
1199 {
1200     return BitCount(static_cast<uint32_t>(bits));
1201 }
1202 
BitCount(uint16_t bits)1203 inline int BitCount(uint16_t bits)
1204 {
1205     return BitCount(static_cast<uint32_t>(bits));
1206 }
1207 
1208 #if defined(ANGLE_PLATFORM_WINDOWS)
1209 // Return the index of the least significant bit set. Indexing is such that bit 0 is the least
1210 // significant bit. Implemented for different bit widths on different platforms.
ScanForward(uint32_t bits)1211 inline unsigned long ScanForward(uint32_t bits)
1212 {
1213     ASSERT(bits != 0u);
1214     unsigned long firstBitIndex = 0ul;
1215     unsigned char ret           = _BitScanForward(&firstBitIndex, bits);
1216     ASSERT(ret != 0u);
1217     return firstBitIndex;
1218 }
1219 
ScanForward(uint64_t bits)1220 inline unsigned long ScanForward(uint64_t bits)
1221 {
1222     ASSERT(bits != 0u);
1223     unsigned long firstBitIndex = 0ul;
1224 #    if defined(ANGLE_IS_64_BIT_CPU)
1225     unsigned char ret = _BitScanForward64(&firstBitIndex, bits);
1226 #    else
1227     unsigned char ret;
1228     if (static_cast<uint32_t>(bits) == 0)
1229     {
1230         ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits >> 32));
1231         firstBitIndex += 32ul;
1232     }
1233     else
1234     {
1235         ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits));
1236     }
1237 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1238     ASSERT(ret != 0u);
1239     return firstBitIndex;
1240 }
1241 
1242 // Return the index of the most significant bit set. Indexing is such that bit 0 is the least
1243 // significant bit.
ScanReverse(uint32_t bits)1244 inline unsigned long ScanReverse(uint32_t bits)
1245 {
1246     ASSERT(bits != 0u);
1247     unsigned long lastBitIndex = 0ul;
1248     unsigned char ret          = _BitScanReverse(&lastBitIndex, bits);
1249     ASSERT(ret != 0u);
1250     return lastBitIndex;
1251 }
1252 
ScanReverse(uint64_t bits)1253 inline unsigned long ScanReverse(uint64_t bits)
1254 {
1255     ASSERT(bits != 0u);
1256     unsigned long lastBitIndex = 0ul;
1257 #    if defined(ANGLE_IS_64_BIT_CPU)
1258     unsigned char ret = _BitScanReverse64(&lastBitIndex, bits);
1259 #    else
1260     unsigned char ret;
1261     if (static_cast<uint32_t>(bits >> 32) == 0)
1262     {
1263         ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits));
1264     }
1265     else
1266     {
1267         ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits >> 32));
1268         lastBitIndex += 32ul;
1269     }
1270 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1271     ASSERT(ret != 0u);
1272     return lastBitIndex;
1273 }
1274 #endif  // defined(ANGLE_PLATFORM_WINDOWS)
1275 
1276 #if defined(ANGLE_PLATFORM_POSIX)
ScanForward(uint32_t bits)1277 inline unsigned long ScanForward(uint32_t bits)
1278 {
1279     ASSERT(bits != 0u);
1280     return static_cast<unsigned long>(__builtin_ctz(bits));
1281 }
1282 
ScanForward(uint64_t bits)1283 inline unsigned long ScanForward(uint64_t bits)
1284 {
1285     ASSERT(bits != 0u);
1286 #    if defined(ANGLE_IS_64_BIT_CPU)
1287     return static_cast<unsigned long>(__builtin_ctzll(bits));
1288 #    else
1289     return static_cast<unsigned long>(static_cast<uint32_t>(bits) == 0
1290                                           ? __builtin_ctz(static_cast<uint32_t>(bits >> 32)) + 32
1291                                           : __builtin_ctz(static_cast<uint32_t>(bits)));
1292 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1293 }
1294 
ScanReverse(uint32_t bits)1295 inline unsigned long ScanReverse(uint32_t bits)
1296 {
1297     ASSERT(bits != 0u);
1298     return static_cast<unsigned long>(sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(bits));
1299 }
1300 
ScanReverse(uint64_t bits)1301 inline unsigned long ScanReverse(uint64_t bits)
1302 {
1303     ASSERT(bits != 0u);
1304 #    if defined(ANGLE_IS_64_BIT_CPU)
1305     return static_cast<unsigned long>(sizeof(uint64_t) * CHAR_BIT - 1 - __builtin_clzll(bits));
1306 #    else
1307     if (static_cast<uint32_t>(bits >> 32) == 0)
1308     {
1309         return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits));
1310     }
1311     else
1312     {
1313         return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits >> 32)) +
1314                32;
1315     }
1316 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1317 }
1318 #endif  // defined(ANGLE_PLATFORM_POSIX)
1319 
ScanForward(uint8_t bits)1320 inline unsigned long ScanForward(uint8_t bits)
1321 {
1322     return ScanForward(static_cast<uint32_t>(bits));
1323 }
1324 
ScanForward(uint16_t bits)1325 inline unsigned long ScanForward(uint16_t bits)
1326 {
1327     return ScanForward(static_cast<uint32_t>(bits));
1328 }
1329 
ScanReverse(uint8_t bits)1330 inline unsigned long ScanReverse(uint8_t bits)
1331 {
1332     return ScanReverse(static_cast<uint32_t>(bits));
1333 }
1334 
ScanReverse(uint16_t bits)1335 inline unsigned long ScanReverse(uint16_t bits)
1336 {
1337     return ScanReverse(static_cast<uint32_t>(bits));
1338 }
1339 
// Returns -1 on 0, otherwise the index of the least significant 1 bit as in GLSL.
// Non-zero inputs are delegated to the ScanForward overload matching T.
template <typename T>
int FindLSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    return (bits == 0u) ? -1 : static_cast<int>(ScanForward(bits));
}
1354 
// Returns -1 on 0, otherwise the index of the most significant 1 bit as in GLSL.
// Non-zero inputs are delegated to the ScanReverse overload matching T.
template <typename T>
int FindMSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    return (bits == 0u) ? -1 : static_cast<int>(ScanReverse(bits));
}
1369 
1370 // Returns whether the argument is Not a Number.
1371 // IEEE 754 single precision NaN representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1372 // non-zero.
isNaN(float f)1373 inline bool isNaN(float f)
1374 {
1375     // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1376     // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1377     return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1378            (bitCast<uint32_t>(f) & 0x7fffffu);
1379 }
1380 
1381 // Returns whether the argument is infinity.
1382 // IEEE 754 single precision infinity representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1383 // zero.
isInf(float f)1384 inline bool isInf(float f)
1385 {
1386     // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1387     // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1388     return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1389            !(bitCast<uint32_t>(f) & 0x7fffffu);
1390 }
1391 
namespace priv
{
// Compile-time search for the integer square root of N, trying candidate R and then R + 1, etc.
// `value` is:
//   - 0 when R * R > N (the search has passed N without finding an exact root),
//   - R when R * R == N,
//   - otherwise the `value` of iSquareRoot<N, R + 1>.
// R * R is evaluated in unsigned int arithmetic.
template <unsigned int N, unsigned int R>
struct iSquareRoot
{
    // Evaluates one step of the search for the current candidate R.
    static constexpr unsigned int solve()
    {
        return (R * R > N)
                   ? 0
                   : ((R * R == N) ? R : static_cast<unsigned int>(iSquareRoot<N, R + 1>::value));
    }
    enum Result
    {
        value = iSquareRoot::solve()
    };
};

// Specialization for R == N: `value` is N itself.
// NOTE(review): presumably this bounds the template recursion at R == N — confirm.
template <unsigned int N>
struct iSquareRoot<N, N>
{
    enum result
    {
        value = N
    };
};

}  // namespace priv
1419 
1420 template <unsigned int N>
1421 constexpr unsigned int iSquareRoot()
1422 {
1423     return priv::iSquareRoot<N, 1>::value;
1424 }
1425 
1426 // Sum, difference and multiplication operations for signed ints that wrap on 32-bit overflow.
1427 //
1428 // Unsigned types are defined to do arithmetic modulo 2^n in C++. For signed types, overflow
1429 // behavior is undefined.
1430 
// Returns lhs + rhs with two's-complement wrap-around instead of signed-overflow undefined
// behavior. The addition is carried out in the unsigned counterpart of T, so the result wraps
// at the width of T; in particular 64-bit operands are no longer truncated to 32 bits.
// T must be an integral (or enumeration) type accepted by std::make_unsigned.
template <typename T>
inline T WrappingSum(T lhs, T rhs)
{
    using UnsignedT = typename std::make_unsigned<T>::type;

    const UnsignedT lhsUnsigned = static_cast<UnsignedT>(lhs);
    const UnsignedT rhsUnsigned = static_cast<UnsignedT>(rhs);
    // The inner cast reduces the (possibly int-promoted) sum back to the width of T.
    return static_cast<T>(static_cast<UnsignedT>(lhsUnsigned + rhsUnsigned));
}
1438 
// Returns lhs - rhs with two's-complement wrap-around instead of signed-overflow undefined
// behavior. The subtraction is carried out in the unsigned counterpart of T, so the result
// wraps at the width of T; in particular 64-bit operands are no longer truncated to 32 bits.
// T must be an integral (or enumeration) type accepted by std::make_unsigned.
template <typename T>
inline T WrappingDiff(T lhs, T rhs)
{
    using UnsignedT = typename std::make_unsigned<T>::type;

    const UnsignedT lhsUnsigned = static_cast<UnsignedT>(lhs);
    const UnsignedT rhsUnsigned = static_cast<UnsignedT>(rhs);
    // The inner cast reduces the (possibly int-promoted) difference back to the width of T.
    return static_cast<T>(static_cast<UnsignedT>(lhsUnsigned - rhsUnsigned));
}
1446 
// Returns lhs * rhs wrapped to 32 bits, avoiding signed-overflow undefined behavior.
inline int32_t WrappingMul(int32_t lhs, int32_t rhs)
{
    // Unsigned multiplication is defined modulo 2^32, and its result equals the low 32 bits of
    // the exact signed product.
    const uint32_t lowProductBits = static_cast<uint32_t>(lhs) * static_cast<uint32_t>(rhs);
    // Reinterpret the low 32 bits as a signed value.
    return static_cast<int32_t>(lowProductBits);
}
1459 
// Scales a screen-space extent to normalized device coordinates: 2 * dimensionScreen divided
// by viewportDimension.
inline float scaleScreenDimensionToNdc(float dimensionScreen, float viewportDimension)
{
    const float doubled = 2.0f * dimensionScreen;
    return doubled / viewportDimension;
}
1464 
// Maps a screen-space coordinate to normalized device coordinates: the coordinate is divided by
// viewportDimension, shifted by -0.5 and doubled, so 0 maps to -1 and viewportDimension maps
// to +1.
inline float scaleScreenCoordinateToNdc(float coordinateScreen, float viewportDimension)
{
    const float fraction = coordinateScreen / viewportDimension;
    const float centered = fraction - 0.5f;
    return 2.0f * centered;
}
1470 
1471 }  // namespace gl
1472 
1473 namespace rx
1474 {
1475 
// Rounds value up to the next multiple of alignment (values already on a multiple are returned
// unchanged). Works for any T supporting +, - and %.
template <typename T>
T roundUp(const T value, const T alignment)
{
    auto bumped          = value + alignment - static_cast<T>(1);
    const auto remainder = bumped % alignment;
    return bumped - remainder;
}
1482 
1483 template <typename T>
1484 constexpr T roundUpPow2(const T value, const T alignment)
1485 {
1486     ASSERT(gl::isPow2(alignment));
1487     return (value + alignment - 1) & ~(alignment - 1);
1488 }
1489 
1490 template <typename T>
1491 constexpr T roundDownPow2(const T value, const T alignment)
1492 {
1493     ASSERT(gl::isPow2(alignment));
1494     return value & ~(alignment - 1);
1495 }
1496 
1497 template <typename T>
1498 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
1499 {
1500     angle::CheckedNumeric<T> checkedValue(value);
1501     angle::CheckedNumeric<T> checkedAlignment(alignment);
1502     return roundUp(checkedValue, checkedAlignment);
1503 }
1504 
// Returns value / divisor rounded up. The quotient is computed first and incremented only when
// there is a remainder, so no intermediate sum can overflow.
inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned int divisor)
{
    return value / divisor + ((value % divisor != 0) ? 1u : 0u);
}
1510 
// ANGLE_HAS_BUILTIN(x) expands to __has_builtin(x) when the compiler defines __has_builtin, and
// to 0 otherwise, so it can be used in #if expressions on any compiler.
#if defined(__has_builtin)
#    define ANGLE_HAS_BUILTIN(x) __has_builtin(x)
#else
#    define ANGLE_HAS_BUILTIN(x) 0
#endif
1516 
// Bit-rotation macros: ANGLE_ROTL (32-bit rotate left), ANGLE_ROTL64 (64-bit rotate left) and
// ANGLE_ROTR16 (16-bit rotate right).
// When _MSC_VER is defined they map to the _rotl/_rotl64/_rotr16 intrinsics.
#if defined(_MSC_VER)

#    define ANGLE_ROTL(x, y) _rotl(x, y)
#    define ANGLE_ROTL64(x, y) _rotl64(x, y)
#    define ANGLE_ROTR16(x, y) _rotr16(x, y)

// Clang builds that report all three rotate builtins map the macros to those builtins.
#elif defined(__clang__) && ANGLE_HAS_BUILTIN(__builtin_rotateleft32) && \
    ANGLE_HAS_BUILTIN(__builtin_rotateleft64) && ANGLE_HAS_BUILTIN(__builtin_rotateright16)

#    define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
#    define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
#    define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)
1529 
1530 #else
1531 
// Rotates the 32-bit value x left by r bit positions.
// The rotation count is reduced modulo 32, so r == 0 (or any multiple of 32) returns x unchanged
// instead of evaluating an out-of-range shift; results for r in [1, 31] are unchanged.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    const uint32_t leftShift  = static_cast<uint32_t>(r) & 31u;
    const uint32_t rightShift = (32u - leftShift) & 31u;
    return (x << leftShift) | (x >> rightShift);
}
1536 
// Rotates the 64-bit value x left by r bit positions.
// The rotation count is reduced modulo 64, so r == 0 (or any multiple of 64) returns x unchanged
// instead of evaluating an out-of-range shift; results for r in [1, 63] are unchanged.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    const uint32_t leftShift  = static_cast<uint32_t>(r) & 63u;
    const uint32_t rightShift = (64u - leftShift) & 63u;
    return (x << leftShift) | (x >> rightShift);
}
1541 
// Rotates the 16-bit value x right by r bit positions.
// The rotation count is reduced modulo 16 and the shifts are done in unsigned arithmetic, so
// every count gives a defined result; results for r in [0, 16] are unchanged.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    const unsigned int rightShift = static_cast<unsigned int>(r) & 15u;
    const unsigned int leftShift  = (16u - rightShift) & 15u;
    const unsigned int widened    = x;
    // The cast discards the bits shifted above bit 15.
    return static_cast<uint16_t>((widened >> rightShift) | (widened << leftShift));
}
1546 
// Fallback: route the rotation macros to the portable ::rx helper functions.
#    define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
#    define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
#    define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)

#endif  // defined(_MSC_VER)
1552 
// Returns floor(log2(bytes)), computed by repeatedly halving the argument.
// Inputs of 0 and 1 both return 0: treating 0 as a base case keeps the recursion finite (the
// previous `bytes == 1` check never terminated for 0) and matches what gl::log2 returns for 0.
constexpr unsigned int Log2(unsigned int bytes)
{
    return bytes <= 1 ? 0 : (1 + Log2(bytes / 2));
}
1557 }  // namespace rx
1558 
1559 #endif  // COMMON_MATHUTIL_H_
1560