• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright 2002 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 
7 // mathutil.h: Math and bit manipulation functions.
8 
9 #ifndef COMMON_MATHUTIL_H_
10 #define COMMON_MATHUTIL_H_
11 
12 #include <math.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <algorithm>
17 #include <limits>
18 
19 #include <anglebase/numerics/safe_math.h>
20 
21 #include "common/debug.h"
22 #include "common/platform.h"
23 
24 namespace angle
25 {
26 using base::CheckedNumeric;
27 using base::IsValueInRangeForNumericType;
28 }  // namespace angle
29 
30 namespace gl
31 {
32 
// Bit patterns that encode the value 1.0 as a 32-bit float and as a 16-bit half float.
constexpr unsigned int Float32One   = 0x3F800000u;
constexpr unsigned short Float16One = 0x3C00u;
35 
// Returns true when x is a positive power of two, i.e. exactly one bit is set.
// Zero and negative values are never powers of two. The sign is tested first so that the
// subtraction is never evaluated for the most negative signed value, where x - 1 would overflow
// (and previously made that value look like a power of two).
template <typename T>
inline constexpr bool isPow2(T x)
{
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    return (x > 0) && ((x & (x - 1)) == 0);
}
42 
// Integer base-2 logarithm, rounded down: counts how many times x can be halved before it
// reaches 1. Any input that is not greater than 1 (including zero and negative values) yields 0.
template <typename T>
inline int log2(T x)
{
    static_assert(std::is_integral<T>::value, "log2 must be called on an integer type.");
    int halvings = 0;
    for (T remaining = x; remaining > 1; remaining >>= 1)
    {
        ++halvings;
    }
    return halvings;
}
52 
// Rounds x up to the nearest power of two. An input of 0 yields 1. The bit smearing below covers
// 32 bits; with a 32-bit unsigned int, inputs above 2^31 wrap around to 0.
inline unsigned int ceilPow2(unsigned int x)
{
    // Start from x - 1 so that exact powers of two map to themselves; 0 is left untouched.
    unsigned int bits = (x == 0) ? 0u : x - 1u;

    // Copy the highest set bit into every lower position.
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        bits |= bits >> shift;
    }

    // All bits below the top one are now set, so adding one lands on the next power of two.
    return bits + 1u;
}
66 
// Converts value to DestT, saturating at DestT's representable limits instead of overflowing.
// A bound is only checked when the source type can actually exceed it.
template <typename DestT, typename SrcT>
inline DestT clampCast(SrcT value)
{
    using DestLimits = std::numeric_limits<DestT>;
    using SrcLimits  = std::numeric_limits<SrcT>;

    // For floating-point types with denormalization, min() is the smallest positive normalized
    // value; lowest() is the value that has nothing below it, so that is what gets compared.
    constexpr long double kDestLowest  = static_cast<long double>(DestLimits::lowest());
    constexpr long double kDestHighest = static_cast<long double>(DestLimits::max());
    constexpr long double kSrcLowest   = static_cast<long double>(SrcLimits::lowest());
    constexpr long double kSrcHighest  = static_cast<long double>(SrcLimits::max());

    // The source range extends above the destination range: saturate at the top.
    if (kDestHighest < kSrcHighest && value >= static_cast<SrcT>(DestLimits::max()))
    {
        return DestLimits::max();
    }

    // The source range extends below the destination range: saturate at the bottom.
    if (kDestLowest > kSrcLowest && value <= static_cast<SrcT>(DestLimits::lowest()))
    {
        return DestLimits::lowest();
    }

    return static_cast<DestT>(value);
}
100 
101 // Specialize clampCast for bool->int conversion to avoid MSVS 2015 performance warning when the max
102 // value is casted to the source type.
103 template <>
clampCast(bool value)104 inline unsigned int clampCast(bool value)
105 {
106     return static_cast<unsigned int>(value);
107 }
108 
109 template <>
clampCast(bool value)110 inline int clampCast(bool value)
111 {
112     return static_cast<int>(value);
113 }
114 
// Restricts x to the interval [min, max] and returns the result converted to T.
// The bounds may have different types from x; comparisons and the selected value follow the
// usual arithmetic conversions before the final conversion to T.
template <typename T, typename MIN, typename MAX>
inline T clamp(T x, MIN min, MAX max)
{
    // Apply the upper bound first, then the lower one. Every comparison involving a NaN is false,
    // so a NaN x ends up selecting min.
    auto upperBounded = (x > max) ? max : x;
    return (x > min) ? upperBounded : min;
}
121 
// Clamps x to the unit interval [0, 1]. A NaN input yields 0.
inline float clamp01(float x)
{
    // !(x > 0) rather than (x <= 0) so that NaN, which fails every comparison, maps to 0.
    if (!(x > 0.0f))
    {
        return 0.0f;
    }
    return (x > 1.0f) ? 1.0f : x;
}
126 
// Converts a float to an n-bit unsigned normalized integer: x is clamped to [0, 1], scaled by
// 2^n - 1 and rounded to nearest. NaN maps to 0.
// NOTE(review): the scaling is done in single precision, so for n > 23 the result is only
// approximate - confirm whether any caller needs more than 23 bits.
template <const int n>
inline unsigned int unorm(float x)
{
    // A shift by 32 - n is only valid for 1 <= n <= 32.
    static_assert(n > 0 && n <= 32, "unorm requires a bit count between 1 and 32.");
    const unsigned int max = 0xFFFFFFFFu >> (32 - n);

    // x == 1 is returned directly: for wide n, max * 1 + 0.5f can round above what the
    // destination holds.
    if (x >= 1)
    {
        return max;
    }

    // Written as !(x > 0) so that NaN is caught here instead of reaching the float -> unsigned
    // conversion below, which is undefined for NaN.
    if (!(x > 0))
    {
        return 0;
    }

    return static_cast<unsigned int>(max * x + 0.5f);
}
145 
// Reports whether SSE2 may be used. Always false unless ANGLE_USE_SSE is defined. When it is
// defined, the CPU is only probed on non-ARM Windows builds; other platforms report false.
// The answer is computed on the first call and cached in function-local statics.
inline bool supportsSSE2()
{
#if defined(ANGLE_USE_SSE)
    static bool sProbed  = false;
    static bool sHasSSE2 = false;

    if (!sProbed)
    {
#    if defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        int cpuInfo[4];
        __cpuid(cpuInfo, 0);

        // cpuInfo[0] from leaf 0 is the highest supported leaf; leaf 1 is read only if present.
        if (cpuInfo[0] >= 1)
        {
            __cpuid(cpuInfo, 1);

            // Bit 26 of the fourth register returned for leaf 1 is the flag being tested.
            sHasSSE2 = ((cpuInfo[3] >> 26) & 1) != 0;
        }
#    endif  // defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        sProbed = true;
    }

    return sHasSSE2;
#else  // defined(ANGLE_USE_SSE)
    return false;
#endif
}
176 
// Reinterprets the object representation of source as a destType by copying bytes.
// Only min(sizeof(destType), sizeof(sourceType)) bytes are copied. The result is
// value-initialized first, so when destType is wider than sourceType the bytes that are not
// overwritten are zero rather than indeterminate.
template <typename destType, typename sourceType>
destType bitCast(const sourceType &source)
{
    const size_t copySize = std::min(sizeof(destType), sizeof(sourceType));
    destType output{};
    memcpy(&output, &source, copySize);
    return output;
}
185 
// Scales an integer value into a double: non-negative values are divided by the type's maximum,
// negative values are negated and divided by the type's minimum (so the result stays negative).
// Approach taken from https://stackoverflow.com/a/37581284
template <typename T>
static constexpr double normalize(T value)
{
    return !(value < 0) ? static_cast<double>(value) / std::numeric_limits<T>::max()
                        : -static_cast<double>(value) / std::numeric_limits<T>::min();
}
193 
float32ToFloat16(float fp32)194 inline unsigned short float32ToFloat16(float fp32)
195 {
196     unsigned int fp32i = bitCast<unsigned int>(fp32);
197     unsigned int sign  = (fp32i & 0x80000000) >> 16;
198     unsigned int abs   = fp32i & 0x7FFFFFFF;
199 
200     if (abs > 0x7F800000)
201     {  // NaN
202         return 0x7FFF;
203     }
204     else if (abs > 0x47FFEFFF)
205     {  // Infinity
206         return static_cast<uint16_t>(sign | 0x7C00);
207     }
208     else if (abs < 0x38800000)  // Denormal
209     {
210         unsigned int mantissa = (abs & 0x007FFFFF) | 0x00800000;
211         int e                 = 113 - (abs >> 23);
212 
213         if (e < 24)
214         {
215             abs = mantissa >> e;
216         }
217         else
218         {
219             abs = 0;
220         }
221 
222         return static_cast<unsigned short>(sign | (abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
223     }
224     else
225     {
226         return static_cast<unsigned short>(
227             sign | (abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
228     }
229 }
230 
231 float float16ToFloat32(unsigned short h);
232 
233 unsigned int convertRGBFloatsTo999E5(float red, float green, float blue);
234 void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float *blue);
235 
float32ToFloat11(float fp32)236 inline unsigned short float32ToFloat11(float fp32)
237 {
238     const unsigned int float32MantissaMask     = 0x7FFFFF;
239     const unsigned int float32ExponentMask     = 0x7F800000;
240     const unsigned int float32SignMask         = 0x80000000;
241     const unsigned int float32ValueMask        = ~float32SignMask;
242     const unsigned int float32ExponentFirstBit = 23;
243     const unsigned int float32ExponentBias     = 127;
244 
245     const unsigned short float11Max          = 0x7BF;
246     const unsigned short float11MantissaMask = 0x3F;
247     const unsigned short float11ExponentMask = 0x7C0;
248     const unsigned short float11BitMask      = 0x7FF;
249     const unsigned int float11ExponentBias   = 14;
250 
251     const unsigned int float32Maxfloat11       = 0x477E0000;
252     const unsigned int float32MinNormfloat11   = 0x38800000;
253     const unsigned int float32MinDenormfloat11 = 0x35000080;
254 
255     const unsigned int float32Bits = bitCast<unsigned int>(fp32);
256     const bool float32Sign         = (float32Bits & float32SignMask) == float32SignMask;
257 
258     unsigned int float32Val = float32Bits & float32ValueMask;
259 
260     if ((float32Val & float32ExponentMask) == float32ExponentMask)
261     {
262         // INF or NAN
263         if ((float32Val & float32MantissaMask) != 0)
264         {
265             return float11ExponentMask |
266                    (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
267                     float11MantissaMask);
268         }
269         else if (float32Sign)
270         {
271             // -INF is clamped to 0 since float11 is positive only
272             return 0;
273         }
274         else
275         {
276             return float11ExponentMask;
277         }
278     }
279     else if (float32Sign)
280     {
281         // float11 is positive only, so clamp to zero
282         return 0;
283     }
284     else if (float32Val > float32Maxfloat11)
285     {
286         // The number is too large to be represented as a float11, set to max
287         return float11Max;
288     }
289     else if (float32Val < float32MinDenormfloat11)
290     {
291         // The number is too small to be represented as a denormalized float11, set to 0
292         return 0;
293     }
294     else
295     {
296         if (float32Val < float32MinNormfloat11)
297         {
298             // The number is too small to be represented as a normalized float11
299             // Convert it to a denormalized value.
300             const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
301                                        (float32Val >> float32ExponentFirstBit);
302             ASSERT(shift < 32);
303             float32Val =
304                 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
305         }
306         else
307         {
308             // Rebias the exponent to represent the value as a normalized float11
309             float32Val += 0xC8000000;
310         }
311 
312         return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
313     }
314 }
315 
float32ToFloat10(float fp32)316 inline unsigned short float32ToFloat10(float fp32)
317 {
318     const unsigned int float32MantissaMask     = 0x7FFFFF;
319     const unsigned int float32ExponentMask     = 0x7F800000;
320     const unsigned int float32SignMask         = 0x80000000;
321     const unsigned int float32ValueMask        = ~float32SignMask;
322     const unsigned int float32ExponentFirstBit = 23;
323     const unsigned int float32ExponentBias     = 127;
324 
325     const unsigned short float10Max          = 0x3DF;
326     const unsigned short float10MantissaMask = 0x1F;
327     const unsigned short float10ExponentMask = 0x3E0;
328     const unsigned short float10BitMask      = 0x3FF;
329     const unsigned int float10ExponentBias   = 14;
330 
331     const unsigned int float32Maxfloat10       = 0x477C0000;
332     const unsigned int float32MinNormfloat10   = 0x38800000;
333     const unsigned int float32MinDenormfloat10 = 0x35800040;
334 
335     const unsigned int float32Bits = bitCast<unsigned int>(fp32);
336     const bool float32Sign         = (float32Bits & float32SignMask) == float32SignMask;
337 
338     unsigned int float32Val = float32Bits & float32ValueMask;
339 
340     if ((float32Val & float32ExponentMask) == float32ExponentMask)
341     {
342         // INF or NAN
343         if ((float32Val & float32MantissaMask) != 0)
344         {
345             return float10ExponentMask |
346                    (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
347                     float10MantissaMask);
348         }
349         else if (float32Sign)
350         {
351             // -INF is clamped to 0 since float10 is positive only
352             return 0;
353         }
354         else
355         {
356             return float10ExponentMask;
357         }
358     }
359     else if (float32Sign)
360     {
361         // float10 is positive only, so clamp to zero
362         return 0;
363     }
364     else if (float32Val > float32Maxfloat10)
365     {
366         // The number is too large to be represented as a float10, set to max
367         return float10Max;
368     }
369     else if (float32Val < float32MinDenormfloat10)
370     {
371         // The number is too small to be represented as a denormalized float10, set to 0
372         return 0;
373     }
374     else
375     {
376         if (float32Val < float32MinNormfloat10)
377         {
378             // The number is too small to be represented as a normalized float10
379             // Convert it to a denormalized value.
380             const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
381                                        (float32Val >> float32ExponentFirstBit);
382             ASSERT(shift < 32);
383             float32Val =
384                 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
385         }
386         else
387         {
388             // Rebias the exponent to represent the value as a normalized float10
389             float32Val += 0xC8000000;
390         }
391 
392         return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
393     }
394 }
395 
float11ToFloat32(unsigned short fp11)396 inline float float11ToFloat32(unsigned short fp11)
397 {
398     unsigned short exponent = (fp11 >> 6) & 0x1F;
399     unsigned short mantissa = fp11 & 0x3F;
400 
401     if (exponent == 0x1F)
402     {
403         // INF or NAN
404         return bitCast<float>(0x7f800000 | (mantissa << 17));
405     }
406     else
407     {
408         if (exponent != 0)
409         {
410             // normalized
411         }
412         else if (mantissa != 0)
413         {
414             // The value is denormalized
415             exponent = 1;
416 
417             do
418             {
419                 exponent--;
420                 mantissa <<= 1;
421             } while ((mantissa & 0x40) == 0);
422 
423             mantissa = mantissa & 0x3F;
424         }
425         else  // The value is zero
426         {
427             exponent = static_cast<unsigned short>(-112);
428         }
429 
430         return bitCast<float>(((exponent + 112) << 23) | (mantissa << 17));
431     }
432 }
433 
float10ToFloat32(unsigned short fp10)434 inline float float10ToFloat32(unsigned short fp10)
435 {
436     unsigned short exponent = (fp10 >> 5) & 0x1F;
437     unsigned short mantissa = fp10 & 0x1F;
438 
439     if (exponent == 0x1F)
440     {
441         // INF or NAN
442         return bitCast<float>(0x7f800000 | (mantissa << 17));
443     }
444     else
445     {
446         if (exponent != 0)
447         {
448             // normalized
449         }
450         else if (mantissa != 0)
451         {
452             // The value is denormalized
453             exponent = 1;
454 
455             do
456             {
457                 exponent--;
458                 mantissa <<= 1;
459             } while ((mantissa & 0x20) == 0);
460 
461             mantissa = mantissa & 0x1F;
462         }
463         else  // The value is zero
464         {
465             exponent = static_cast<unsigned short>(-112);
466         }
467 
468         return bitCast<float>(((exponent + 112) << 23) | (mantissa << 18));
469     }
470 }
471 
// Converts to and from float and 16.16 fixed point format.

// Interprets fixedInput as a signed 16.16 fixed-point number and returns its value as a float.
inline float ConvertFixedToFloat(int32_t fixedInput)
{
    // One unit in 16.16 fixed point.
    constexpr float kFixedOne = 65536.0f;
    const float asFloat       = static_cast<float>(fixedInput);
    return asFloat / kFixedOne;
}
477 
// Converts a float to 16.16 fixed point. The result is the two's-complement bit pattern of the
// signed fixed-point value, returned as uint32_t. Inputs outside the thresholds below are clamped
// to kHighest / kLowest, and NaN converts to 0.
inline uint32_t ConvertFloatToFixed(float floatInput)
{
    static constexpr uint32_t kHighest = 32767 * 65536 + 65535;
    // NOTE(review): the "+ 65535" makes this larger than the most negative 16.16 value
    // (-32768 * 65536); kept as-is because callers may depend on the exact constant - confirm.
    static constexpr uint32_t kLowest = static_cast<uint32_t>(-32768 * 65536 + 65535);

    // NaN fails both range checks below and must not reach the integer conversion.
    if (floatInput != floatInput)
    {
        return 0;
    }

    if (floatInput > 32767.65535)
    {
        return kHighest;
    }

    if (floatInput < -32768.65535)
    {
        return kLowest;
    }

    // Converting a negative float directly to an unsigned type is undefined (and saturates to 0 on
    // some targets). Convert to a signed 64-bit integer, which holds every value that passes the
    // checks above, then let the well-defined modular conversion produce the bit pattern.
    return static_cast<uint32_t>(static_cast<int64_t>(floatInput * 65536));
}
496 
// Converts a normalized integer to float by dividing by the maximum value of its type, so the
// type's maximum maps to 1.0.
template <typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");

    if (sizeof(T) <= 2)
    {
        // Types of up to 16 bits fit in a float mantissa, so single precision is enough.
        constexpr float kInverseMax = 1.0f / std::numeric_limits<T>::max();
        return input * kInverseMax;
    }

    // float has only a 23 bit mantissa, so we need to do the calculation in double precision
    constexpr double kInverseMax = 1.0 / std::numeric_limits<T>::max();
    return static_cast<float>(input * kInverseMax);
}
514 
515 template <unsigned int inputBitCount, typename T>
normalizedToFloat(T input)516 inline float normalizedToFloat(T input)
517 {
518     static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
519     static_assert(inputBitCount < (sizeof(T) * 8), "T must have more bits than inputBitCount.");
520     ASSERT((input & ~((1 << inputBitCount) - 1)) == 0);
521 
522     if (inputBitCount > 23)
523     {
524         // float has only a 23 bit mantissa, so we need to do the calculation in double precision
525         constexpr double inverseMax = 1.0 / ((1 << inputBitCount) - 1);
526         return static_cast<float>(input * inverseMax);
527     }
528     else
529     {
530         constexpr float inverseMax = 1.0f / ((1 << inputBitCount) - 1);
531         return input * inverseMax;
532     }
533 }
534 
// Converts a float to a normalized integer of type T: the input is scaled by T's maximum value
// and rounded by adding one half before the truncating conversion. The input is not clamped here.
template <typename T>
inline T floatToNormalized(float input)
{
    constexpr T kMax = std::numeric_limits<T>::max();

    if (sizeof(T) <= 2)
    {
        // Types of up to 16 bits fit in a float mantissa, so single precision is enough.
        return static_cast<T>(kMax * input + 0.5f);
    }

    // float has only a 23 bit mantissa, so we need to do the calculation in double precision
    return static_cast<T>(kMax * static_cast<double>(input) + 0.5);
}
548 
// Converts a float to an outputBitCount-bit normalized value stored in T: the input is scaled by
// 2^outputBitCount - 1 and rounded by adding one half before the truncating conversion. The input
// is not clamped here.
template <unsigned int outputBitCount, typename T>
inline T floatToNormalized(float input)
{
    static_assert(outputBitCount < (sizeof(T) * 8), "T must have more bits than outputBitCount.");

    // Largest outputBitCount-bit value. Built from a 64-bit one so that every bit count the
    // static_assert above accepts is a valid shift; "1 << outputBitCount" overflows int at 31 bits
    // and beyond.
    constexpr uint64_t maxValue = (static_cast<uint64_t>(1) << outputBitCount) - 1;

    if (outputBitCount > 23)
    {
        // float has only a 23 bit mantissa, so we need to do the calculation in double precision
        return static_cast<T>(maxValue * static_cast<double>(input) + 0.5);
    }
    else
    {
        return static_cast<T>(maxValue * input + 0.5f);
    }
}
564 
// Extracts the inputBitCount-bit field that starts at bit inputBitStart of input and returns it
// shifted down to bit 0.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // Mask with the low inputBitCount bits set. It is built in 64 bits because
    // "1 << inputBitCount" is not a valid int shift for 31 bits and beyond, which includes the
    // full-width fields the static_assert allows. The "% 64" only keeps the unselected branch
    // well-formed when inputBitCount is 64.
    constexpr uint64_t wideMask = (inputBitCount >= 64)
                                      ? ~static_cast<uint64_t>(0)
                                      : (static_cast<uint64_t>(1) << (inputBitCount % 64)) - 1;
    const T mask = static_cast<T>(wideMask);
    return (input >> inputBitStart) & mask;
}
573 
// Keeps the low inputBitCount bits of input and moves them up so that the field starts at bit
// inputBitStart.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // Mask with the low inputBitCount bits set. It is built in 64 bits because
    // "1 << inputBitCount" is not a valid int shift for 31 bits and beyond, which includes the
    // full-width fields the static_assert allows. The "% 64" only keeps the unselected branch
    // well-formed when inputBitCount is 64.
    constexpr uint64_t wideMask = (inputBitCount >= 64)
                                      ? ~static_cast<uint64_t>(0)
                                      : (static_cast<uint64_t>(1) << (inputBitCount % 64)) - 1;
    const T mask = static_cast<T>(wideMask);
    return (input & mask) << inputBitStart;
}
582 
// Returns the number of zero bits above the highest set bit of x; 32 when x is 0.
inline unsigned int CountLeadingZeros(uint32_t x)
{
    // Binary search: whenever the upper part of the remaining window is non-zero, the leading
    // zeros cannot extend into the lower part, so discard it and keep narrowing.
    unsigned int zeros = 32u;
    for (unsigned int step = 16u; step >= 2u; step >>= 1)
    {
        const uint32_t upper = x >> step;
        if (upper != 0)
        {
            zeros -= step;
            x = upper;
        }
    }

    // x now fits in two bits: a set high bit accounts for both of them, otherwise x itself
    // (0 or 1) is the number of remaining significant bits.
    if ((x >> 1u) != 0)
    {
        return zeros - 2u;
    }
    return zeros - x;
}
620 
// Average of two unsigned 8-bit values, rounded down.
inline unsigned char average(unsigned char a, unsigned char b)
{
    // Both operands promote to int, so the sum cannot overflow.
    const int sum = a + b;
    return static_cast<unsigned char>(sum / 2);
}
625 
// Average of two signed 8-bit values, truncated toward zero.
inline signed char average(signed char a, signed char b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed char>(sum / 2);
}
630 
// Average of two unsigned 16-bit values, rounded down.
inline unsigned short average(unsigned short a, unsigned short b)
{
    // Both operands promote to int, so the sum cannot overflow.
    const int sum = a + b;
    return static_cast<unsigned short>(sum / 2);
}
635 
// Average of two signed 16-bit values, truncated toward zero.
inline signed short average(signed short a, signed short b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed short>(sum / 2);
}
640 
// Average of two unsigned ints, rounded down, computed without overflowing the sum.
inline unsigned int average(unsigned int a, unsigned int b)
{
    // Halve each operand separately, then add back the unit lost when both were odd.
    const unsigned int bothOdd = a & b & 1u;
    return (a / 2u) + (b / 2u) + bothOdd;
}
645 
// Average of two ints, truncated toward zero. The sum is formed in long long so that it cannot
// overflow.
inline int average(int a, int b)
{
    const long long sum = static_cast<long long>(a) + static_cast<long long>(b);
    return static_cast<int>(sum / 2ll);
}
651 
// Arithmetic mean of two floats.
inline float average(float a, float b)
{
    const float sum = a + b;
    return sum * 0.5f;
}
656 
averageHalfFloat(unsigned short a,unsigned short b)657 inline unsigned short averageHalfFloat(unsigned short a, unsigned short b)
658 {
659     return float32ToFloat16((float16ToFloat32(a) + float16ToFloat32(b)) * 0.5f);
660 }
661 
averageFloat11(unsigned int a,unsigned int b)662 inline unsigned int averageFloat11(unsigned int a, unsigned int b)
663 {
664     return float32ToFloat11((float11ToFloat32(static_cast<unsigned short>(a)) +
665                              float11ToFloat32(static_cast<unsigned short>(b))) *
666                             0.5f);
667 }
668 
averageFloat10(unsigned int a,unsigned int b)669 inline unsigned int averageFloat10(unsigned int a, unsigned int b)
670 {
671     return float32ToFloat10((float10ToFloat32(static_cast<unsigned short>(a)) +
672                              float10ToFloat32(static_cast<unsigned short>(b))) *
673                             0.5f);
674 }
675 
// Half-open interval [low, high) over values of type T. The range is empty when high <= low.
template <typename T>
class Range
{
  public:
    // Creates an empty range with both bounds value-initialized (zero for arithmetic types), so
    // a default-constructed range can be queried safely.
    Range() : mLow(), mHigh() {}
    Range(T lo, T hi) : mLow(lo), mHigh(hi) {}

    // Distance between the bounds, or 0 when the range is empty.
    T length() const { return (empty() ? 0 : (mHigh - mLow)); }

    // Reports whether the two ranges overlap: the range that starts later must start before the
    // other one ends. Empty ranges are not treated specially.
    bool intersects(Range<T> other) const
    {
        if (mLow <= other.mLow)
        {
            return other.mLow < mHigh;
        }
        return mLow < other.mHigh;
    }

    // Grows the range so that it contains value.
    // Assumes that end is non-inclusive.. for example, extending to 5 will make "end" 6.
    void extend(T value)
    {
        if (value < mLow)
        {
            mLow = value;
        }
        if (value >= mHigh)
        {
            mHigh = value + 1;
        }
    }

    bool empty() const { return mHigh <= mLow; }

    bool contains(T value) const { return value >= mLow && value < mHigh; }

    // Minimal forward iterator that walks the values of the range in increasing order.
    class Iterator final
    {
      public:
        Iterator(T value) : mCurrent(value) {}

        Iterator &operator++()
        {
            ++mCurrent;
            return *this;
        }
        bool operator==(const Iterator &other) const { return mCurrent == other.mCurrent; }
        bool operator!=(const Iterator &other) const { return mCurrent != other.mCurrent; }
        T operator*() const { return mCurrent; }

      private:
        T mCurrent;
    };

    Iterator begin() const { return Iterator(mLow); }

    Iterator end() const { return Iterator(mHigh); }

    T low() const { return mLow; }
    T high() const { return mHigh; }

    // Resets to an inverted (empty) range so that the next extend() call defines both bounds.
    // lowest() is used for the upper bound because, for floating-point types, min() is the
    // smallest positive value rather than the most negative one.
    void invalidate()
    {
        mLow  = std::numeric_limits<T>::max();
        mHigh = std::numeric_limits<T>::lowest();
    }

  private:
    T mLow;
    T mHigh;
};

using RangeI  = Range<int>;
using RangeUI = Range<unsigned int>;
746 
747 struct IndexRange
748 {
749     struct Undefined
750     {};
IndexRangeIndexRange751     IndexRange(Undefined) {}
IndexRangeIndexRange752     IndexRange() : IndexRange(0, 0, 0) {}
IndexRangeIndexRange753     IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
754         : start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
755     {
756         ASSERT(start <= end);
757     }
758 
759     // Number of vertices in the range.
vertexCountIndexRange760     size_t vertexCount() const { return (end - start) + 1; }
761 
762     // Inclusive range of indices that are not primitive restart
763     size_t start;
764     size_t end;
765 
766     // Number of non-primitive restart indices
767     size_t vertexIndexCount;
768 };
769 
// Combine a floating-point value representing a mantissa (x) and an integer exponent (exp) into a
// floating-point value. As in GLSL ldexp() built-in.
// Exponents above 128 return positive infinity and exponents below -126 return zero, regardless
// of x.
inline float Ldexp(float x, int exp)
{
    if (exp > 128)
    {
        return std::numeric_limits<float>::infinity();
    }
    if (exp < -126)
    {
        return 0.0f;
    }

    // Scale in double precision so the intermediate cannot overflow or lose bits for the exponent
    // range accepted above; std::ldexp applies the power of two exactly and avoids the general
    // std::pow computation.
    const double result = std::ldexp(static_cast<double>(x), exp);
    return static_cast<float>(result);
}
785 
786 // First, both normalized floating-point values are converted into 16-bit integer values.
787 // Then, the results are packed into the returned 32-bit unsigned integer.
788 // The first float value will be written to the least significant bits of the output;
789 // the last float value will be written to the most significant bits.
790 // The conversion of each value to fixed point is done as follows :
791 // packSnorm2x16 : round(clamp(c, -1, +1) * 32767.0)
packSnorm2x16(float f1,float f2)792 inline uint32_t packSnorm2x16(float f1, float f2)
793 {
794     int16_t leastSignificantBits = static_cast<int16_t>(roundf(clamp(f1, -1.0f, 1.0f) * 32767.0f));
795     int16_t mostSignificantBits  = static_cast<int16_t>(roundf(clamp(f2, -1.0f, 1.0f) * 32767.0f));
796     return static_cast<uint32_t>(mostSignificantBits) << 16 |
797            (static_cast<uint32_t>(leastSignificantBits) & 0xFFFF);
798 }
799 
800 // First, unpacks a single 32-bit unsigned integer u into a pair of 16-bit unsigned integers. Then,
801 // each component is converted to a normalized floating-point value to generate the returned two
802 // float values. The first float value will be extracted from the least significant bits of the
803 // input; the last float value will be extracted from the most-significant bits. The conversion for
804 // unpacked fixed-point value to floating point is done as follows: unpackSnorm2x16 : clamp(f /
805 // 32767.0, -1, +1)
unpackSnorm2x16(uint32_t u,float * f1,float * f2)806 inline void unpackSnorm2x16(uint32_t u, float *f1, float *f2)
807 {
808     int16_t leastSignificantBits = static_cast<int16_t>(u & 0xFFFF);
809     int16_t mostSignificantBits  = static_cast<int16_t>(u >> 16);
810     *f1 = clamp(static_cast<float>(leastSignificantBits) / 32767.0f, -1.0f, 1.0f);
811     *f2 = clamp(static_cast<float>(mostSignificantBits) / 32767.0f, -1.0f, 1.0f);
812 }
813 
814 // First, both normalized floating-point values are converted into 16-bit integer values.
815 // Then, the results are packed into the returned 32-bit unsigned integer.
816 // The first float value will be written to the least significant bits of the output;
817 // the last float value will be written to the most significant bits.
818 // The conversion of each value to fixed point is done as follows:
819 // packUnorm2x16 : round(clamp(c, 0, +1) * 65535.0)
packUnorm2x16(float f1,float f2)820 inline uint32_t packUnorm2x16(float f1, float f2)
821 {
822     uint16_t leastSignificantBits = static_cast<uint16_t>(roundf(clamp(f1, 0.0f, 1.0f) * 65535.0f));
823     uint16_t mostSignificantBits  = static_cast<uint16_t>(roundf(clamp(f2, 0.0f, 1.0f) * 65535.0f));
824     return static_cast<uint32_t>(mostSignificantBits) << 16 |
825            static_cast<uint32_t>(leastSignificantBits);
826 }
827 
// Splits a 32-bit unsigned integer into two unsigned 16-bit values and converts each one to a
// normalized float. *f1 comes from the least significant 16 bits and *f2 from the most
// significant 16 bits. Each unpacked fixed-point value f is converted as:
// unpackUnorm2x16 : f / 65535.0
inline void unpackUnorm2x16(uint32_t u, float *f1, float *f2)
{
    constexpr float kScale = 65535.0f;

    const uint16_t lowValue  = static_cast<uint16_t>(u & 0xFFFF);
    const uint16_t highValue = static_cast<uint16_t>(u >> 16);

    *f1 = static_cast<float>(lowValue) / kScale;
    *f2 = static_cast<float>(highValue) / kScale;
}
840 
841 // Helper functions intended to be used only here.
842 namespace priv
843 {
844 
ToPackedUnorm8(float f)845 inline uint8_t ToPackedUnorm8(float f)
846 {
847     return static_cast<uint8_t>(roundf(clamp(f, 0.0f, 1.0f) * 255.0f));
848 }
849 
ToPackedSnorm8(float f)850 inline int8_t ToPackedSnorm8(float f)
851 {
852     return static_cast<int8_t>(roundf(clamp(f, -1.0f, 1.0f) * 127.0f));
853 }
854 
855 }  // namespace priv
856 
857 // Packs 4 normalized unsigned floating-point values to a single 32-bit unsigned integer. Works
858 // similarly to packUnorm2x16. The floats are clamped to the range 0.0 to 1.0, and written to the
859 // unsigned integer starting from the least significant bits.
PackUnorm4x8(float f1,float f2,float f3,float f4)860 inline uint32_t PackUnorm4x8(float f1, float f2, float f3, float f4)
861 {
862     uint8_t bits[4];
863     bits[0]         = priv::ToPackedUnorm8(f1);
864     bits[1]         = priv::ToPackedUnorm8(f2);
865     bits[2]         = priv::ToPackedUnorm8(f3);
866     bits[3]         = priv::ToPackedUnorm8(f4);
867     uint32_t result = 0u;
868     for (int i = 0; i < 4; ++i)
869     {
870         int shift = i * 8;
871         result |= (static_cast<uint32_t>(bits[i]) << shift);
872     }
873     return result;
874 }
875 
// Unpacks 4 normalized unsigned floating-point values from a single 32-bit unsigned integer into f.
// Works similarly to unpackUnorm2x16. The floats are unpacked starting from the least significant
// bits: f[0] comes from bits 0-7 and f[3] from bits 24-31. f must point to at least 4 floats.
inline void UnpackUnorm4x8(uint32_t u, float *f)
{
    // Consume one byte per iteration, lowest byte first.
    for (int component = 0; component < 4; ++component, u >>= 8)
    {
        const uint8_t byteValue = static_cast<uint8_t>(u & 0xFF);
        f[component]            = static_cast<float>(byteValue) / 255.0f;
    }
}
888 
889 // Packs 4 normalized signed floating-point values to a single 32-bit unsigned integer. The floats
890 // are clamped to the range -1.0 to 1.0, and written to the unsigned integer starting from the least
891 // significant bits.
PackSnorm4x8(float f1,float f2,float f3,float f4)892 inline uint32_t PackSnorm4x8(float f1, float f2, float f3, float f4)
893 {
894     int8_t bits[4];
895     bits[0]         = priv::ToPackedSnorm8(f1);
896     bits[1]         = priv::ToPackedSnorm8(f2);
897     bits[2]         = priv::ToPackedSnorm8(f3);
898     bits[3]         = priv::ToPackedSnorm8(f4);
899     uint32_t result = 0u;
900     for (int i = 0; i < 4; ++i)
901     {
902         int shift = i * 8;
903         result |= ((static_cast<uint32_t>(bits[i]) & 0xFF) << shift);
904     }
905     return result;
906 }
907 
908 // Unpacks 4 normalized signed floating-point values from a single 32-bit unsigned integer into f.
909 // Works similarly to unpackSnorm2x16. The floats are unpacked starting from the least significant
910 // bits, and clamped to the range -1.0 to 1.0.
UnpackSnorm4x8(uint32_t u,float * f)911 inline void UnpackSnorm4x8(uint32_t u, float *f)
912 {
913     for (int i = 0; i < 4; ++i)
914     {
915         int shift   = i * 8;
916         int8_t bits = static_cast<int8_t>((u >> shift) & 0xFF);
917         f[i]        = clamp(static_cast<float>(bits) / 127.0f, -1.0f, 1.0f);
918     }
919 }
920 
921 // Returns an unsigned integer obtained by converting the two floating-point values to the 16-bit
922 // floating-point representation found in the OpenGL ES Specification, and then packing these
923 // two 16-bit integers into a 32-bit unsigned integer.
924 // f1: The 16 least-significant bits of the result;
925 // f2: The 16 most-significant bits.
packHalf2x16(float f1,float f2)926 inline uint32_t packHalf2x16(float f1, float f2)
927 {
928     uint16_t leastSignificantBits = static_cast<uint16_t>(float32ToFloat16(f1));
929     uint16_t mostSignificantBits  = static_cast<uint16_t>(float32ToFloat16(f2));
930     return static_cast<uint32_t>(mostSignificantBits) << 16 |
931            static_cast<uint32_t>(leastSignificantBits);
932 }
933 
934 // Returns two floating-point values obtained by unpacking a 32-bit unsigned integer into a pair of
935 // 16-bit values, interpreting those values as 16-bit floating-point numbers according to the OpenGL
936 // ES Specification, and converting them to 32-bit floating-point values. The first float value is
937 // obtained from the 16 least-significant bits of u; the second component is obtained from the 16
938 // most-significant bits of u.
unpackHalf2x16(uint32_t u,float * f1,float * f2)939 inline void unpackHalf2x16(uint32_t u, float *f1, float *f2)
940 {
941     uint16_t leastSignificantBits = static_cast<uint16_t>(u & 0xFFFF);
942     uint16_t mostSignificantBits  = static_cast<uint16_t>(u >> 16);
943 
944     *f1 = float16ToFloat32(leastSignificantBits);
945     *f2 = float16ToFloat32(mostSignificantBits);
946 }
947 
sRGBToLinear(uint8_t srgbValue)948 inline uint8_t sRGBToLinear(uint8_t srgbValue)
949 {
950     float value = srgbValue / 255.0f;
951     if (value <= 0.04045f)
952     {
953         value = value / 12.92f;
954     }
955     else
956     {
957         value = std::pow((value + 0.055f) / 1.055f, 2.4f);
958     }
959     return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
960 }
961 
linearToSRGB(uint8_t linearValue)962 inline uint8_t linearToSRGB(uint8_t linearValue)
963 {
964     float value = linearValue / 255.0f;
965     if (value <= 0.0f)
966     {
967         value = 0.0f;
968     }
969     else if (value < 0.0031308f)
970     {
971         value = value * 12.92f;
972     }
973     else if (value < 1.0f)
974     {
975         value = std::pow(value, 0.41666f) * 1.055f - 0.055f;
976     }
977     else
978     {
979         value = 1.0f;
980     }
981     return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
982 }
983 
// Returns value with its 32 bits in reverse order: bit 0 becomes bit 31, bit 1 becomes bit 30,
// and so on.
inline uint32_t BitfieldReverse(uint32_t value)
{
    // TODO(oetuaho@nvidia.com): Optimize this if needed. There don't seem to be compiler intrinsics
    // for this, and right now it's not used in performance-critical paths.
    uint32_t reversed = 0u;
    for (int step = 0; step < 32; ++step)
    {
        // Pop the lowest remaining input bit and push it onto the low end of the output; after 32
        // steps the first bit popped has travelled to the top.
        reversed = (reversed << 1) | (value & 1u);
        value >>= 1;
    }
    return reversed;
}
996 
997 // Count the 1 bits.
998 #if defined(_MSC_VER) && !defined(__clang__)
999 #    if defined(_M_IX86) || defined(_M_X64)
1000 namespace priv
1001 {
1002 // Check POPCNT instruction support and cache the result.
1003 // https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64#remarks
1004 static const bool kHasPopcnt = [] {
1005     int info[4];
1006     __cpuid(&info[0], 1);
1007     return static_cast<bool>(info[2] & 0x800000);
1008 }();
1009 }  // namespace priv
1010 
// Polyfills for x86/x64 CPUs without POPCNT.
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
//
// Software population count for 32-bit values: returns the number of 1 bits in bits.
inline int BitCountPolyfill(uint32_t bits)
{
    // Each 2-bit field now holds the count of set bits it originally contained.
    const uint32_t pairs = bits - ((bits >> 1) & 0x55555555);
    // Fold adjacent 2-bit counts into 4-bit counts.
    const uint32_t nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    // Fold into per-byte counts; the mask applies to the whole sum.
    const uint32_t perByte = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
    // The multiply accumulates all byte counts into the top byte.
    return static_cast<int>((perByte * 0x01010101) >> 24);
}
1020 
// Software population count for 64-bit values: returns the number of 1 bits in bits. Same
// parallel-summing scheme as the 32-bit variant, with the masks widened to 64 bits.
inline int BitCountPolyfill(uint64_t bits)
{
    // Each 2-bit field now holds the count of set bits it originally contained.
    const uint64_t pairs = bits - ((bits >> 1) & 0x5555555555555555ull);
    // Fold adjacent 2-bit counts into 4-bit counts.
    const uint64_t nibbles = (pairs & 0x3333333333333333ull) + ((pairs >> 2) & 0x3333333333333333ull);
    // Fold into per-byte counts; the mask applies to the whole sum.
    const uint64_t perByte = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F0F0F0F0Full;
    // The multiply accumulates all byte counts into the top byte.
    return static_cast<int>((perByte * 0x0101010101010101ull) >> 56);
}
1028 
BitCount(uint32_t bits)1029 inline int BitCount(uint32_t bits)
1030 {
1031     if (priv::kHasPopcnt)
1032     {
1033         return static_cast<int>(__popcnt(bits));
1034     }
1035     return BitCountPolyfill(bits);
1036 }
1037 
BitCount(uint64_t bits)1038 inline int BitCount(uint64_t bits)
1039 {
1040     if (priv::kHasPopcnt)
1041     {
1042 #        if defined(_M_X64)
1043         return static_cast<int>(__popcnt64(bits));
1044 #        else   // x86
1045         return static_cast<int>(__popcnt(static_cast<uint32_t>(bits >> 32)) +
1046                                 __popcnt(static_cast<uint32_t>(bits)));
1047 #        endif  // defined(_M_X64)
1048     }
1049     return BitCountPolyfill(bits);
1050 }
1051 
1052 #    elif defined(_M_ARM) || defined(_M_ARM64)
1053 
1054 // MSVC's _CountOneBits* intrinsics are not defined for ARM64, moreover they do not use dedicated
1055 // NEON instructions.
1056 
BitCount(uint32_t bits)1057 inline int BitCount(uint32_t bits)
1058 {
1059     // cast bits to 8x8 datatype and use VCNT on it
1060     const uint8x8_t vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(bits)));
1061 
1062     // pairwise sums: 8x8 -> 16x4 -> 32x2
1063     return static_cast<int>(vget_lane_u32(vpaddl_u16(vpaddl_u8(vsum)), 0));
1064 }
1065 
BitCount(uint64_t bits)1066 inline int BitCount(uint64_t bits)
1067 {
1068     // cast bits to 8x8 datatype and use VCNT on it
1069     const uint8x8_t vsum = vcnt_u8(vcreate_u8(bits));
1070 
1071     // pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1
1072     return static_cast<int>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vsum))), 0));
1073 }
1074 #    endif  // defined(_M_IX86) || defined(_M_X64)
1075 #endif      // defined(_MSC_VER) && !defined(__clang__)
1076 
1077 #if defined(ANGLE_PLATFORM_POSIX) || defined(__clang__)
// Returns the number of 1 bits in a 32-bit value via the compiler's popcount builtin.
inline int BitCount(uint32_t bits)
{
    const int setBits = __builtin_popcount(bits);
    return setBits;
}
1082 
// Returns the number of 1 bits in a 64-bit value via the compiler's popcount builtin.
inline int BitCount(uint64_t bits)
{
    const int setBits = __builtin_popcountll(bits);
    return setBits;
}
1087 #endif  // defined(ANGLE_PLATFORM_POSIX) || defined(__clang__)
1088 
// Returns the number of 1 bits in an 8-bit value by widening it and delegating to the 32-bit
// overload.
inline int BitCount(uint8_t bits)
{
    const uint32_t widened = bits;
    return BitCount(widened);
}
1093 
// Returns the number of 1 bits in a 16-bit value by widening it and delegating to the 32-bit
// overload.
inline int BitCount(uint16_t bits)
{
    const uint32_t widened = bits;
    return BitCount(widened);
}
1098 
1099 #if defined(ANGLE_PLATFORM_WINDOWS)
1100 // Return the index of the least significant bit set. Indexing is such that bit 0 is the least
1101 // significant bit. Implemented for different bit widths on different platforms.
ScanForward(uint32_t bits)1102 inline unsigned long ScanForward(uint32_t bits)
1103 {
1104     ASSERT(bits != 0u);
1105     unsigned long firstBitIndex = 0ul;
1106     unsigned char ret           = _BitScanForward(&firstBitIndex, bits);
1107     ASSERT(ret != 0u);
1108     return firstBitIndex;
1109 }
1110 
ScanForward(uint64_t bits)1111 inline unsigned long ScanForward(uint64_t bits)
1112 {
1113     ASSERT(bits != 0u);
1114     unsigned long firstBitIndex = 0ul;
1115 #    if defined(ANGLE_IS_64_BIT_CPU)
1116     unsigned char ret = _BitScanForward64(&firstBitIndex, bits);
1117 #    else
1118     unsigned char ret;
1119     if (static_cast<uint32_t>(bits) == 0)
1120     {
1121         ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits >> 32));
1122         firstBitIndex += 32ul;
1123     }
1124     else
1125     {
1126         ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits));
1127     }
1128 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1129     ASSERT(ret != 0u);
1130     return firstBitIndex;
1131 }
1132 
1133 // Return the index of the most significant bit set. Indexing is such that bit 0 is the least
1134 // significant bit.
ScanReverse(uint32_t bits)1135 inline unsigned long ScanReverse(uint32_t bits)
1136 {
1137     ASSERT(bits != 0u);
1138     unsigned long lastBitIndex = 0ul;
1139     unsigned char ret          = _BitScanReverse(&lastBitIndex, bits);
1140     ASSERT(ret != 0u);
1141     return lastBitIndex;
1142 }
1143 
ScanReverse(uint64_t bits)1144 inline unsigned long ScanReverse(uint64_t bits)
1145 {
1146     ASSERT(bits != 0u);
1147     unsigned long lastBitIndex = 0ul;
1148 #    if defined(ANGLE_IS_64_BIT_CPU)
1149     unsigned char ret = _BitScanReverse64(&lastBitIndex, bits);
1150 #    else
1151     unsigned char ret;
1152     if (static_cast<uint32_t>(bits >> 32) == 0)
1153     {
1154         ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits));
1155     }
1156     else
1157     {
1158         ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits >> 32));
1159         lastBitIndex += 32ul;
1160     }
1161 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1162     ASSERT(ret != 0u);
1163     return lastBitIndex;
1164 }
1165 #endif  // defined(ANGLE_PLATFORM_WINDOWS)
1166 
1167 #if defined(ANGLE_PLATFORM_POSIX)
ScanForward(uint32_t bits)1168 inline unsigned long ScanForward(uint32_t bits)
1169 {
1170     ASSERT(bits != 0u);
1171     return static_cast<unsigned long>(__builtin_ctz(bits));
1172 }
1173 
ScanForward(uint64_t bits)1174 inline unsigned long ScanForward(uint64_t bits)
1175 {
1176     ASSERT(bits != 0u);
1177 #    if defined(ANGLE_IS_64_BIT_CPU)
1178     return static_cast<unsigned long>(__builtin_ctzll(bits));
1179 #    else
1180     return static_cast<unsigned long>(static_cast<uint32_t>(bits) == 0
1181                                           ? __builtin_ctz(static_cast<uint32_t>(bits >> 32)) + 32
1182                                           : __builtin_ctz(static_cast<uint32_t>(bits)));
1183 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1184 }
1185 
ScanReverse(uint32_t bits)1186 inline unsigned long ScanReverse(uint32_t bits)
1187 {
1188     ASSERT(bits != 0u);
1189     return static_cast<unsigned long>(sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(bits));
1190 }
1191 
ScanReverse(uint64_t bits)1192 inline unsigned long ScanReverse(uint64_t bits)
1193 {
1194     ASSERT(bits != 0u);
1195 #    if defined(ANGLE_IS_64_BIT_CPU)
1196     return static_cast<unsigned long>(sizeof(uint64_t) * CHAR_BIT - 1 - __builtin_clzll(bits));
1197 #    else
1198     if (static_cast<uint32_t>(bits >> 32) == 0)
1199     {
1200         return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits));
1201     }
1202     else
1203     {
1204         return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits >> 32)) +
1205                32;
1206     }
1207 #    endif  // defined(ANGLE_IS_64_BIT_CPU)
1208 }
1209 #endif  // defined(ANGLE_PLATFORM_POSIX)
1210 
// Index of the least significant set bit of an 8-bit value; widens the argument and delegates to
// the 32-bit overload.
inline unsigned long ScanForward(uint8_t bits)
{
    const uint32_t widened = bits;
    return ScanForward(widened);
}
1215 
// Index of the least significant set bit of a 16-bit value; widens the argument and delegates to
// the 32-bit overload.
inline unsigned long ScanForward(uint16_t bits)
{
    const uint32_t widened = bits;
    return ScanForward(widened);
}
1220 
// Index of the most significant set bit of an 8-bit value; widens the argument and delegates to
// the 32-bit overload.
inline unsigned long ScanReverse(uint8_t bits)
{
    const uint32_t widened = bits;
    return ScanReverse(widened);
}
1225 
// Index of the most significant set bit of a 16-bit value; widens the argument and delegates to
// the 32-bit overload.
inline unsigned long ScanReverse(uint16_t bits)
{
    const uint32_t widened = bits;
    return ScanReverse(widened);
}
1230 
// GLSL-style findLSB: yields the index of the least significant 1 bit of bits, or -1 when bits
// is zero. T must be an integral type.
template <typename T>
int FindLSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    // ScanForward requires a non-zero argument, so zero is handled separately.
    return (bits == 0u) ? -1 : static_cast<int>(ScanForward(bits));
}
1245 
// GLSL-style findMSB: yields the index of the most significant 1 bit of bits, or -1 when bits
// is zero. T must be an integral type.
template <typename T>
int FindMSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    // ScanReverse requires a non-zero argument, so zero is handled separately.
    return (bits == 0u) ? -1 : static_cast<int>(ScanReverse(bits));
}
1260 
1261 // Returns whether the argument is Not a Number.
1262 // IEEE 754 single precision NaN representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1263 // non-zero.
isNaN(float f)1264 inline bool isNaN(float f)
1265 {
1266     // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1267     // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1268     return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1269            (bitCast<uint32_t>(f) & 0x7fffffu);
1270 }
1271 
1272 // Returns whether the argument is infinity.
1273 // IEEE 754 single precision infinity representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1274 // zero.
isInf(float f)1275 inline bool isInf(float f)
1276 {
1277     // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1278     // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1279     return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1280            !(bitCast<uint32_t>(f) & 0x7fffffu);
1281 }
1282 
namespace priv
{
// Compile-time search for an exact integer square root of N, trying candidates upward from R.
// value is R when R * R == N, 0 as soon as R * R exceeds N, and otherwise whatever the search
// starting at R + 1 yields.
template <unsigned int N, unsigned int R>
struct iSquareRoot
{
    static constexpr unsigned int solve()
    {
        if (R * R > N)
        {
            // Overshot without an exact match.
            return 0;
        }
        if (R * R == N)
        {
            return R;
        }
        return static_cast<unsigned int>(iSquareRoot<N, R + 1>::value);
    }
    enum Result
    {
        value = iSquareRoot::solve()
    };
};

// Terminates the recursion when the candidate reaches N itself; value is N in that case.
template <unsigned int N>
struct iSquareRoot<N, N>
{
    enum result
    {
        value = N
    };
};

}  // namespace priv
1310 
1311 template <unsigned int N>
1312 constexpr unsigned int iSquareRoot()
1313 {
1314     return priv::iSquareRoot<N, 1>::value;
1315 }
1316 
// Sum, difference and multiplication operations for signed ints that wrap on 32-bit overflow.
//
// Unsigned types are defined to do arithmetic modulo 2^n in C++. For signed types, overflow
// behavior is undefined.

// Adds lhs and rhs as 32-bit unsigned values, so the sum wraps modulo 2^32, and converts the
// result back to T.
template <typename T>
inline T WrappingSum(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) + static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1329 
// Subtracts rhs from lhs as 32-bit unsigned values, so the difference wraps modulo 2^32, and
// converts the result back to T.
template <typename T>
inline T WrappingDiff(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) - static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1337 
// Multiplies two 32-bit signed integers and returns the low 32 bits of the product, i.e. the
// result wraps instead of overflowing.
inline int32_t WrappingMul(int32_t lhs, int32_t rhs)
{
    // A 64-bit product of two 32-bit operands cannot overflow.
    const int64_t wideProduct = static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
    // Keep only the low-order 32 bits; conversion to an unsigned type is modular.
    const uint32_t lowBits = static_cast<uint32_t>(wideProduct);
    // Reinterpret those bits as a signed value. Values above INT32_MAX come out negative.
    return static_cast<int32_t>(lowBits);
}
1350 
// Scales a screen-space dimension by 2 / viewportDimension. No offset is applied.
inline float scaleScreenDimensionToNdc(float dimensionScreen, float viewportDimension)
{
    const float doubled = 2.0f * dimensionScreen;
    return doubled / viewportDimension;
}
1355 
// Maps a screen-space coordinate to 2 * (coordinateScreen / viewportDimension - 0.5), so 0 maps
// to -1 and viewportDimension maps to +1.
inline float scaleScreenCoordinateToNdc(float coordinateScreen, float viewportDimension)
{
    return 2.0f * (coordinateScreen / viewportDimension - 0.5f);
}
1361 
1362 }  // namespace gl
1363 
1364 namespace rx
1365 {
1366 
// Rounds value up to the next multiple of alignment (value is returned unchanged when it is
// already a multiple). Only +, - and % are used, so alignment does not have to be a power of two.
template <typename T>
T roundUp(const T value, const T alignment)
{
    // Bump past the next boundary, then drop whatever exceeds a whole multiple.
    auto bumped          = value + alignment - static_cast<T>(1);
    const auto remainder = bumped % alignment;
    return bumped - remainder;
}
1373 
1374 template <typename T>
1375 constexpr T roundUpPow2(const T value, const T alignment)
1376 {
1377     ASSERT(gl::isPow2(alignment));
1378     return (value + alignment - 1) & ~(alignment - 1);
1379 }
1380 
1381 template <typename T>
1382 constexpr T roundDownPow2(const T value, const T alignment)
1383 {
1384     ASSERT(gl::isPow2(alignment));
1385     return value & ~(alignment - 1);
1386 }
1387 
1388 template <typename T>
1389 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
1390 {
1391     angle::CheckedNumeric<T> checkedValue(value);
1392     angle::CheckedNumeric<T> checkedAlignment(alignment);
1393     return roundUp(checkedValue, checkedAlignment);
1394 }
1395 
// Divides value by divisor and rounds the quotient up. divisor must not be zero.
inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned int divisor)
{
    // A non-zero remainder means the exact quotient lies above the truncated one.
    return value / divisor + ((value % divisor != 0) ? 1u : 0u);
}
1401 
// ANGLE_HAS_BUILTIN(x) forwards to __has_builtin(x) when the compiler defines __has_builtin and
// evaluates to 0 otherwise, so it can appear in #if expressions on any compiler.
#if defined(__has_builtin)
#    define ANGLE_HAS_BUILTIN(x) __has_builtin(x)
#else
#    define ANGLE_HAS_BUILTIN(x) 0
#endif
1407 
// ANGLE_ROTL / ANGLE_ROTL64 / ANGLE_ROTR16 rotate a 32-bit value left, a 64-bit value left and a
// 16-bit value right, respectively. They map to the MSVC intrinsics when _MSC_VER is defined, to
// the clang builtins when all three are available, and to the inline fallbacks otherwise.
#if defined(_MSC_VER)

#    define ANGLE_ROTL(x, y) _rotl(x, y)
#    define ANGLE_ROTL64(x, y) _rotl64(x, y)
#    define ANGLE_ROTR16(x, y) _rotr16(x, y)

#elif defined(__clang__) && ANGLE_HAS_BUILTIN(__builtin_rotateleft32) && \
    ANGLE_HAS_BUILTIN(__builtin_rotateleft64) && ANGLE_HAS_BUILTIN(__builtin_rotateright16)

#    define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
#    define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
#    define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)

// No intrinsic available: use the portable functions defined below.
#else
1422 
// Portable fallback that rotates the 32-bit value x left by r bit positions.
// The count is reduced modulo 32, matching the rotate intrinsics used on other compilers. This
// also keeps both shift amounts inside [0, 31]: a plain `x >> (32 - r)` would shift by the full
// width for r == 0, which is undefined behavior.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    const uint32_t count = static_cast<uint32_t>(r) & 31u;
    // (32 - count) & 31 maps count == 0 to a shift of 0 instead of 32.
    return (x << count) | (x >> ((32u - count) & 31u));
}
1427 
// Portable fallback that rotates the 64-bit value x left by r bit positions.
// The count is reduced modulo 64, matching the rotate intrinsics used on other compilers. This
// also keeps both shift amounts inside [0, 63]: a plain `x >> (64 - r)` would shift by the full
// width for r == 0, which is undefined behavior.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    const uint32_t count = static_cast<uint32_t>(r) & 63u;
    // (64 - count) & 63 maps count == 0 to a shift of 0 instead of 64.
    return (x << count) | (x >> ((64u - count) & 63u));
}
1432 
// Portable fallback that rotates the 16-bit value x right by r bit positions.
// The count is reduced modulo 16, matching the rotate intrinsics used on other compilers, so
// negative or oversized counts no longer produce an out-of-range shift. The arithmetic is done
// on an unsigned 32-bit copy to avoid shifting bits into the sign position of a promoted int.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    const uint32_t count = static_cast<uint32_t>(r) & 15u;
    const uint32_t wide  = x;
    // (16 - count) & 15 maps count == 0 to a shift of 0; the cast drops bits above bit 15.
    return static_cast<uint16_t>((wide >> count) | (wide << ((16u - count) & 15u)));
}
1437 
// Route the rotate macros to the portable fallbacks above. The names are fully qualified so the
// macros expand correctly from any namespace.
#    define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
#    define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
#    define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)

#endif  // defined(_MSC_VER)
1443 
// Returns floor(log2(bytes)), evaluated recursively so it can be used in constant expressions.
// bytes == 0 has no logarithm; it yields 0 (as gl::log2 does) instead of recursing without end,
// which is what a `bytes == 1` base case would do because 0 / 2 stays 0.
constexpr unsigned int Log2(unsigned int bytes)
{
    return bytes <= 1 ? 0 : (1 + Log2(bytes / 2));
}
1448 }  // namespace rx
1449 
1450 #endif  // COMMON_MATHUTIL_H_
1451