1 //
2 // Copyright 2002 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 // mathutil.h: Math and bit manipulation functions.
8
9 #ifndef COMMON_MATHUTIL_H_
10 #define COMMON_MATHUTIL_H_
11
12 #include <math.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <algorithm>
17 #include <limits>
18
19 #include <anglebase/numerics/safe_math.h>
20
21 #include "common/debug.h"
22 #include "common/platform.h"
23
24 namespace angle
25 {
26 using base::CheckedNumeric;
27 using base::IsValueInRangeForNumericType;
28 } // namespace angle
29
30 namespace gl
31 {
32
// IEEE 754 bit patterns of the value 1.0 in single (32-bit) and half (16-bit) precision.
constexpr unsigned int Float32One   = 0x3F800000;
constexpr unsigned short Float16One = 0x3C00;
35
// Returns true when x is a non-zero value with a single bit set (a power of two).
// Zero is reported as not being a power of two.
template <typename T>
inline constexpr bool isPow2(T x)
{
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    // Clearing the lowest set bit of a power of two leaves nothing behind.
    return (x != 0) && ((x & (x - 1)) == 0);
}
42
// Integer base-2 logarithm, rounded down: returns the largest r such that (x >> r) is still
// greater than or equal to 1. Inputs of 0 and 1 both yield 0.
template <typename T>
inline int log2(T x)
{
    static_assert(std::is_integral<T>::value, "log2 must be called on an integer type.");
    int shift = 0;
    for (; (x >> shift) > 1; ++shift)
    {
    }
    return shift;
}
52
// Rounds x up to the next power of two. Exact powers of two are returned unchanged, 0 maps to 1,
// and inputs above 2^31 wrap around to 0 because the result does not fit in 32 bits.
inline unsigned int ceilPow2(unsigned int x)
{
    // Start from x - 1 so that a value that is already a power of two maps to itself.
    unsigned int smeared = (x != 0) ? (x - 1) : x;

    // Copy the highest set bit into every lower bit position.
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        smeared |= smeared >> shift;
    }

    return smeared + 1;
}
66
// Converts |value| to DestT, saturating at DestT's limits whenever DestT's range is narrower than
// SrcT's on that side. Values inside the destination range go through a plain static_cast.
template <typename DestT, typename SrcT>
inline DestT clampCast(SrcT value)
{
    using DestLimits = std::numeric_limits<DestT>;
    using SrcLimits  = std::numeric_limits<SrcT>;

    // The ranges are compared as long double. lowest() is used rather than min() because for
    // floating-point types min() is the smallest positive normalized value, not the most negative.
    constexpr long double kDestLowest  = static_cast<long double>(DestLimits::lowest());
    constexpr long double kDestHighest = static_cast<long double>(DestLimits::max());
    constexpr long double kSrcLowest   = static_cast<long double>(SrcLimits::lowest());
    constexpr long double kSrcHighest  = static_cast<long double>(SrcLimits::max());

    // Only clamp at the top if the source can actually exceed the destination.
    if (kDestHighest < kSrcHighest)
    {
        const DestT ceiling = DestLimits::max();
        if (value >= static_cast<SrcT>(ceiling))
        {
            return ceiling;
        }
    }

    // Likewise at the bottom.
    if (kDestLowest > kSrcLowest)
    {
        const DestT floor = DestLimits::lowest();
        if (value <= static_cast<SrcT>(floor))
        {
            return floor;
        }
    }

    return static_cast<DestT>(value);
}

// bool -> integer specializations. They bypass the generic path so that the destination maximum is
// never cast to bool, which triggers a performance warning on MSVS 2015.
template <>
inline unsigned int clampCast(bool value)
{
    return value ? 1u : 0u;
}

template <>
inline int clampCast(bool value)
{
    return value ? 1 : 0;
}
114
// Restricts x to the interval [min, max] and returns the result as T.
template <typename T, typename MIN, typename MAX>
inline T clamp(T x, MIN min, MAX max)
{
    // A NaN fails every comparison, so a NaN input falls through to min.
    const bool aboveLowerBound = x > min;
    return aboveLowerBound ? (x > max ? max : x) : min;
}
121
// Restricts x to [0, 1]. NaN fails the first comparison and therefore yields 0.
inline float clamp01(float x)
{
    return x > 0.0f ? (x > 1.0f ? 1.0f : x) : 0.0f;
}
126
// Converts x to an n-bit unsigned normalized integer: inputs above 1 saturate to the all-ones
// value, negative inputs become 0, and everything in between is scaled and rounded to nearest.
template <const int n>
inline unsigned int unorm(float x)
{
    const unsigned int maxValue = 0xFFFFFFFF >> (32 - n);

    if (x > 1)
    {
        return maxValue;
    }
    if (x < 0)
    {
        return 0;
    }
    // Adding 0.5 before truncation rounds to the nearest integer.
    return static_cast<unsigned int>(maxValue * x + 0.5f);
}
145
// Reports whether SSE2 may be used. Without ANGLE_USE_SSE the answer is always false. With it, the
// CPU is queried once (only on non-ARM Windows builds) and the result is cached in function-local
// statics; on every other platform the cached answer stays false.
inline bool supportsSSE2()
{
#if defined(ANGLE_USE_SSE)
    static bool sQueried = false;
    static bool sHasSSE2 = false;

    if (!sQueried)
    {
#    if defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        int cpuInfo[4];
        __cpuid(cpuInfo, 0);

        // cpuInfo[0] now holds the highest supported leaf; leaf 1 is needed for feature bits.
        if (cpuInfo[0] >= 1)
        {
            __cpuid(cpuInfo, 1);

            // Bit 26 of the fourth register returned by leaf 1.
            sHasSSE2 = (cpuInfo[3] >> 26) & 1;
        }
#    endif  // defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        sQueried = true;
    }

    return sHasSSE2;
#else   // defined(ANGLE_USE_SSE)
    return false;
#endif
}
176
// Reinterprets the object representation of |source| as a destType by copying bytes.
// Only min(sizeof(destType), sizeof(sourceType)) bytes are copied. The result is value-initialized
// first, so when destType is larger than sourceType the bytes that are not overwritten are zero
// instead of indeterminate.
template <typename destType, typename sourceType>
destType bitCast(const sourceType &source)
{
    const size_t copySize = std::min(sizeof(destType), sizeof(sourceType));
    destType output{};
    memcpy(&output, &source, copySize);
    return output;
}
185
// Maps an integer onto [-1, 1] as a double: non-negative values are divided by the type's maximum,
// negative values by the magnitude of the type's minimum.
// https://stackoverflow.com/a/37581284
template <typename T>
static constexpr double normalize(T value)
{
    return !(value < 0) ? static_cast<double>(value) / std::numeric_limits<T>::max()
                        : -static_cast<double>(value) / std::numeric_limits<T>::min();
}
193
float32ToFloat16(float fp32)194 inline unsigned short float32ToFloat16(float fp32)
195 {
196 unsigned int fp32i = bitCast<unsigned int>(fp32);
197 unsigned int sign = (fp32i & 0x80000000) >> 16;
198 unsigned int abs = fp32i & 0x7FFFFFFF;
199
200 if (abs > 0x7F800000)
201 { // NaN
202 return 0x7FFF;
203 }
204 else if (abs > 0x47FFEFFF)
205 { // Infinity
206 return static_cast<uint16_t>(sign | 0x7C00);
207 }
208 else if (abs < 0x38800000) // Denormal
209 {
210 unsigned int mantissa = (abs & 0x007FFFFF) | 0x00800000;
211 int e = 113 - (abs >> 23);
212
213 if (e < 24)
214 {
215 abs = mantissa >> e;
216 }
217 else
218 {
219 abs = 0;
220 }
221
222 return static_cast<unsigned short>(sign | (abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
223 }
224 else
225 {
226 return static_cast<unsigned short>(
227 sign | (abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
228 }
229 }
230
231 float float16ToFloat32(unsigned short h);
232
233 unsigned int convertRGBFloatsTo999E5(float red, float green, float blue);
234 void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float *blue);
235
float32ToFloat11(float fp32)236 inline unsigned short float32ToFloat11(float fp32)
237 {
238 const unsigned int float32MantissaMask = 0x7FFFFF;
239 const unsigned int float32ExponentMask = 0x7F800000;
240 const unsigned int float32SignMask = 0x80000000;
241 const unsigned int float32ValueMask = ~float32SignMask;
242 const unsigned int float32ExponentFirstBit = 23;
243 const unsigned int float32ExponentBias = 127;
244
245 const unsigned short float11Max = 0x7BF;
246 const unsigned short float11MantissaMask = 0x3F;
247 const unsigned short float11ExponentMask = 0x7C0;
248 const unsigned short float11BitMask = 0x7FF;
249 const unsigned int float11ExponentBias = 14;
250
251 const unsigned int float32Maxfloat11 = 0x477E0000;
252 const unsigned int float32MinNormfloat11 = 0x38800000;
253 const unsigned int float32MinDenormfloat11 = 0x35000080;
254
255 const unsigned int float32Bits = bitCast<unsigned int>(fp32);
256 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
257
258 unsigned int float32Val = float32Bits & float32ValueMask;
259
260 if ((float32Val & float32ExponentMask) == float32ExponentMask)
261 {
262 // INF or NAN
263 if ((float32Val & float32MantissaMask) != 0)
264 {
265 return float11ExponentMask |
266 (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
267 float11MantissaMask);
268 }
269 else if (float32Sign)
270 {
271 // -INF is clamped to 0 since float11 is positive only
272 return 0;
273 }
274 else
275 {
276 return float11ExponentMask;
277 }
278 }
279 else if (float32Sign)
280 {
281 // float11 is positive only, so clamp to zero
282 return 0;
283 }
284 else if (float32Val > float32Maxfloat11)
285 {
286 // The number is too large to be represented as a float11, set to max
287 return float11Max;
288 }
289 else if (float32Val < float32MinDenormfloat11)
290 {
291 // The number is too small to be represented as a denormalized float11, set to 0
292 return 0;
293 }
294 else
295 {
296 if (float32Val < float32MinNormfloat11)
297 {
298 // The number is too small to be represented as a normalized float11
299 // Convert it to a denormalized value.
300 const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
301 (float32Val >> float32ExponentFirstBit);
302 float32Val =
303 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
304 }
305 else
306 {
307 // Rebias the exponent to represent the value as a normalized float11
308 float32Val += 0xC8000000;
309 }
310
311 return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
312 }
313 }
314
float32ToFloat10(float fp32)315 inline unsigned short float32ToFloat10(float fp32)
316 {
317 const unsigned int float32MantissaMask = 0x7FFFFF;
318 const unsigned int float32ExponentMask = 0x7F800000;
319 const unsigned int float32SignMask = 0x80000000;
320 const unsigned int float32ValueMask = ~float32SignMask;
321 const unsigned int float32ExponentFirstBit = 23;
322 const unsigned int float32ExponentBias = 127;
323
324 const unsigned short float10Max = 0x3DF;
325 const unsigned short float10MantissaMask = 0x1F;
326 const unsigned short float10ExponentMask = 0x3E0;
327 const unsigned short float10BitMask = 0x3FF;
328 const unsigned int float10ExponentBias = 14;
329
330 const unsigned int float32Maxfloat10 = 0x477C0000;
331 const unsigned int float32MinNormfloat10 = 0x38800000;
332 const unsigned int float32MinDenormfloat10 = 0x35800040;
333
334 const unsigned int float32Bits = bitCast<unsigned int>(fp32);
335 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
336
337 unsigned int float32Val = float32Bits & float32ValueMask;
338
339 if ((float32Val & float32ExponentMask) == float32ExponentMask)
340 {
341 // INF or NAN
342 if ((float32Val & float32MantissaMask) != 0)
343 {
344 return float10ExponentMask |
345 (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
346 float10MantissaMask);
347 }
348 else if (float32Sign)
349 {
350 // -INF is clamped to 0 since float10 is positive only
351 return 0;
352 }
353 else
354 {
355 return float10ExponentMask;
356 }
357 }
358 else if (float32Sign)
359 {
360 // float10 is positive only, so clamp to zero
361 return 0;
362 }
363 else if (float32Val > float32Maxfloat10)
364 {
365 // The number is too large to be represented as a float10, set to max
366 return float10Max;
367 }
368 else if (float32Val < float32MinDenormfloat10)
369 {
370 // The number is too small to be represented as a denormalized float10, set to 0
371 return 0;
372 }
373 else
374 {
375 if (float32Val < float32MinNormfloat10)
376 {
377 // The number is too small to be represented as a normalized float10
378 // Convert it to a denormalized value.
379 const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
380 (float32Val >> float32ExponentFirstBit);
381 float32Val =
382 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
383 }
384 else
385 {
386 // Rebias the exponent to represent the value as a normalized float10
387 float32Val += 0xC8000000;
388 }
389
390 return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
391 }
392 }
393
float11ToFloat32(unsigned short fp11)394 inline float float11ToFloat32(unsigned short fp11)
395 {
396 unsigned short exponent = (fp11 >> 6) & 0x1F;
397 unsigned short mantissa = fp11 & 0x3F;
398
399 if (exponent == 0x1F)
400 {
401 // INF or NAN
402 return bitCast<float>(0x7f800000 | (mantissa << 17));
403 }
404 else
405 {
406 if (exponent != 0)
407 {
408 // normalized
409 }
410 else if (mantissa != 0)
411 {
412 // The value is denormalized
413 exponent = 1;
414
415 do
416 {
417 exponent--;
418 mantissa <<= 1;
419 } while ((mantissa & 0x40) == 0);
420
421 mantissa = mantissa & 0x3F;
422 }
423 else // The value is zero
424 {
425 exponent = static_cast<unsigned short>(-112);
426 }
427
428 return bitCast<float>(((exponent + 112) << 23) | (mantissa << 17));
429 }
430 }
431
float10ToFloat32(unsigned short fp10)432 inline float float10ToFloat32(unsigned short fp10)
433 {
434 unsigned short exponent = (fp10 >> 5) & 0x1F;
435 unsigned short mantissa = fp10 & 0x1F;
436
437 if (exponent == 0x1F)
438 {
439 // INF or NAN
440 return bitCast<float>(0x7f800000 | (mantissa << 17));
441 }
442 else
443 {
444 if (exponent != 0)
445 {
446 // normalized
447 }
448 else if (mantissa != 0)
449 {
450 // The value is denormalized
451 exponent = 1;
452
453 do
454 {
455 exponent--;
456 mantissa <<= 1;
457 } while ((mantissa & 0x20) == 0);
458
459 mantissa = mantissa & 0x1F;
460 }
461 else // The value is zero
462 {
463 exponent = static_cast<unsigned short>(-112);
464 }
465
466 return bitCast<float>(((exponent + 112) << 23) | (mantissa << 18));
467 }
468 }
469
// Converts between float and 16.16 fixed-point format.
471
// Decodes a 16.16 fixed-point value into a float.
// The bits are interpreted as a signed two's-complement number, so an input with the top bit set
// decodes to a negative value (for example 0xFFFF0000 is -1.0).
inline float ConvertFixedToFloat(uint32_t fixedInput)
{
    const int32_t signedFixed = static_cast<int32_t>(fixedInput);
    return static_cast<float>(signedFixed) / 65536.0f;
}
476
// Encodes a float as a 16.16 fixed-point value, returned as its two's-complement bit pattern.
// Inputs beyond the thresholds below saturate to kHighest / kLowest, and NaN encodes as 0.
inline uint32_t ConvertFloatToFixed(float floatInput)
{
    static constexpr uint32_t kHighest = 32767 * 65536 + 65535;
    static constexpr uint32_t kLowest  = static_cast<uint32_t>(-32768 * 65536 + 65535);

    if (floatInput > 32767.65535)
    {
        return kHighest;
    }
    if (floatInput < -32768.65535)
    {
        return kLowest;
    }
    if (floatInput != floatInput)
    {
        // NaN fails both range checks above; converting it to an integer would be undefined.
        return 0;
    }

    // Converting a negative float directly to an unsigned type is undefined behavior. Go through a
    // signed 64-bit integer, which holds every value that passes the checks above, and let the
    // well-defined modular narrowing produce the 32-bit two's-complement pattern.
    return static_cast<uint32_t>(static_cast<int64_t>(floatInput * 65536));
}
495
// Converts a normalized integer to a float by dividing by the maximum value of T.
template <typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");

    constexpr bool kUseDoublePrecision = sizeof(T) > 2;
    if (kUseDoublePrecision)
    {
        // float has only a 23 bit mantissa, so wider integers are scaled in double precision.
        constexpr double kInverseMaxD = 1.0 / std::numeric_limits<T>::max();
        return static_cast<float>(input * kInverseMaxD);
    }

    constexpr float kInverseMaxF = 1.0f / std::numeric_limits<T>::max();
    return input * kInverseMaxF;
}
513
// Converts an unsigned normalized integer that occupies only the low inputBitCount bits of T to a
// float by dividing by (2^inputBitCount - 1). Bits above inputBitCount must be clear.
template <unsigned int inputBitCount, typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
    static_assert(inputBitCount < (sizeof(T) * 8), "T must have more bits than inputBitCount.");
    ASSERT((input & ~((1 << inputBitCount) - 1)) == 0);

    constexpr int kMaxInput = (1 << inputBitCount) - 1;

    if (inputBitCount > 23)
    {
        // float has only a 23 bit mantissa, so wide inputs are scaled in double precision.
        constexpr double kInverseMaxD = 1.0 / kMaxInput;
        return static_cast<float>(input * kInverseMaxD);
    }

    constexpr float kInverseMaxF = 1.0f / kMaxInput;
    return input * kInverseMaxF;
}
533
// Converts a float to a normalized integer of type T by scaling with T's maximum value and
// rounding to nearest. The input is not clamped here.
template <typename T>
inline T floatToNormalized(float input)
{
    constexpr T kMaxValue = std::numeric_limits<T>::max();

    if (sizeof(T) > 2)
    {
        // float has only a 23 bit mantissa, so wide types are scaled in double precision.
        return static_cast<T>(kMaxValue * static_cast<double>(input) + 0.5);
    }

    return static_cast<T>(kMaxValue * input + 0.5f);
}
547
// Converts a float to an unsigned normalized integer that uses only the low outputBitCount bits of
// T, scaling by (2^outputBitCount - 1) and rounding to nearest. The input is not clamped here.
template <unsigned int outputBitCount, typename T>
inline T floatToNormalized(float input)
{
    static_assert(outputBitCount < (sizeof(T) * 8), "T must have more bits than outputBitCount.");

    if (outputBitCount > 23)
    {
        // float has only a 23 bit mantissa, so wide outputs are scaled in double precision.
        const double scaled = ((1 << outputBitCount) - 1) * static_cast<double>(input);
        return static_cast<T>(scaled + 0.5);
    }

    return static_cast<T>(((1 << outputBitCount) - 1) * input + 0.5f);
}
563
// Extracts the inputBitCount-bit field that starts at bit inputBitStart of |input| and returns it
// right-aligned.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // Build the mask in 64 bits: an int-typed (1 << inputBitCount) is undefined once the count
    // reaches the width of int. The modulo keeps the shift count valid in the branch that is not
    // selected when inputBitCount is 64.
    constexpr uint64_t kWideMask = (inputBitCount >= 64u)
                                       ? ~static_cast<uint64_t>(0)
                                       : ((static_cast<uint64_t>(1) << (inputBitCount % 64u)) - 1u);
    const T mask = static_cast<T>(kWideMask);
    return (input >> inputBitStart) & mask;
}
572
// Keeps the low inputBitCount bits of |input| and moves them up so the field starts at bit
// inputBitStart.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");

    // Build the mask in 64 bits: an int-typed (1 << inputBitCount) is undefined once the count
    // reaches the width of int. The modulo keeps the shift count valid in the branch that is not
    // selected when inputBitCount is 64.
    constexpr uint64_t kWideMask = (inputBitCount >= 64u)
                                       ? ~static_cast<uint64_t>(0)
                                       : ((static_cast<uint64_t>(1) << (inputBitCount % 64u)) - 1u);
    const T mask = static_cast<T>(kWideMask);
    return (input & mask) << inputBitStart;
}
581
// Returns the number of zero bits above the highest set bit of x (32 when x is 0).
inline unsigned int CountLeadingZeros(uint32_t x)
{
    // Binary search: at each step, if the upper part of the remaining window is non-zero, discard
    // the lower part and subtract its width from the running count.
    unsigned int zeros = 32u;
    for (unsigned int step = 16u; step >= 2u; step >>= 1)
    {
        const uint32_t upper = x >> step;
        if (upper != 0)
        {
            zeros -= step;
            x = upper;
        }
    }

    // x is now in [0, 3]: values 2 and 3 occupy two bits, while 0 and 1 occupy x bits.
    if ((x >> 1u) != 0)
    {
        return zeros - 2u;
    }
    return zeros - x;
}
619
// average(a, b): midpoint of two values, one overload per component type. The integer overloads
// are written so that the intermediate sum cannot overflow. Unsigned overloads round down; signed
// overloads round toward zero.

inline unsigned char average(unsigned char a, unsigned char b)
{
    // Both operands promote to int, so the sum cannot overflow.
    return static_cast<unsigned char>((a + b) / 2);
}

inline signed char average(signed char a, signed char b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed char>(sum / 2);
}

inline unsigned short average(unsigned short a, unsigned short b)
{
    // Half of the differing bits plus the common bits; equals floor((a + b) / 2).
    return static_cast<unsigned short>((a & b) + ((a ^ b) >> 1));
}

inline signed short average(signed short a, signed short b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return static_cast<signed short>(sum / 2);
}

inline unsigned int average(unsigned int a, unsigned int b)
{
    // Half of the differing bits plus the common bits; equals floor((a + b) / 2) with no overflow.
    return (a & b) + ((a ^ b) >> 1);
}

inline int average(int a, int b)
{
    // Widen to 64 bits so the sum of two ints always fits.
    const long long sum = static_cast<long long>(a) + static_cast<long long>(b);
    return static_cast<int>(sum / 2ll);
}

inline float average(float a, float b)
{
    return 0.5f * (a + b);
}
655
averageHalfFloat(unsigned short a,unsigned short b)656 inline unsigned short averageHalfFloat(unsigned short a, unsigned short b)
657 {
658 return float32ToFloat16((float16ToFloat32(a) + float16ToFloat32(b)) * 0.5f);
659 }
660
averageFloat11(unsigned int a,unsigned int b)661 inline unsigned int averageFloat11(unsigned int a, unsigned int b)
662 {
663 return float32ToFloat11((float11ToFloat32(static_cast<unsigned short>(a)) +
664 float11ToFloat32(static_cast<unsigned short>(b))) *
665 0.5f);
666 }
667
averageFloat10(unsigned int a,unsigned int b)668 inline unsigned int averageFloat10(unsigned int a, unsigned int b)
669 {
670 return float32ToFloat10((float10ToFloat32(static_cast<unsigned short>(a)) +
671 float10ToFloat32(static_cast<unsigned short>(b))) *
672 0.5f);
673 }
674
// Half-open interval [low, high) over T. The range is empty whenever high <= low.
template <typename T>
class Range
{
  public:
    // Default-constructed ranges are value-initialized to [0, 0), i.e. empty.
    Range() : mLow(), mHigh() {}
    Range(T lo, T hi) : mLow(lo), mHigh(hi) {}

    // Number of values covered, or 0 for an empty range.
    T length() const { return (empty() ? 0 : (mHigh - mLow)); }

    // True when this range and |other| overlap. The range that starts first must extend past the
    // start of the other one.
    bool intersects(Range<T> other) const
    {
        if (mLow <= other.mLow)
        {
            return other.mLow < mHigh;
        }
        else
        {
            return mLow < other.mHigh;
        }
    }

    // Grows the range so that it contains |value|. Because the upper bound is exclusive,
    // extending to 5 moves "high" to 6.
    void extend(T value)
    {
        mLow  = value < mLow ? value : mLow;
        mHigh = value >= mHigh ? (value + 1) : mHigh;
    }

    bool empty() const { return mHigh <= mLow; }

    bool contains(T value) const { return value >= mLow && value < mHigh; }

    // Minimal forward iterator that walks the values in [low, high), enabling range-based for.
    class Iterator final
    {
      public:
        Iterator(T value) : mCurrent(value) {}

        Iterator &operator++()
        {
            mCurrent++;
            return *this;
        }
        bool operator==(const Iterator &other) const { return mCurrent == other.mCurrent; }
        bool operator!=(const Iterator &other) const { return mCurrent != other.mCurrent; }
        T operator*() const { return mCurrent; }

      private:
        T mCurrent;
    };

    Iterator begin() const { return Iterator(mLow); }

    Iterator end() const { return Iterator(mHigh); }

    T low() const { return mLow; }
    T high() const { return mHigh; }

    // Resets to an inverted (empty) range so that the next extend() defines both bounds.
    // lowest() is used for the upper bound because numeric_limits::min() is the smallest positive
    // value for floating-point types; for integer types the two are identical.
    void invalidate()
    {
        mLow  = std::numeric_limits<T>::max();
        mHigh = std::numeric_limits<T>::lowest();
    }

  private:
    T mLow;
    T mHigh;
};

typedef Range<int> RangeI;
typedef Range<unsigned int> RangeUI;
745
746 struct IndexRange
747 {
748 struct Undefined
749 {};
IndexRangeIndexRange750 IndexRange(Undefined) {}
IndexRangeIndexRange751 IndexRange() : IndexRange(0, 0, 0) {}
IndexRangeIndexRange752 IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
753 : start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
754 {
755 ASSERT(start <= end);
756 }
757
758 // Number of vertices in the range.
vertexCountIndexRange759 size_t vertexCount() const { return (end - start) + 1; }
760
761 // Inclusive range of indices that are not primitive restart
762 size_t start;
763 size_t end;
764
765 // Number of non-primitive restart indices
766 size_t vertexIndexCount;
767 };
768
// Builds a float from a mantissa (x) and an integer power-of-two exponent (exp), computing
// x * 2^exp as the GLSL ldexp() built-in does. Exponents above 128 return +infinity and exponents
// below -126 return 0, regardless of x.
inline float Ldexp(float x, int exp)
{
    if (exp > 128)
    {
        return std::numeric_limits<float>::infinity();
    }
    if (exp < -126)
    {
        return 0.0f;
    }

    // Scale in double precision and narrow once at the end.
    const double scale = std::pow(2.0, static_cast<double>(exp));
    return static_cast<float>(static_cast<double>(x) * scale);
}
784
785 // First, both normalized floating-point values are converted into 16-bit integer values.
786 // Then, the results are packed into the returned 32-bit unsigned integer.
787 // The first float value will be written to the least significant bits of the output;
788 // the last float value will be written to the most significant bits.
789 // The conversion of each value to fixed point is done as follows :
790 // packSnorm2x16 : round(clamp(c, -1, +1) * 32767.0)
packSnorm2x16(float f1,float f2)791 inline uint32_t packSnorm2x16(float f1, float f2)
792 {
793 int16_t leastSignificantBits = static_cast<int16_t>(roundf(clamp(f1, -1.0f, 1.0f) * 32767.0f));
794 int16_t mostSignificantBits = static_cast<int16_t>(roundf(clamp(f2, -1.0f, 1.0f) * 32767.0f));
795 return static_cast<uint32_t>(mostSignificantBits) << 16 |
796 (static_cast<uint32_t>(leastSignificantBits) & 0xFFFF);
797 }
798
799 // First, unpacks a single 32-bit unsigned integer u into a pair of 16-bit unsigned integers. Then,
800 // each component is converted to a normalized floating-point value to generate the returned two
801 // float values. The first float value will be extracted from the least significant bits of the
802 // input; the last float value will be extracted from the most-significant bits. The conversion for
803 // unpacked fixed-point value to floating point is done as follows: unpackSnorm2x16 : clamp(f /
804 // 32767.0, -1, +1)
unpackSnorm2x16(uint32_t u,float * f1,float * f2)805 inline void unpackSnorm2x16(uint32_t u, float *f1, float *f2)
806 {
807 int16_t leastSignificantBits = static_cast<int16_t>(u & 0xFFFF);
808 int16_t mostSignificantBits = static_cast<int16_t>(u >> 16);
809 *f1 = clamp(static_cast<float>(leastSignificantBits) / 32767.0f, -1.0f, 1.0f);
810 *f2 = clamp(static_cast<float>(mostSignificantBits) / 32767.0f, -1.0f, 1.0f);
811 }
812
813 // First, both normalized floating-point values are converted into 16-bit integer values.
814 // Then, the results are packed into the returned 32-bit unsigned integer.
815 // The first float value will be written to the least significant bits of the output;
816 // the last float value will be written to the most significant bits.
817 // The conversion of each value to fixed point is done as follows:
818 // packUnorm2x16 : round(clamp(c, 0, +1) * 65535.0)
packUnorm2x16(float f1,float f2)819 inline uint32_t packUnorm2x16(float f1, float f2)
820 {
821 uint16_t leastSignificantBits = static_cast<uint16_t>(roundf(clamp(f1, 0.0f, 1.0f) * 65535.0f));
822 uint16_t mostSignificantBits = static_cast<uint16_t>(roundf(clamp(f2, 0.0f, 1.0f) * 65535.0f));
823 return static_cast<uint32_t>(mostSignificantBits) << 16 |
824 static_cast<uint32_t>(leastSignificantBits);
825 }
826
// Splits a 32-bit word into two unsigned 16-bit integers and converts each to a normalized float
// as f / 65535.0.
// *f1 receives the value from the 16 least significant bits, *f2 the value from the 16 most
// significant bits.
inline void unpackUnorm2x16(uint32_t u, float *f1, float *f2)
{
    const uint16_t lowBits  = static_cast<uint16_t>(u & 0xFFFF);
    const uint16_t highBits = static_cast<uint16_t>(u >> 16);

    *f1 = static_cast<float>(lowBits) / 65535.0f;
    *f2 = static_cast<float>(highBits) / 65535.0f;
}
839
840 // Helper functions intended to be used only here.
841 namespace priv
842 {
843
ToPackedUnorm8(float f)844 inline uint8_t ToPackedUnorm8(float f)
845 {
846 return static_cast<uint8_t>(roundf(clamp(f, 0.0f, 1.0f) * 255.0f));
847 }
848
ToPackedSnorm8(float f)849 inline int8_t ToPackedSnorm8(float f)
850 {
851 return static_cast<int8_t>(roundf(clamp(f, -1.0f, 1.0f) * 127.0f));
852 }
853
854 } // namespace priv
855
856 // Packs 4 normalized unsigned floating-point values to a single 32-bit unsigned integer. Works
857 // similarly to packUnorm2x16. The floats are clamped to the range 0.0 to 1.0, and written to the
858 // unsigned integer starting from the least significant bits.
PackUnorm4x8(float f1,float f2,float f3,float f4)859 inline uint32_t PackUnorm4x8(float f1, float f2, float f3, float f4)
860 {
861 uint8_t bits[4];
862 bits[0] = priv::ToPackedUnorm8(f1);
863 bits[1] = priv::ToPackedUnorm8(f2);
864 bits[2] = priv::ToPackedUnorm8(f3);
865 bits[3] = priv::ToPackedUnorm8(f4);
866 uint32_t result = 0u;
867 for (int i = 0; i < 4; ++i)
868 {
869 int shift = i * 8;
870 result |= (static_cast<uint32_t>(bits[i]) << shift);
871 }
872 return result;
873 }
874
// Unpacks four unsigned normalized 8-bit values from a 32-bit word into f[0..3], in the same
// manner as unpackUnorm2x16. f[0] comes from the least significant byte; each byte is divided by
// 255.
inline void UnpackUnorm4x8(uint32_t u, float *f)
{
    // Consume the word one byte at a time, lowest byte first.
    for (int component = 0; component < 4; ++component, u >>= 8)
    {
        const uint8_t byteValue = static_cast<uint8_t>(u & 0xFF);
        f[component]            = static_cast<float>(byteValue) / 255.0f;
    }
}
887
888 // Packs 4 normalized signed floating-point values to a single 32-bit unsigned integer. The floats
889 // are clamped to the range -1.0 to 1.0, and written to the unsigned integer starting from the least
890 // significant bits.
PackSnorm4x8(float f1,float f2,float f3,float f4)891 inline uint32_t PackSnorm4x8(float f1, float f2, float f3, float f4)
892 {
893 int8_t bits[4];
894 bits[0] = priv::ToPackedSnorm8(f1);
895 bits[1] = priv::ToPackedSnorm8(f2);
896 bits[2] = priv::ToPackedSnorm8(f3);
897 bits[3] = priv::ToPackedSnorm8(f4);
898 uint32_t result = 0u;
899 for (int i = 0; i < 4; ++i)
900 {
901 int shift = i * 8;
902 result |= ((static_cast<uint32_t>(bits[i]) & 0xFF) << shift);
903 }
904 return result;
905 }
906
907 // Unpacks 4 normalized signed floating-point values from a single 32-bit unsigned integer into f.
908 // Works similarly to unpackSnorm2x16. The floats are unpacked starting from the least significant
909 // bits, and clamped to the range -1.0 to 1.0.
UnpackSnorm4x8(uint32_t u,float * f)910 inline void UnpackSnorm4x8(uint32_t u, float *f)
911 {
912 for (int i = 0; i < 4; ++i)
913 {
914 int shift = i * 8;
915 int8_t bits = static_cast<int8_t>((u >> shift) & 0xFF);
916 f[i] = clamp(static_cast<float>(bits) / 127.0f, -1.0f, 1.0f);
917 }
918 }
919
920 // Returns an unsigned integer obtained by converting the two floating-point values to the 16-bit
921 // floating-point representation found in the OpenGL ES Specification, and then packing these
922 // two 16-bit integers into a 32-bit unsigned integer.
923 // f1: The 16 least-significant bits of the result;
924 // f2: The 16 most-significant bits.
packHalf2x16(float f1,float f2)925 inline uint32_t packHalf2x16(float f1, float f2)
926 {
927 uint16_t leastSignificantBits = static_cast<uint16_t>(float32ToFloat16(f1));
928 uint16_t mostSignificantBits = static_cast<uint16_t>(float32ToFloat16(f2));
929 return static_cast<uint32_t>(mostSignificantBits) << 16 |
930 static_cast<uint32_t>(leastSignificantBits);
931 }
932
933 // Returns two floating-point values obtained by unpacking a 32-bit unsigned integer into a pair of
934 // 16-bit values, interpreting those values as 16-bit floating-point numbers according to the OpenGL
935 // ES Specification, and converting them to 32-bit floating-point values. The first float value is
936 // obtained from the 16 least-significant bits of u; the second component is obtained from the 16
937 // most-significant bits of u.
unpackHalf2x16(uint32_t u,float * f1,float * f2)938 inline void unpackHalf2x16(uint32_t u, float *f1, float *f2)
939 {
940 uint16_t leastSignificantBits = static_cast<uint16_t>(u & 0xFFFF);
941 uint16_t mostSignificantBits = static_cast<uint16_t>(u >> 16);
942
943 *f1 = float16ToFloat32(leastSignificantBits);
944 *f2 = float16ToFloat32(mostSignificantBits);
945 }
946
sRGBToLinear(uint8_t srgbValue)947 inline uint8_t sRGBToLinear(uint8_t srgbValue)
948 {
949 float value = srgbValue / 255.0f;
950 if (value <= 0.04045f)
951 {
952 value = value / 12.92f;
953 }
954 else
955 {
956 value = std::pow((value + 0.055f) / 1.055f, 2.4f);
957 }
958 return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
959 }
960
linearToSRGB(uint8_t linearValue)961 inline uint8_t linearToSRGB(uint8_t linearValue)
962 {
963 float value = linearValue / 255.0f;
964 if (value <= 0.0f)
965 {
966 value = 0.0f;
967 }
968 else if (value < 0.0031308f)
969 {
970 value = value * 12.92f;
971 }
972 else if (value < 1.0f)
973 {
974 value = std::pow(value, 0.41666f) * 1.055f - 0.055f;
975 }
976 else
977 {
978 value = 1.0f;
979 }
980 return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
981 }
982
// Reverse the order of the bits: bit 0 of the input becomes bit 31 of the result, and so on.
inline uint32_t BitfieldReverse(uint32_t value)
{
    // TODO(oetuaho@nvidia.com): Optimize this if needed. There don't seem to be compiler intrinsics
    // for this, and right now it's not used in performance-critical paths.
    uint32_t reversed = 0u;
    for (unsigned int i = 0u; i < 32u; ++i)
    {
        // Pop the lowest input bit and push it onto the low end of the result.
        reversed = (reversed << 1) | (value & 1u);
        value >>= 1;
    }
    return reversed;
}
995
996 // Count the 1 bits.
997 #if defined(_MSC_VER) && !defined(__clang__)
998 # if defined(_M_IX86) || defined(_M_X64)
999 namespace priv
1000 {
1001 // Check POPCNT instruction support and cache the result.
1002 // https://docs.microsoft.com/en-us/cpp/intrinsics/popcnt16-popcnt-popcnt64#remarks
1003 static const bool kHasPopcnt = [] {
1004 int info[4];
1005 __cpuid(&info[0], 1);
1006 return static_cast<bool>(info[2] & 0x800000);
1007 }();
1008 } // namespace priv
1009
// Software population count for x86/x64 CPUs without POPCNT (32-bit variant).
// Parallel bit-summing technique from
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
inline int BitCountPolyfill(uint32_t bits)
{
    // Each 2-bit field holds the number of set bits in that pair.
    const uint32_t pairSums = bits - ((bits >> 1) & 0x55555555u);
    // Each 4-bit field holds the number of set bits in that nibble.
    const uint32_t nibbleSums = (pairSums & 0x33333333u) + ((pairSums >> 2) & 0x33333333u);
    // Each byte holds the number of set bits in that byte.
    const uint32_t byteSums = (nibbleSums + (nibbleSums >> 4)) & 0x0F0F0F0Fu;
    // The multiply accumulates all byte sums into the top byte.
    return static_cast<int>((byteSums * 0x01010101u) >> 24);
}
1019
// Software population count for x86/x64 CPUs without POPCNT (64-bit variant).
// Same parallel bit-summing scheme as the 32-bit polyfill, widened to 64 bits.
inline int BitCountPolyfill(uint64_t bits)
{
    // Each 2-bit field holds the number of set bits in that pair.
    const uint64_t pairSums = bits - ((bits >> 1) & 0x5555555555555555ull);
    // Each 4-bit field holds the number of set bits in that nibble.
    const uint64_t nibbleSums =
        (pairSums & 0x3333333333333333ull) + ((pairSums >> 2) & 0x3333333333333333ull);
    // Each byte holds the number of set bits in that byte.
    const uint64_t byteSums = (nibbleSums + (nibbleSums >> 4)) & 0x0F0F0F0F0F0F0F0Full;
    // The multiply accumulates all byte sums into the top byte.
    return static_cast<int>((byteSums * 0x0101010101010101ull) >> 56);
}
1027
BitCount(uint32_t bits)1028 inline int BitCount(uint32_t bits)
1029 {
1030 if (priv::kHasPopcnt)
1031 {
1032 return static_cast<int>(__popcnt(bits));
1033 }
1034 return BitCountPolyfill(bits);
1035 }
1036
BitCount(uint64_t bits)1037 inline int BitCount(uint64_t bits)
1038 {
1039 if (priv::kHasPopcnt)
1040 {
1041 # if defined(_M_X64)
1042 return static_cast<int>(__popcnt64(bits));
1043 # else // x86
1044 return static_cast<int>(__popcnt(static_cast<uint32_t>(bits >> 32)) +
1045 __popcnt(static_cast<uint32_t>(bits)));
1046 # endif // defined(_M_X64)
1047 }
1048 return BitCountPolyfill(bits);
1049 }
1050
1051 # elif defined(_M_ARM) || defined(_M_ARM64)
1052
1053 // MSVC's _CountOneBits* intrinsics are not defined for ARM64, moreover they do not use dedicated
1054 // NEON instructions.
1055
BitCount(uint32_t bits)1056 inline int BitCount(uint32_t bits)
1057 {
1058 // cast bits to 8x8 datatype and use VCNT on it
1059 const uint8x8_t vsum = vcnt_u8(vcreate_u8(static_cast<uint64_t>(bits)));
1060
1061 // pairwise sums: 8x8 -> 16x4 -> 32x2
1062 return static_cast<int>(vget_lane_u32(vpaddl_u16(vpaddl_u8(vsum)), 0));
1063 }
1064
BitCount(uint64_t bits)1065 inline int BitCount(uint64_t bits)
1066 {
1067 // cast bits to 8x8 datatype and use VCNT on it
1068 const uint8x8_t vsum = vcnt_u8(vcreate_u8(bits));
1069
1070 // pairwise sums: 8x8 -> 16x4 -> 32x2 -> 64x1
1071 return static_cast<int>(vget_lane_u64(vpaddl_u32(vpaddl_u16(vpaddl_u8(vsum))), 0));
1072 }
1073 # endif // defined(_M_IX86) || defined(_M_X64)
1074 #endif // defined(_MSC_VER) && !defined(__clang__)
1075
1076 #if defined(ANGLE_PLATFORM_POSIX) || defined(__clang__)
// Counts the set bits of a 32-bit value via the compiler's popcount builtin.
inline int BitCount(uint32_t bits)
{
    const int setBits = __builtin_popcount(bits);
    return setBits;
}
1081
// Counts the set bits of a 64-bit value via the compiler's long-long popcount builtin.
inline int BitCount(uint64_t bits)
{
    const int setBits = __builtin_popcountll(bits);
    return setBits;
}
1086 #endif // defined(ANGLE_PLATFORM_POSIX) || defined(__clang__)
1087
// 8-bit overload: widens the value to 32 bits and defers to the uint32_t implementation.
inline int BitCount(uint8_t bits)
{
    const uint32_t widened = static_cast<uint32_t>(bits);
    return BitCount(widened);
}
1092
// 16-bit overload: widens the value to 32 bits and defers to the uint32_t implementation.
inline int BitCount(uint16_t bits)
{
    const uint32_t widened = static_cast<uint32_t>(bits);
    return BitCount(widened);
}
1097
1098 #if defined(ANGLE_PLATFORM_WINDOWS)
1099 // Return the index of the least significant bit set. Indexing is such that bit 0 is the least
1100 // significant bit. Implemented for different bit widths on different platforms.
ScanForward(uint32_t bits)1101 inline unsigned long ScanForward(uint32_t bits)
1102 {
1103 ASSERT(bits != 0u);
1104 unsigned long firstBitIndex = 0ul;
1105 unsigned char ret = _BitScanForward(&firstBitIndex, bits);
1106 ASSERT(ret != 0u);
1107 return firstBitIndex;
1108 }
1109
ScanForward(uint64_t bits)1110 inline unsigned long ScanForward(uint64_t bits)
1111 {
1112 ASSERT(bits != 0u);
1113 unsigned long firstBitIndex = 0ul;
1114 # if defined(ANGLE_IS_64_BIT_CPU)
1115 unsigned char ret = _BitScanForward64(&firstBitIndex, bits);
1116 # else
1117 unsigned char ret;
1118 if (static_cast<uint32_t>(bits) == 0)
1119 {
1120 ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits >> 32));
1121 firstBitIndex += 32ul;
1122 }
1123 else
1124 {
1125 ret = _BitScanForward(&firstBitIndex, static_cast<uint32_t>(bits));
1126 }
1127 # endif // defined(ANGLE_IS_64_BIT_CPU)
1128 ASSERT(ret != 0u);
1129 return firstBitIndex;
1130 }
1131
1132 // Return the index of the most significant bit set. Indexing is such that bit 0 is the least
1133 // significant bit.
ScanReverse(uint32_t bits)1134 inline unsigned long ScanReverse(uint32_t bits)
1135 {
1136 ASSERT(bits != 0u);
1137 unsigned long lastBitIndex = 0ul;
1138 unsigned char ret = _BitScanReverse(&lastBitIndex, bits);
1139 ASSERT(ret != 0u);
1140 return lastBitIndex;
1141 }
1142
ScanReverse(uint64_t bits)1143 inline unsigned long ScanReverse(uint64_t bits)
1144 {
1145 ASSERT(bits != 0u);
1146 unsigned long lastBitIndex = 0ul;
1147 # if defined(ANGLE_IS_64_BIT_CPU)
1148 unsigned char ret = _BitScanReverse64(&lastBitIndex, bits);
1149 # else
1150 unsigned char ret;
1151 if (static_cast<uint32_t>(bits >> 32) == 0)
1152 {
1153 ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits));
1154 }
1155 else
1156 {
1157 ret = _BitScanReverse(&lastBitIndex, static_cast<uint32_t>(bits >> 32));
1158 lastBitIndex += 32ul;
1159 }
1160 # endif // defined(ANGLE_IS_64_BIT_CPU)
1161 ASSERT(ret != 0u);
1162 return lastBitIndex;
1163 }
1164 #endif // defined(ANGLE_PLATFORM_WINDOWS)
1165
1166 #if defined(ANGLE_PLATFORM_POSIX)
ScanForward(uint32_t bits)1167 inline unsigned long ScanForward(uint32_t bits)
1168 {
1169 ASSERT(bits != 0u);
1170 return static_cast<unsigned long>(__builtin_ctz(bits));
1171 }
1172
ScanForward(uint64_t bits)1173 inline unsigned long ScanForward(uint64_t bits)
1174 {
1175 ASSERT(bits != 0u);
1176 # if defined(ANGLE_IS_64_BIT_CPU)
1177 return static_cast<unsigned long>(__builtin_ctzll(bits));
1178 # else
1179 return static_cast<unsigned long>(static_cast<uint32_t>(bits) == 0
1180 ? __builtin_ctz(static_cast<uint32_t>(bits >> 32)) + 32
1181 : __builtin_ctz(static_cast<uint32_t>(bits)));
1182 # endif // defined(ANGLE_IS_64_BIT_CPU)
1183 }
1184
ScanReverse(uint32_t bits)1185 inline unsigned long ScanReverse(uint32_t bits)
1186 {
1187 ASSERT(bits != 0u);
1188 return static_cast<unsigned long>(sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(bits));
1189 }
1190
ScanReverse(uint64_t bits)1191 inline unsigned long ScanReverse(uint64_t bits)
1192 {
1193 ASSERT(bits != 0u);
1194 # if defined(ANGLE_IS_64_BIT_CPU)
1195 return static_cast<unsigned long>(sizeof(uint64_t) * CHAR_BIT - 1 - __builtin_clzll(bits));
1196 # else
1197 if (static_cast<uint32_t>(bits >> 32) == 0)
1198 {
1199 return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits));
1200 }
1201 else
1202 {
1203 return sizeof(uint32_t) * CHAR_BIT - 1 - __builtin_clz(static_cast<uint32_t>(bits >> 32)) +
1204 32;
1205 }
1206 # endif // defined(ANGLE_IS_64_BIT_CPU)
1207 }
1208 #endif // defined(ANGLE_PLATFORM_POSIX)
1209
// 8-bit overload: widens the value to 32 bits and defers to the uint32_t implementation.
inline unsigned long ScanForward(uint8_t bits)
{
    const uint32_t widened = static_cast<uint32_t>(bits);
    return ScanForward(widened);
}
1214
// 16-bit overload: widens the value to 32 bits and defers to the uint32_t implementation.
inline unsigned long ScanForward(uint16_t bits)
{
    const uint32_t widened = static_cast<uint32_t>(bits);
    return ScanForward(widened);
}
1219
// 8-bit overload: widens the value to 32 bits and defers to the uint32_t implementation.
inline unsigned long ScanReverse(uint8_t bits)
{
    const uint32_t widened = static_cast<uint32_t>(bits);
    return ScanReverse(widened);
}
1224
// 16-bit overload: widens the value to 32 bits and defers to the uint32_t implementation.
inline unsigned long ScanReverse(uint16_t bits)
{
    const uint32_t widened = static_cast<uint32_t>(bits);
    return ScanReverse(widened);
}
1229
// GLSL-style findLSB: yields the index of the least significant 1 bit, or -1 when |bits| is 0.
template <typename T>
int FindLSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    if (bits != 0u)
    {
        return static_cast<int>(ScanForward(bits));
    }
    // No bit is set.
    return -1;
}
1244
// GLSL-style findMSB: yields the index of the most significant 1 bit, or -1 when |bits| is 0.
template <typename T>
int FindMSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    if (bits != 0u)
    {
        return static_cast<int>(ScanReverse(bits));
    }
    // No bit is set.
    return -1;
}
1259
1260 // Returns whether the argument is Not a Number.
1261 // IEEE 754 single precision NaN representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1262 // non-zero.
isNaN(float f)1263 inline bool isNaN(float f)
1264 {
1265 // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1266 // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1267 return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1268 (bitCast<uint32_t>(f) & 0x7fffffu);
1269 }
1270
1271 // Returns whether the argument is infinity.
1272 // IEEE 754 single precision infinity representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1273 // zero.
isInf(float f)1274 inline bool isInf(float f)
1275 {
1276 // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1277 // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1278 return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1279 !(bitCast<uint32_t>(f) & 0x7fffffu);
1280 }
1281
namespace priv
{
// Compile-time search for the integer square root of N, trying candidates R, R + 1, ...
// |value| is the candidate whose square equals N, or 0 once a candidate's square exceeds N
// (i.e. N is not a perfect square at or above the starting candidate).
template <unsigned int N, unsigned int R>
struct iSquareRoot
{
    static constexpr unsigned int solve()
    {
        return (R * R > N)
                   ? 0
                   : ((R * R == N) ? R : static_cast<unsigned int>(iSquareRoot<N, R + 1>::value));
    }
    enum Result
    {
        value = iSquareRoot::solve()
    };
};

// Terminal case R == N, which stops the template recursion. N is its own square root only when
// N * N == N (N is 0 or 1); for any other N the search has failed and the result is 0. Previously
// this yielded N unconditionally, which made N == 2 report a square root of 2.
template <unsigned int N>
struct iSquareRoot<N, N>
{
    enum result
    {
        value = (N <= 1u) ? N : 0
    };
};

}  // namespace priv
1309
1310 template <unsigned int N>
1311 constexpr unsigned int iSquareRoot()
1312 {
1313 return priv::iSquareRoot<N, 1>::value;
1314 }
1315
1316 // Sum, difference and multiplication operations for signed ints that wrap on 32-bit overflow.
1317 //
1318 // Unsigned types are defined to do arithmetic modulo 2^n in C++. For signed types, overflow
1319 // behavior is undefined.
1320
// Returns lhs + rhs with 32-bit wrap-around. The addition is carried out on uint32_t, where
// overflow is well-defined modulo 2^32, and the result is converted back to T.
template <typename T>
inline T WrappingSum(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) + static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1328
// Returns lhs - rhs with 32-bit wrap-around. The subtraction is carried out on uint32_t, where
// underflow is well-defined modulo 2^32, and the result is converted back to T.
template <typename T>
inline T WrappingDiff(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) - static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1336
// Returns lhs * rhs with 32-bit wrap-around.
inline int32_t WrappingMul(int32_t lhs, int32_t rhs)
{
    // The product of two 32-bit values always fits in 64 bits, so this multiply cannot overflow.
    const int64_t wideProduct = static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
    // Keep only the low-order 32 bits to get the wrapped result.
    const int64_t low32 = wideProduct & 0xffffffffll;
    // Narrowing reinterprets those 32 bits as a signed value; masked values with bit 31 set map to
    // negative results.
    return static_cast<int32_t>(low32);
}
1349
// Scales a size given in screen units to normalized device coordinates: a size equal to the full
// viewport dimension maps to 2.0.
inline float scaleScreenDimensionToNdc(float dimensionScreen, float viewportDimension)
{
    const float doubled = 2.0f * dimensionScreen;
    return doubled / viewportDimension;
}
1354
// Maps a screen-space coordinate in [0, viewportDimension] to normalized device coordinates in
// [-1, 1]: 0 maps to -1 and viewportDimension maps to +1.
inline float scaleScreenCoordinateToNdc(float coordinateScreen, float viewportDimension)
{
    // Normalize to [0, 1], recentre around zero, then stretch to a span of 2.
    return 2.0f * (coordinateScreen / viewportDimension - 0.5f);
}
1360
1361 } // namespace gl
1362
1363 namespace rx
1364 {
1365
// Rounds |value| up to the nearest multiple of |alignment|. |alignment| does not need to be a
// power of two. Only +, - and % are used, so T may be any type providing those operators.
template <typename T>
T roundUp(const T value, const T alignment)
{
    // Bump past the next multiple (unless already on one), then drop the remainder.
    auto bumped = value + alignment - static_cast<T>(1);
    auto excess = bumped % alignment;
    return bumped - excess;
}
1372
1373 template <typename T>
1374 constexpr T roundUpPow2(const T value, const T alignment)
1375 {
1376 ASSERT(gl::isPow2(alignment));
1377 return (value + alignment - 1) & ~(alignment - 1);
1378 }
1379
1380 template <typename T>
1381 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
1382 {
1383 angle::CheckedNumeric<T> checkedValue(value);
1384 angle::CheckedNumeric<T> checkedAlignment(alignment);
1385 return roundUp(checkedValue, checkedAlignment);
1386 }
1387
// Returns value / divisor rounded up to the next integer. The quotient and remainder are computed
// separately, so there is no intermediate addition on |value| that could overflow.
inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned int divisor)
{
    return value / divisor + ((value % divisor != 0) ? 1 : 0);
}
1393
// ANGLE_HAS_BUILTIN(x) expands to __has_builtin(x) when the compiler provides __has_builtin and to
// 0 otherwise, so it can be used in preprocessor conditions regardless of compiler.
#if defined(__has_builtin)
#    define ANGLE_HAS_BUILTIN(x) __has_builtin(x)
#else
#    define ANGLE_HAS_BUILTIN(x) 0
#endif
1399
// ANGLE_ROTL / ANGLE_ROTL64 / ANGLE_ROTR16 select a bit-rotation implementation:
//  - MSVC: the _rotl / _rotl64 / _rotr16 intrinsics.
//  - Clang, when all three builtins are available: the __builtin_rotate* builtins.
//  - Otherwise: the inline fallback functions defined in the #else branch.
#if defined(_MSC_VER)

#    define ANGLE_ROTL(x, y) _rotl(x, y)
#    define ANGLE_ROTL64(x, y) _rotl64(x, y)
#    define ANGLE_ROTR16(x, y) _rotr16(x, y)

#elif defined(__clang__) && ANGLE_HAS_BUILTIN(__builtin_rotateleft32) && \
    ANGLE_HAS_BUILTIN(__builtin_rotateleft64) && ANGLE_HAS_BUILTIN(__builtin_rotateright16)

#    define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
#    define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
#    define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)

#else
1414
// Rotates the 32-bit value |x| left by |r| bits. The rotation count is taken modulo 32.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    // Mask both shift counts into [0, 31]: without it, r == 0 would shift right by 32, which is
    // undefined behavior for a 32-bit operand. For r in [1, 31] the result is unchanged.
    return (x << (r & 31)) | (x >> ((32 - r) & 31));
}
1419
// Rotates the 64-bit value |x| left by |r| bits. The rotation count is taken modulo 64.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    // Mask both shift counts into [0, 63]: without it, r == 0 would shift right by 64, which is
    // undefined behavior for a 64-bit operand. For r in [1, 63] the result is unchanged.
    return (x << (r & 63)) | (x >> ((64 - r) & 63));
}
1424
// Rotates the 16-bit value |x| right by |r| bits. The rotation count is taken modulo 16.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    // |x| is promoted to int for the shifts. Masking the counts into [0, 15] keeps the left shift
    // from reaching the sign bit of the promoted value (as r == 0 otherwise would) and gives the
    // same modulo behavior as the 32- and 64-bit fallbacks. Results for r in [1, 16] are unchanged.
    return static_cast<uint16_t>((x >> (r & 15)) | (x << ((16 - r) & 15)));
}
1429
// Fallback: route the rotation macros to the inline rx::RotL / rx::RotL64 / rx::RotR16 functions.
#    define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
#    define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
#    define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)

#endif  // defined(_MSC_VER)
1435
// Returns floor(log2(bytes)) for bytes >= 1, computed by repeated halving. An input of 0 has no
// logarithm; it yields 0 rather than recursing without end (0 / 2 never reaches 1).
constexpr unsigned int Log2(unsigned int bytes)
{
    return bytes <= 1 ? 0 : (1 + Log2(bytes / 2));
}
1440 } // namespace rx
1441
1442 #endif // COMMON_MATHUTIL_H_
1443