1 //
2 // Copyright 2002 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6
7 // mathutil.h: Math and bit manipulation functions.
8
9 #ifndef COMMON_MATHUTIL_H_
10 #define COMMON_MATHUTIL_H_
11
12 #include <math.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <algorithm>
17 #include <limits>
18
19 #include <anglebase/numerics/safe_math.h>
20
21 #include "common/debug.h"
22 #include "common/platform.h"
23
24 namespace angle
25 {
26 using base::CheckedNumeric;
27 using base::IsValueInRangeForNumericType;
28 } // namespace angle
29
30 namespace gl
31 {
32
// IEEE-754 bit patterns of the value 1.0 in 32-bit and 16-bit floating point, respectively.
constexpr unsigned int Float32One   = 0x3F800000;
constexpr unsigned short Float16One = 0x3C00;
35
// Returns true when x is a positive power of two, i.e. exactly one bit is set.
// Zero and negative values are never reported as powers of two.
template <typename T>
inline constexpr bool isPow2(T x)
{
    static_assert(std::is_integral<T>::value, "isPow2 must be called on an integer type.");
    // Check the sign first: for the most negative signed value (x - 1) overflows, which is
    // undefined behavior (and ill-formed inside a constant expression).
    return (x > 0) && ((x & (x - 1)) == 0);
}
42
// Integer base-2 logarithm, rounded down. Returns 0 for any x <= 1 (including negative values).
inline int log2(int x)
{
    int exponent = 0;
    // Advance until shifting x right by 'exponent' leaves at most the value 1.
    for (; (x >> exponent) > 1; ++exponent)
    {
    }
    return exponent;
}
50
// Rounds x up to the nearest power of two. An input of 0 yields 1; inputs above 2^31 wrap to 0
// because the result does not fit in 32 bits.
inline unsigned int ceilPow2(unsigned int x)
{
    unsigned int smeared = (x == 0) ? 0 : (x - 1);

    // Copy the highest set bit into every lower position.
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        smeared |= smeared >> shift;
    }

    return smeared + 1;
}
64
// Converts value to DestT, saturating at DestT's representable limits when SrcT's range extends
// beyond them.
template <typename DestT, typename SrcT>
inline DestT clampCast(SrcT value)
{
    using DestLimits = std::numeric_limits<DestT>;
    using SrcLimits  = std::numeric_limits<SrcT>;

    // lowest() is used instead of min(): for floating-point types min() is the smallest positive
    // normalized value, while lowest() is the value that has nothing below it.
    constexpr long double kDestFloor = static_cast<long double>(DestLimits::lowest());
    constexpr long double kDestCeil  = static_cast<long double>(DestLimits::max());
    constexpr long double kSrcFloor  = static_cast<long double>(SrcLimits::lowest());
    constexpr long double kSrcCeil   = static_cast<long double>(SrcLimits::max());

    // The upper bound only needs checking when the source can exceed the destination maximum.
    if (kDestCeil < kSrcCeil)
    {
        const DestT upperLimit = DestLimits::max();
        if (value >= static_cast<SrcT>(upperLimit))
        {
            return upperLimit;
        }
    }

    // Likewise for the lower bound.
    if (kDestFloor > kSrcFloor)
    {
        const DestT lowerLimit = DestLimits::lowest();
        if (value <= static_cast<SrcT>(lowerLimit))
        {
            return lowerLimit;
        }
    }

    return static_cast<DestT>(value);
}

// Specializations of clampCast for bool -> integer conversion. They avoid the MSVS 2015
// performance warning raised when the destination maximum is cast to the (bool) source type.
template <>
inline unsigned int clampCast(bool value)
{
    return value ? 1u : 0u;
}

template <>
inline int clampCast(bool value)
{
    return value ? 1 : 0;
}
112
// Restricts x to the closed interval [min, max] and returns the result converted to T.
template <typename T, typename MIN, typename MAX>
inline T clamp(T x, MIN min, MAX max)
{
    // A NaN fails every comparison, so a NaN x is not "> min" and the result is min.
    return !(x > min) ? min : ((x > max) ? max : x);
}
119
clamp01(float x)120 inline float clamp01(float x)
121 {
122 return clamp(x, 0.0f, 1.0f);
123 }
124
// Converts a float to an n-bit unsigned normalized integer: values at or below 0 map to 0, values
// at or above 1 map to the largest n-bit value, and values in between are scaled and rounded.
// NaN maps to 0.
template <const int n>
inline unsigned int unorm(float x)
{
    static_assert(n >= 1 && n <= 32, "unorm bit count must be in the range [1, 32].");

    const unsigned int max = 0xFFFFFFFFu >> (32 - n);

    // Written as a negated comparison so that NaN (which fails every comparison) lands here
    // instead of reaching the float -> unsigned conversion, which would be undefined behavior.
    if (!(x > 0))
    {
        return 0;
    }

    // x == 1 is handled here as well: for n >= 24 'max' is not exactly representable as a float,
    // so max * 1.0f + 0.5f could round above max (or overflow unsigned int for n == 32).
    if (x >= 1)
    {
        return max;
    }

    return static_cast<unsigned int>(max * x + 0.5f);
}
143
// Reports whether SSE2 may be used. Without ANGLE_USE_SSE this is always false. With it, the
// answer is probed once via __cpuid on non-ARM Windows builds and cached in function-local
// statics; on every other platform the cached answer stays false.
inline bool supportsSSE2()
{
#if defined(ANGLE_USE_SSE)
    static bool sProbed  = false;
    static bool sHasSSE2 = false;

    if (!sProbed)
    {
#    if defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        int cpuInfo[4];
        __cpuid(cpuInfo, 0);

        // Leaf 1 is only queried when leaf 0 reports it as available.
        if (cpuInfo[0] >= 1)
        {
            __cpuid(cpuInfo, 1);

            // Bit 26 of the fourth register returned for leaf 1 is the flag tested here.
            sHasSSE2 = (cpuInfo[3] >> 26) & 1;
        }
#    endif  // defined(ANGLE_PLATFORM_WINDOWS) && !defined(_M_ARM) && !defined(_M_ARM64)
        sProbed = true;
    }

    return sHasSSE2;
#else   // defined(ANGLE_USE_SSE)
    return false;
#endif
}
174
// Reinterprets the object representation of 'source' as a destType by copying bytes.
// Only min(sizeof(destType), sizeof(sourceType)) bytes are copied; when destType is the larger
// type its remaining bytes are zero rather than indeterminate.
template <typename destType, typename sourceType>
destType bitCast(const sourceType &source)
{
    const size_t copySize = std::min(sizeof(destType), sizeof(sourceType));
    // Value-initialize so that bytes not covered by the copy have a defined value.
    destType output{};
    memcpy(&output, &source, copySize);
    return output;
}
183
// https://stackoverflow.com/a/37581284
// Maps value into [-1, 1]: negative values are divided by the magnitude of the most negative
// representable T, non-negative values by the largest representable T.
template <typename T>
static constexpr double normalize(T value)
{
    // lowest() equals min() for integer types, but for floating-point types min() is the smallest
    // positive value, which is not a valid divisor for the negative range.
    return value < 0 ? -static_cast<double>(value) / std::numeric_limits<T>::lowest()
                     : static_cast<double>(value) / std::numeric_limits<T>::max();
}
191
float32ToFloat16(float fp32)192 inline unsigned short float32ToFloat16(float fp32)
193 {
194 unsigned int fp32i = bitCast<unsigned int>(fp32);
195 unsigned int sign = (fp32i & 0x80000000) >> 16;
196 unsigned int abs = fp32i & 0x7FFFFFFF;
197
198 if (abs > 0x7F800000)
199 { // NaN
200 return 0x7FFF;
201 }
202 else if (abs > 0x47FFEFFF)
203 { // Infinity
204 return static_cast<uint16_t>(sign | 0x7C00);
205 }
206 else if (abs < 0x38800000) // Denormal
207 {
208 unsigned int mantissa = (abs & 0x007FFFFF) | 0x00800000;
209 int e = 113 - (abs >> 23);
210
211 if (e < 24)
212 {
213 abs = mantissa >> e;
214 }
215 else
216 {
217 abs = 0;
218 }
219
220 return static_cast<unsigned short>(sign | (abs + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
221 }
222 else
223 {
224 return static_cast<unsigned short>(
225 sign | (abs + 0xC8000000 + 0x00000FFF + ((abs >> 13) & 1)) >> 13);
226 }
227 }
228
229 float float16ToFloat32(unsigned short h);
230
231 unsigned int convertRGBFloatsTo999E5(float red, float green, float blue);
232 void convert999E5toRGBFloats(unsigned int input, float *red, float *green, float *blue);
233
float32ToFloat11(float fp32)234 inline unsigned short float32ToFloat11(float fp32)
235 {
236 const unsigned int float32MantissaMask = 0x7FFFFF;
237 const unsigned int float32ExponentMask = 0x7F800000;
238 const unsigned int float32SignMask = 0x80000000;
239 const unsigned int float32ValueMask = ~float32SignMask;
240 const unsigned int float32ExponentFirstBit = 23;
241 const unsigned int float32ExponentBias = 127;
242
243 const unsigned short float11Max = 0x7BF;
244 const unsigned short float11MantissaMask = 0x3F;
245 const unsigned short float11ExponentMask = 0x7C0;
246 const unsigned short float11BitMask = 0x7FF;
247 const unsigned int float11ExponentBias = 14;
248
249 const unsigned int float32Maxfloat11 = 0x477E0000;
250 const unsigned int float32Minfloat11 = 0x38800000;
251
252 const unsigned int float32Bits = bitCast<unsigned int>(fp32);
253 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
254
255 unsigned int float32Val = float32Bits & float32ValueMask;
256
257 if ((float32Val & float32ExponentMask) == float32ExponentMask)
258 {
259 // INF or NAN
260 if ((float32Val & float32MantissaMask) != 0)
261 {
262 return float11ExponentMask |
263 (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
264 float11MantissaMask);
265 }
266 else if (float32Sign)
267 {
268 // -INF is clamped to 0 since float11 is positive only
269 return 0;
270 }
271 else
272 {
273 return float11ExponentMask;
274 }
275 }
276 else if (float32Sign)
277 {
278 // float11 is positive only, so clamp to zero
279 return 0;
280 }
281 else if (float32Val > float32Maxfloat11)
282 {
283 // The number is too large to be represented as a float11, set to max
284 return float11Max;
285 }
286 else
287 {
288 if (float32Val < float32Minfloat11)
289 {
290 // The number is too small to be represented as a normalized float11
291 // Convert it to a denormalized value.
292 const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
293 (float32Val >> float32ExponentFirstBit);
294 float32Val =
295 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
296 }
297 else
298 {
299 // Rebias the exponent to represent the value as a normalized float11
300 float32Val += 0xC8000000;
301 }
302
303 return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
304 }
305 }
306
float32ToFloat10(float fp32)307 inline unsigned short float32ToFloat10(float fp32)
308 {
309 const unsigned int float32MantissaMask = 0x7FFFFF;
310 const unsigned int float32ExponentMask = 0x7F800000;
311 const unsigned int float32SignMask = 0x80000000;
312 const unsigned int float32ValueMask = ~float32SignMask;
313 const unsigned int float32ExponentFirstBit = 23;
314 const unsigned int float32ExponentBias = 127;
315
316 const unsigned short float10Max = 0x3DF;
317 const unsigned short float10MantissaMask = 0x1F;
318 const unsigned short float10ExponentMask = 0x3E0;
319 const unsigned short float10BitMask = 0x3FF;
320 const unsigned int float10ExponentBias = 14;
321
322 const unsigned int float32Maxfloat10 = 0x477C0000;
323 const unsigned int float32Minfloat10 = 0x38800000;
324
325 const unsigned int float32Bits = bitCast<unsigned int>(fp32);
326 const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
327
328 unsigned int float32Val = float32Bits & float32ValueMask;
329
330 if ((float32Val & float32ExponentMask) == float32ExponentMask)
331 {
332 // INF or NAN
333 if ((float32Val & float32MantissaMask) != 0)
334 {
335 return float10ExponentMask |
336 (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
337 float10MantissaMask);
338 }
339 else if (float32Sign)
340 {
341 // -INF is clamped to 0 since float11 is positive only
342 return 0;
343 }
344 else
345 {
346 return float10ExponentMask;
347 }
348 }
349 else if (float32Sign)
350 {
351 // float10 is positive only, so clamp to zero
352 return 0;
353 }
354 else if (float32Val > float32Maxfloat10)
355 {
356 // The number is too large to be represented as a float11, set to max
357 return float10Max;
358 }
359 else
360 {
361 if (float32Val < float32Minfloat10)
362 {
363 // The number is too small to be represented as a normalized float11
364 // Convert it to a denormalized value.
365 const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
366 (float32Val >> float32ExponentFirstBit);
367 float32Val =
368 ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
369 }
370 else
371 {
372 // Rebias the exponent to represent the value as a normalized float11
373 float32Val += 0xC8000000;
374 }
375
376 return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
377 }
378 }
379
float11ToFloat32(unsigned short fp11)380 inline float float11ToFloat32(unsigned short fp11)
381 {
382 unsigned short exponent = (fp11 >> 6) & 0x1F;
383 unsigned short mantissa = fp11 & 0x3F;
384
385 if (exponent == 0x1F)
386 {
387 // INF or NAN
388 return bitCast<float>(0x7f800000 | (mantissa << 17));
389 }
390 else
391 {
392 if (exponent != 0)
393 {
394 // normalized
395 }
396 else if (mantissa != 0)
397 {
398 // The value is denormalized
399 exponent = 1;
400
401 do
402 {
403 exponent--;
404 mantissa <<= 1;
405 } while ((mantissa & 0x40) == 0);
406
407 mantissa = mantissa & 0x3F;
408 }
409 else // The value is zero
410 {
411 exponent = static_cast<unsigned short>(-112);
412 }
413
414 return bitCast<float>(((exponent + 112) << 23) | (mantissa << 17));
415 }
416 }
417
float10ToFloat32(unsigned short fp11)418 inline float float10ToFloat32(unsigned short fp11)
419 {
420 unsigned short exponent = (fp11 >> 5) & 0x1F;
421 unsigned short mantissa = fp11 & 0x1F;
422
423 if (exponent == 0x1F)
424 {
425 // INF or NAN
426 return bitCast<float>(0x7f800000 | (mantissa << 17));
427 }
428 else
429 {
430 if (exponent != 0)
431 {
432 // normalized
433 }
434 else if (mantissa != 0)
435 {
436 // The value is denormalized
437 exponent = 1;
438
439 do
440 {
441 exponent--;
442 mantissa <<= 1;
443 } while ((mantissa & 0x20) == 0);
444
445 mantissa = mantissa & 0x1F;
446 }
447 else // The value is zero
448 {
449 exponent = static_cast<unsigned short>(-112);
450 }
451
452 return bitCast<float>(((exponent + 112) << 23) | (mantissa << 18));
453 }
454 }
455
// Converts to and from float and 16.16 fixed point format.
457
// Converts a 16.16 fixed-point value, stored as a two's-complement bit pattern in a uint32_t, to
// float.
inline float FixedToFloat(uint32_t fixedInput)
{
    // Reinterpret as signed first: FloatToFixed encodes negative numbers in two's complement, and
    // converting the raw unsigned value would turn them into large positive results.
    const int32_t signedFixed = static_cast<int32_t>(fixedInput);
    return static_cast<float>(signedFixed) / 65536.0f;
}
462
// Converts a float to 16.16 fixed point, returned as a two's-complement bit pattern in a uint32_t.
// Inputs above 32767.65535 return kHighest and inputs below -32768.65535 return kLowest. NaN
// returns 0. Everything else is scaled by 65536 and truncated toward zero.
inline uint32_t FloatToFixed(float floatInput)
{
    static constexpr uint32_t kHighest = 32767 * 65536 + 65535;
    static constexpr uint32_t kLowest  = static_cast<uint32_t>(-32768 * 65536 + 65535);

    if (floatInput > 32767.65535)
    {
        return kHighest;
    }
    if (floatInput < -32768.65535)
    {
        return kLowest;
    }
    if (floatInput != floatInput)
    {
        // NaN passes both range checks; converting it to an integer is undefined behavior.
        return 0u;
    }

    // Convert through a signed type: a negative float converted directly to an unsigned integer
    // is undefined behavior (and saturates to 0 on some targets). 64 bits are used because inputs
    // in (-32768.65535, -32768) scale to slightly below the int32_t minimum.
    int64_t scaled = static_cast<int64_t>(floatInput * 65536);
    if (scaled < std::numeric_limits<int32_t>::min())
    {
        scaled = std::numeric_limits<int32_t>::min();
    }

    return static_cast<uint32_t>(static_cast<int32_t>(scaled));
}
481
// Converts a normalized integer to float by dividing by the largest value representable in T.
template <typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");

    if (sizeof(T) <= 2)
    {
        // Types of up to 16 bits fit in a float mantissa, so single precision is sufficient.
        constexpr float kScale = 1.0f / std::numeric_limits<T>::max();
        return input * kScale;
    }

    // float has only a 23 bit mantissa, so wider types are computed in double precision.
    constexpr double kScale = 1.0 / std::numeric_limits<T>::max();
    return static_cast<float>(input * kScale);
}
499
// Converts an inputBitCount-bit normalized integer held in a wider type T to float by dividing by
// the largest inputBitCount-bit value.
template <unsigned int inputBitCount, typename T>
inline float normalizedToFloat(T input)
{
    static_assert(std::numeric_limits<T>::is_integer, "T must be an integer.");
    static_assert(inputBitCount > 0, "inputBitCount must be non-zero.");
    static_assert(inputBitCount < (sizeof(T) * 8), "T must have more bits than inputBitCount.");

    // Build the maximum in 64 bits: "1 << inputBitCount" is an int shift that overflows (and
    // fails to compile inside the constant expressions below) once inputBitCount reaches 31.
    constexpr uint64_t kMaxValue = (uint64_t{1} << inputBitCount) - 1;

    if (inputBitCount > 23)
    {
        // float has only a 23 bit mantissa, so we need to do the calculation in double precision
        constexpr double inverseMax = 1.0 / kMaxValue;
        return static_cast<float>(input * inverseMax);
    }
    else
    {
        constexpr float inverseMax = 1.0f / kMaxValue;
        return input * inverseMax;
    }
}
518
// Scales a float by the largest value of T and converts it to T, adding 0.5 before the truncating
// conversion. The input is not clamped here.
template <typename T>
inline T floatToNormalized(float input)
{
    constexpr T kMaxValue = std::numeric_limits<T>::max();

    if (sizeof(T) <= 2)
    {
        return static_cast<T>(kMaxValue * input + 0.5f);
    }

    // float has only a 23 bit mantissa, so wider types are computed in double precision.
    return static_cast<T>(kMaxValue * static_cast<double>(input) + 0.5);
}
532
// Scales a float by the largest outputBitCount-bit value and converts it to T, adding 0.5 before
// the truncating conversion. The input is not clamped here.
template <unsigned int outputBitCount, typename T>
inline T floatToNormalized(float input)
{
    static_assert(outputBitCount < (sizeof(T) * 8), "T must have more bits than outputBitCount.");

    // Build the maximum in 64 bits: "1 << outputBitCount" is an int shift, which overflows once
    // outputBitCount reaches 31 even though wider T types permit such counts.
    constexpr uint64_t kMaxValue = (uint64_t{1} << outputBitCount) - 1;

    if (outputBitCount > 23)
    {
        // float has only a 23 bit mantissa, so we need to do the calculation in double precision
        return static_cast<T>(kMaxValue * static_cast<double>(input) + 0.5);
    }
    else
    {
        return static_cast<T>(kMaxValue * input + 0.5f);
    }
}
548
// Extracts the inputBitCount-bit field that starts at bit inputBitStart of 'input' and returns it
// in the low bits of the result.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T getShiftedData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");
    // The mask is built in 64 bits because "1 << inputBitCount" is an int shift that overflows
    // for counts of 31 and above (the assert above allows e.g. a full 32-bit field). The modulo
    // keeps the shift count valid in the branch that is not selected when inputBitCount is 64.
    constexpr uint64_t kWideMask = (inputBitCount >= 64u)
                                       ? ~uint64_t{0}
                                       : ((uint64_t{1} << (inputBitCount % 64u)) - 1u);
    const T mask = static_cast<T>(kWideMask);
    return (input >> inputBitStart) & mask;
}
557
// Keeps the low inputBitCount bits of 'input' and moves them up so that the field starts at bit
// inputBitStart.
template <unsigned int inputBitCount, unsigned int inputBitStart, typename T>
inline T shiftData(T input)
{
    static_assert(inputBitCount + inputBitStart <= (sizeof(T) * 8),
                  "T must have at least as many bits as inputBitCount + inputBitStart.");
    // The mask is built in 64 bits because "1 << inputBitCount" is an int shift that overflows
    // for counts of 31 and above (the assert above allows e.g. a full 32-bit field). The modulo
    // keeps the shift count valid in the branch that is not selected when inputBitCount is 64.
    constexpr uint64_t kWideMask = (inputBitCount >= 64u)
                                       ? ~uint64_t{0}
                                       : ((uint64_t{1} << (inputBitCount % 64u)) - 1u);
    const T mask = static_cast<T>(kWideMask);
    return (input & mask) << inputBitStart;
}
566
// Counts the zero bits above the most significant set bit of x; returns 32 when x is 0.
inline unsigned int CountLeadingZeros(uint32_t x)
{
    unsigned int zeros = 32u;

    // Binary search: whenever the upper part is non-zero, discard the lower part and account for
    // the bits that were skipped.
    for (unsigned int shift = 16u; shift >= 2u; shift >>= 1u)
    {
        const uint32_t upper = x >> shift;
        if (upper != 0)
        {
            zeros -= shift;
            x = upper;
        }
    }

    // x now fits in two bits: either bit 1 is set, or x is 0 or 1.
    if ((x >> 1u) != 0)
    {
        return zeros - 2u;
    }
    return zeros - x;
}
604
// Average of two unsigned chars, rounded down.
inline unsigned char average(unsigned char a, unsigned char b)
{
    // Bits common to both operands count in full; bits that differ count for half.
    const int sharedBits   = a & b;
    const int halfDiffBits = (a ^ b) >> 1;
    return halfDiffBits + sharedBits;
}
609
// Average of two signed chars; the sum is formed in a wider type and the division truncates
// toward zero.
inline signed char average(signed char a, signed char b)
{
    const int sum = static_cast<short>(a) + static_cast<short>(b);
    return sum / 2;
}
614
// Average of two unsigned shorts, rounded down.
inline unsigned short average(unsigned short a, unsigned short b)
{
    // Bits common to both operands count in full; bits that differ count for half.
    const int sharedBits   = a & b;
    const int halfDiffBits = (a ^ b) >> 1;
    return halfDiffBits + sharedBits;
}
619
// Average of two signed shorts; the sum is formed as int and the division truncates toward zero.
inline signed short average(signed short a, signed short b)
{
    const int sum = static_cast<int>(a) + static_cast<int>(b);
    return sum / 2;
}
624
// Average of two unsigned ints, rounded down, computed without forming the (possibly
// overflowing) sum.
inline unsigned int average(unsigned int a, unsigned int b)
{
    // Bits common to both operands count in full; bits that differ count for half.
    const unsigned int sharedBits   = a & b;
    const unsigned int halfDiffBits = (a ^ b) >> 1;
    return halfDiffBits + sharedBits;
}
629
// Average of two ints; the sum is formed in long long and the division truncates toward zero.
inline int average(int a, int b)
{
    const long long wideSum = static_cast<long long>(a) + static_cast<long long>(b);
    return static_cast<int>(wideSum / 2ll);
}
635
// Arithmetic mean of two floats.
inline float average(float a, float b)
{
    const float sum = a + b;
    return sum * 0.5f;
}
640
averageHalfFloat(unsigned short a,unsigned short b)641 inline unsigned short averageHalfFloat(unsigned short a, unsigned short b)
642 {
643 return float32ToFloat16((float16ToFloat32(a) + float16ToFloat32(b)) * 0.5f);
644 }
645
averageFloat11(unsigned int a,unsigned int b)646 inline unsigned int averageFloat11(unsigned int a, unsigned int b)
647 {
648 return float32ToFloat11((float11ToFloat32(static_cast<unsigned short>(a)) +
649 float11ToFloat32(static_cast<unsigned short>(b))) *
650 0.5f);
651 }
652
averageFloat10(unsigned int a,unsigned int b)653 inline unsigned int averageFloat10(unsigned int a, unsigned int b)
654 {
655 return float32ToFloat10((float10ToFloat32(static_cast<unsigned short>(a)) +
656 float10ToFloat32(static_cast<unsigned short>(b))) *
657 0.5f);
658 }
659
// Half-open interval [low, high) over values of type T.
template <typename T>
class Range
{
  public:
    // Creates an empty range with both bounds value-initialized (zero for arithmetic types), so a
    // default-constructed range never exposes indeterminate bounds.
    Range() : mLow(), mHigh() {}
    Range(T lo, T hi) : mLow(lo), mHigh(hi) {}

    // Distance between the bounds; 0 when the range is empty.
    T length() const { return (empty() ? 0 : (mHigh - mLow)); }

    // True when this range and 'other' overlap. Const-qualified so it is usable on const ranges.
    bool intersects(Range<T> other) const
    {
        if (mLow <= other.mLow)
        {
            return other.mLow < mHigh;
        }
        else
        {
            return mLow < other.mHigh;
        }
    }

    // Grows the range so that it contains 'value'. The upper bound is exclusive, so extending to
    // 5 moves the upper bound to 6.
    void extend(T value)
    {
        mLow  = value < mLow ? value : mLow;
        mHigh = value >= mHigh ? (value + 1) : mHigh;
    }

    bool empty() const { return mHigh <= mLow; }

    bool contains(T value) const { return value >= mLow && value < mHigh; }

    // Minimal forward iterator that yields every value from low() up to, excluding, high().
    class Iterator final
    {
      public:
        Iterator(T value) : mCurrent(value) {}

        Iterator &operator++()
        {
            ++mCurrent;
            return *this;
        }
        bool operator==(const Iterator &other) const { return mCurrent == other.mCurrent; }
        bool operator!=(const Iterator &other) const { return mCurrent != other.mCurrent; }
        T operator*() const { return mCurrent; }

      private:
        T mCurrent;
    };

    Iterator begin() const { return Iterator(mLow); }

    Iterator end() const { return Iterator(mHigh); }

    T low() const { return mLow; }
    T high() const { return mHigh; }

    // Resets to an inverted (empty) range so that the next extend() call defines both bounds.
    void invalidate()
    {
        mLow = std::numeric_limits<T>::max();
        // lowest() rather than min(): for floating-point T, min() is the smallest positive value,
        // which would leave the upper bound above any negative value passed to extend().
        mHigh = std::numeric_limits<T>::lowest();
    }

  private:
    T mLow;
    T mHigh;
};
727
728 typedef Range<int> RangeI;
729 typedef Range<unsigned int> RangeUI;
730
731 struct IndexRange
732 {
733 struct Undefined
734 {};
IndexRangeIndexRange735 IndexRange(Undefined) {}
IndexRangeIndexRange736 IndexRange() : IndexRange(0, 0, 0) {}
IndexRangeIndexRange737 IndexRange(size_t start_, size_t end_, size_t vertexIndexCount_)
738 : start(start_), end(end_), vertexIndexCount(vertexIndexCount_)
739 {
740 ASSERT(start <= end);
741 }
742
743 // Number of vertices in the range.
vertexCountIndexRange744 size_t vertexCount() const { return (end - start) + 1; }
745
746 // Inclusive range of indices that are not primitive restart
747 size_t start;
748 size_t end;
749
750 // Number of non-primitive restart indices
751 size_t vertexIndexCount;
752 };
753
// Builds a float from a mantissa (x) and an integer exponent (exp), i.e. x * 2^exp, as the GLSL
// ldexp() built-in does. Exponents above 128 yield infinity and exponents below -126 yield zero,
// in both cases regardless of x.
inline float Ldexp(float x, int exp)
{
    constexpr int kMaxExponent = 128;
    constexpr int kMinExponent = -126;

    if (exp > kMaxExponent)
    {
        return std::numeric_limits<float>::infinity();
    }
    if (exp < kMinExponent)
    {
        return 0.0f;
    }

    // Evaluate in double so the intermediate scale factor cannot overflow or underflow.
    const double scale = std::pow(2.0, static_cast<double>(exp));
    return static_cast<float>(static_cast<double>(x) * scale);
}
769
770 // First, both normalized floating-point values are converted into 16-bit integer values.
771 // Then, the results are packed into the returned 32-bit unsigned integer.
772 // The first float value will be written to the least significant bits of the output;
773 // the last float value will be written to the most significant bits.
774 // The conversion of each value to fixed point is done as follows :
775 // packSnorm2x16 : round(clamp(c, -1, +1) * 32767.0)
packSnorm2x16(float f1,float f2)776 inline uint32_t packSnorm2x16(float f1, float f2)
777 {
778 int16_t leastSignificantBits = static_cast<int16_t>(roundf(clamp(f1, -1.0f, 1.0f) * 32767.0f));
779 int16_t mostSignificantBits = static_cast<int16_t>(roundf(clamp(f2, -1.0f, 1.0f) * 32767.0f));
780 return static_cast<uint32_t>(mostSignificantBits) << 16 |
781 (static_cast<uint32_t>(leastSignificantBits) & 0xFFFF);
782 }
783
784 // First, unpacks a single 32-bit unsigned integer u into a pair of 16-bit unsigned integers. Then,
785 // each component is converted to a normalized floating-point value to generate the returned two
786 // float values. The first float value will be extracted from the least significant bits of the
787 // input; the last float value will be extracted from the most-significant bits. The conversion for
788 // unpacked fixed-point value to floating point is done as follows: unpackSnorm2x16 : clamp(f /
789 // 32767.0, -1, +1)
unpackSnorm2x16(uint32_t u,float * f1,float * f2)790 inline void unpackSnorm2x16(uint32_t u, float *f1, float *f2)
791 {
792 int16_t leastSignificantBits = static_cast<int16_t>(u & 0xFFFF);
793 int16_t mostSignificantBits = static_cast<int16_t>(u >> 16);
794 *f1 = clamp(static_cast<float>(leastSignificantBits) / 32767.0f, -1.0f, 1.0f);
795 *f2 = clamp(static_cast<float>(mostSignificantBits) / 32767.0f, -1.0f, 1.0f);
796 }
797
798 // First, both normalized floating-point values are converted into 16-bit integer values.
799 // Then, the results are packed into the returned 32-bit unsigned integer.
800 // The first float value will be written to the least significant bits of the output;
801 // the last float value will be written to the most significant bits.
802 // The conversion of each value to fixed point is done as follows:
803 // packUnorm2x16 : round(clamp(c, 0, +1) * 65535.0)
packUnorm2x16(float f1,float f2)804 inline uint32_t packUnorm2x16(float f1, float f2)
805 {
806 uint16_t leastSignificantBits = static_cast<uint16_t>(roundf(clamp(f1, 0.0f, 1.0f) * 65535.0f));
807 uint16_t mostSignificantBits = static_cast<uint16_t>(roundf(clamp(f2, 0.0f, 1.0f) * 65535.0f));
808 return static_cast<uint32_t>(mostSignificantBits) << 16 |
809 static_cast<uint32_t>(leastSignificantBits);
810 }
811
// Splits a 32-bit word into two 16-bit unsigned integers and converts each to a normalized float
// as f / 65535.0.
// *f1 receives the value from the 16 least significant bits, *f2 the value from the 16 most
// significant bits.
inline void unpackUnorm2x16(uint32_t u, float *f1, float *f2)
{
    const uint16_t lowValue  = static_cast<uint16_t>(u & 0xFFFF);
    const uint16_t highValue = static_cast<uint16_t>(u >> 16);

    constexpr float kDivisor = 65535.0f;
    *f1 = static_cast<float>(lowValue) / kDivisor;
    *f2 = static_cast<float>(highValue) / kDivisor;
}
824
825 // Helper functions intended to be used only here.
826 namespace priv
827 {
828
ToPackedUnorm8(float f)829 inline uint8_t ToPackedUnorm8(float f)
830 {
831 return static_cast<uint8_t>(roundf(clamp(f, 0.0f, 1.0f) * 255.0f));
832 }
833
ToPackedSnorm8(float f)834 inline int8_t ToPackedSnorm8(float f)
835 {
836 return static_cast<int8_t>(roundf(clamp(f, -1.0f, 1.0f) * 127.0f));
837 }
838
839 } // namespace priv
840
841 // Packs 4 normalized unsigned floating-point values to a single 32-bit unsigned integer. Works
842 // similarly to packUnorm2x16. The floats are clamped to the range 0.0 to 1.0, and written to the
843 // unsigned integer starting from the least significant bits.
PackUnorm4x8(float f1,float f2,float f3,float f4)844 inline uint32_t PackUnorm4x8(float f1, float f2, float f3, float f4)
845 {
846 uint8_t bits[4];
847 bits[0] = priv::ToPackedUnorm8(f1);
848 bits[1] = priv::ToPackedUnorm8(f2);
849 bits[2] = priv::ToPackedUnorm8(f3);
850 bits[3] = priv::ToPackedUnorm8(f4);
851 uint32_t result = 0u;
852 for (int i = 0; i < 4; ++i)
853 {
854 int shift = i * 8;
855 result |= (static_cast<uint32_t>(bits[i]) << shift);
856 }
857 return result;
858 }
859
// Unpacks four unsigned normalized values from one 32-bit word into f[0..3]; each byte is divided
// by 255. f[0] comes from the least significant byte and f[3] from the most significant byte.
inline void UnpackUnorm4x8(uint32_t u, float *f)
{
    uint32_t remaining = u;
    for (int i = 0; i < 4; ++i)
    {
        const uint8_t component = static_cast<uint8_t>(remaining & 0xFF);
        f[i]                    = static_cast<float>(component) / 255.0f;
        remaining >>= 8;
    }
}
872
873 // Packs 4 normalized signed floating-point values to a single 32-bit unsigned integer. The floats
874 // are clamped to the range -1.0 to 1.0, and written to the unsigned integer starting from the least
875 // significant bits.
PackSnorm4x8(float f1,float f2,float f3,float f4)876 inline uint32_t PackSnorm4x8(float f1, float f2, float f3, float f4)
877 {
878 int8_t bits[4];
879 bits[0] = priv::ToPackedSnorm8(f1);
880 bits[1] = priv::ToPackedSnorm8(f2);
881 bits[2] = priv::ToPackedSnorm8(f3);
882 bits[3] = priv::ToPackedSnorm8(f4);
883 uint32_t result = 0u;
884 for (int i = 0; i < 4; ++i)
885 {
886 int shift = i * 8;
887 result |= ((static_cast<uint32_t>(bits[i]) & 0xFF) << shift);
888 }
889 return result;
890 }
891
892 // Unpacks 4 normalized signed floating-point values from a single 32-bit unsigned integer into f.
893 // Works similarly to unpackSnorm2x16. The floats are unpacked starting from the least significant
894 // bits, and clamped to the range -1.0 to 1.0.
UnpackSnorm4x8(uint32_t u,float * f)895 inline void UnpackSnorm4x8(uint32_t u, float *f)
896 {
897 for (int i = 0; i < 4; ++i)
898 {
899 int shift = i * 8;
900 int8_t bits = static_cast<int8_t>((u >> shift) & 0xFF);
901 f[i] = clamp(static_cast<float>(bits) / 127.0f, -1.0f, 1.0f);
902 }
903 }
904
905 // Returns an unsigned integer obtained by converting the two floating-point values to the 16-bit
906 // floating-point representation found in the OpenGL ES Specification, and then packing these
907 // two 16-bit integers into a 32-bit unsigned integer.
908 // f1: The 16 least-significant bits of the result;
909 // f2: The 16 most-significant bits.
packHalf2x16(float f1,float f2)910 inline uint32_t packHalf2x16(float f1, float f2)
911 {
912 uint16_t leastSignificantBits = static_cast<uint16_t>(float32ToFloat16(f1));
913 uint16_t mostSignificantBits = static_cast<uint16_t>(float32ToFloat16(f2));
914 return static_cast<uint32_t>(mostSignificantBits) << 16 |
915 static_cast<uint32_t>(leastSignificantBits);
916 }
917
918 // Returns two floating-point values obtained by unpacking a 32-bit unsigned integer into a pair of
919 // 16-bit values, interpreting those values as 16-bit floating-point numbers according to the OpenGL
920 // ES Specification, and converting them to 32-bit floating-point values. The first float value is
921 // obtained from the 16 least-significant bits of u; the second component is obtained from the 16
922 // most-significant bits of u.
unpackHalf2x16(uint32_t u,float * f1,float * f2)923 inline void unpackHalf2x16(uint32_t u, float *f1, float *f2)
924 {
925 uint16_t leastSignificantBits = static_cast<uint16_t>(u & 0xFFFF);
926 uint16_t mostSignificantBits = static_cast<uint16_t>(u >> 16);
927
928 *f1 = float16ToFloat32(leastSignificantBits);
929 *f2 = float16ToFloat32(mostSignificantBits);
930 }
931
sRGBToLinear(uint8_t srgbValue)932 inline uint8_t sRGBToLinear(uint8_t srgbValue)
933 {
934 float value = srgbValue / 255.0f;
935 if (value <= 0.04045f)
936 {
937 value = value / 12.92f;
938 }
939 else
940 {
941 value = std::pow((value + 0.055f) / 1.055f, 2.4f);
942 }
943 return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
944 }
945
linearToSRGB(uint8_t linearValue)946 inline uint8_t linearToSRGB(uint8_t linearValue)
947 {
948 float value = linearValue / 255.0f;
949 if (value <= 0.0f)
950 {
951 value = 0.0f;
952 }
953 else if (value < 0.0031308f)
954 {
955 value = value * 12.92f;
956 }
957 else if (value < 1.0f)
958 {
959 value = std::pow(value, 0.41666f) * 1.055f - 0.055f;
960 }
961 else
962 {
963 value = 1.0f;
964 }
965 return static_cast<uint8_t>(clamp(value * 255.0f + 0.5f, 0.0f, 255.0f));
966 }
967
// Reverse the order of the bits: bit 0 of the input becomes bit 31 of the result, and so on.
inline uint32_t BitfieldReverse(uint32_t value)
{
    // TODO(oetuaho@nvidia.com): Optimize this if needed. There don't seem to be compiler intrinsics
    // for this, and right now it's not used in performance-critical paths.
    uint32_t reversed = 0u;
    for (unsigned int i = 0u; i < 32u; ++i)
    {
        // Move the lowest remaining input bit into the bottom of the result; earlier bits are
        // pushed toward the top.
        reversed = (reversed << 1) | (value & 1u);
        value >>= 1;
    }
    return reversed;
}
980
// Count the 1 bits.
// MSVC x86/x64 builds: use the __popcnt intrinsics. The ANGLE_HAS_BITCOUNT_* macros record which
// widths have a native implementation so the generic fallbacks are not compiled in as well.
#if defined(_M_IX86) || defined(_M_X64)
#    define ANGLE_HAS_BITCOUNT_32
inline int BitCount(uint32_t bits)
{
    const auto setBits = __popcnt(bits);
    return static_cast<int>(setBits);
}
#    if defined(_M_X64)
#        define ANGLE_HAS_BITCOUNT_64
inline int BitCount(uint64_t bits)
{
    const auto setBits = __popcnt64(bits);
    return static_cast<int>(setBits);
}
#    endif  // defined(_M_X64)
#endif      // defined(_M_IX86) || defined(_M_X64)
996
// POSIX builds: use the compiler's popcount builtins. The 64-bit overload is only provided when
// ANGLE_IS_64_BIT_CPU is defined.
#if defined(ANGLE_PLATFORM_POSIX)
#    define ANGLE_HAS_BITCOUNT_32
inline int BitCount(uint32_t bits)
{
    const int setBits = __builtin_popcount(bits);
    return setBits;
}

#    if defined(ANGLE_IS_64_BIT_CPU)
#        define ANGLE_HAS_BITCOUNT_64
inline int BitCount(uint64_t bits)
{
    const int setBits = __builtin_popcountll(bits);
    return setBits;
}
#    endif  // defined(ANGLE_IS_64_BIT_CPU)
#endif      // defined(ANGLE_PLATFORM_POSIX)
1012
1013 int BitCountPolyfill(uint32_t bits);
1014
1015 #if !defined(ANGLE_HAS_BITCOUNT_32)
BitCount(const uint32_t bits)1016 inline int BitCount(const uint32_t bits)
1017 {
1018 return BitCountPolyfill(bits);
1019 }
1020 #endif // !defined(ANGLE_HAS_BITCOUNT_32)
1021
// Fallback when no native 64-bit bit count was selected above: count the two 32-bit halves
// separately and add the results.
#if !defined(ANGLE_HAS_BITCOUNT_64)
inline int BitCount(const uint64_t bits)
{
    const uint32_t upperHalf = static_cast<uint32_t>(bits >> 32);
    const uint32_t lowerHalf = static_cast<uint32_t>(bits);
    return BitCount(upperHalf) + BitCount(lowerHalf);
}
#endif  // !defined(ANGLE_HAS_BITCOUNT_64)
// The selection macros are only needed while choosing implementations; do not leak them.
#undef ANGLE_HAS_BITCOUNT_32
#undef ANGLE_HAS_BITCOUNT_64
1030
// 8-bit convenience overload: widens to 32 bits and reuses the uint32_t implementation.
inline int BitCount(uint8_t bits)
{
    const uint32_t widened = bits;
    return BitCount(widened);
}
1035
// 16-bit convenience overload: widens to 32 bits and reuses the uint32_t implementation.
inline int BitCount(uint16_t bits)
{
    const uint32_t widened = bits;
    return BitCount(widened);
}
1040
#if defined(ANGLE_PLATFORM_WINDOWS)
// Return the index of the least significant bit set. Indexing is such that bit 0 is the least
// significant bit. Implemented for different bit widths on different platforms.
// |bits| must be non-zero; this is asserted.
inline unsigned long ScanForward(uint32_t bits)
{
    ASSERT(bits != 0u);
    unsigned long lowestSetBit = 0ul;
    const unsigned char found  = _BitScanForward(&lowestSetBit, bits);
    ASSERT(found != 0u);
    return lowestSetBit;
}

#    if defined(ANGLE_IS_64_BIT_CPU)
// 64-bit variant, only provided when ANGLE_IS_64_BIT_CPU is defined.
inline unsigned long ScanForward(uint64_t bits)
{
    ASSERT(bits != 0u);
    unsigned long lowestSetBit = 0ul;
    const unsigned char found  = _BitScanForward64(&lowestSetBit, bits);
    ASSERT(found != 0u);
    return lowestSetBit;
}
#    endif  // defined(ANGLE_IS_64_BIT_CPU)
#endif      // defined(ANGLE_PLATFORM_WINDOWS)
1064
#if defined(ANGLE_PLATFORM_POSIX)
// POSIX flavour of ScanForward: the index of the lowest set bit is the count of trailing zeros.
// |bits| must be non-zero; this is asserted.
inline unsigned long ScanForward(uint32_t bits)
{
    ASSERT(bits != 0u);
    const int trailingZeros = __builtin_ctz(bits);
    return static_cast<unsigned long>(trailingZeros);
}

#    if defined(ANGLE_IS_64_BIT_CPU)
// 64-bit variant, only provided when ANGLE_IS_64_BIT_CPU is defined.
inline unsigned long ScanForward(uint64_t bits)
{
    ASSERT(bits != 0u);
    const int trailingZeros = __builtin_ctzll(bits);
    return static_cast<unsigned long>(trailingZeros);
}
#    endif  // defined(ANGLE_IS_64_BIT_CPU)
#endif      // defined(ANGLE_PLATFORM_POSIX)
1080
// 8-bit convenience overload: widens to 32 bits and reuses the uint32_t implementation.
inline unsigned long ScanForward(uint8_t bits)
{
    const uint32_t widened = bits;
    return ScanForward(widened);
}
1085
// 16-bit convenience overload: widens to 32 bits and reuses the uint32_t implementation.
inline unsigned long ScanForward(uint16_t bits)
{
    const uint32_t widened = bits;
    return ScanForward(widened);
}
1090
1091 // Return the index of the most significant bit set. Indexing is such that bit 0 is the least
1092 // significant bit.
ScanReverse(unsigned long bits)1093 inline unsigned long ScanReverse(unsigned long bits)
1094 {
1095 ASSERT(bits != 0u);
1096 #if defined(ANGLE_PLATFORM_WINDOWS)
1097 unsigned long lastBitIndex = 0ul;
1098 unsigned char ret = _BitScanReverse(&lastBitIndex, bits);
1099 ASSERT(ret != 0u);
1100 return lastBitIndex;
1101 #elif defined(ANGLE_PLATFORM_POSIX)
1102 return static_cast<unsigned long>(sizeof(unsigned long) * CHAR_BIT - 1 - __builtin_clzl(bits));
1103 #else
1104 # error Please implement bit-scan-reverse for your platform!
1105 #endif
1106 }
1107
// Returns -1 on 0, otherwise the index of the least significant 1 bit as in GLSL.
// The non-zero case is delegated to the ScanForward overload matching T.
template <typename T>
int FindLSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    return (bits == 0u) ? -1 : static_cast<int>(ScanForward(bits));
}
1122
// Returns -1 on 0, otherwise the index of the most significant 1 bit as in GLSL.
// The non-zero case is delegated to ScanReverse.
// NOTE(review): ScanReverse takes unsigned long, so a T wider than unsigned long would be
// narrowed at the call - confirm callers only pass types that fit.
template <typename T>
int FindMSB(T bits)
{
    static_assert(std::is_integral<T>::value, "must be integral type.");
    return (bits == 0u) ? -1 : static_cast<int>(ScanReverse(bits));
}
1137
1138 // Returns whether the argument is Not a Number.
1139 // IEEE 754 single precision NaN representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1140 // non-zero.
isNaN(float f)1141 inline bool isNaN(float f)
1142 {
1143 // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1144 // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1145 return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1146 (bitCast<uint32_t>(f) & 0x7fffffu);
1147 }
1148
1149 // Returns whether the argument is infinity.
1150 // IEEE 754 single precision infinity representation: Exponent(8 bits) - 255, Mantissa(23 bits) -
1151 // zero.
isInf(float f)1152 inline bool isInf(float f)
1153 {
1154 // Exponent mask: ((1u << 8) - 1u) << 23 = 0x7f800000u
1155 // Mantissa mask: ((1u << 23) - 1u) = 0x7fffffu
1156 return ((bitCast<uint32_t>(f) & 0x7f800000u) == 0x7f800000u) &&
1157 !(bitCast<uint32_t>(f) & 0x7fffffu);
1158 }
1159
namespace priv
{
// Compile-time search for the integer square root of N, testing candidate R and then R + 1.
// |value| is R when R * R == N, and 0 as soon as R * R exceeds N (N is not a perfect square of
// any candidate tried).
// NOTE(review): solve() names iSquareRoot<N, R + 1> at every level, so instantiation appears to
// continue up to the <N, N> specialization even once a result is known - confirm before using
// with large N.
template <unsigned int N, unsigned int R>
struct iSquareRoot
{
    static constexpr unsigned int solve()
    {
        if (R * R > N)
        {
            // Overshot without finding an exact root.
            return 0;
        }
        if (R * R == N)
        {
            return R;
        }
        return static_cast<unsigned int>(iSquareRoot<N, R + 1>::value);
    }
    enum Result
    {
        value = iSquareRoot::solve()
    };
};

// Ends the candidate sequence when R reaches N; |value| is N itself.
template <unsigned int N>
struct iSquareRoot<N, N>
{
    enum result
    {
        value = N
    };
};

}  // namespace priv
1187
1188 template <unsigned int N>
1189 constexpr unsigned int iSquareRoot()
1190 {
1191 return priv::iSquareRoot<N, 1>::value;
1192 }
1193
1194 // Sum, difference and multiplication operations for signed ints that wrap on 32-bit overflow.
1195 //
1196 // Unsigned types are defined to do arithmetic modulo 2^n in C++. For signed types, overflow
1197 // behavior is undefined.
1198
// Adds |lhs| and |rhs| with 32-bit wrap-around: both operands are converted to uint32_t, added
// modulo 2^32, and the result is converted back to T.
template <typename T>
inline T WrappingSum(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) + static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1206
// Subtracts |rhs| from |lhs| with 32-bit wrap-around: both operands are converted to uint32_t,
// subtracted modulo 2^32, and the result is converted back to T.
template <typename T>
inline T WrappingDiff(T lhs, T rhs)
{
    const uint32_t wrapped = static_cast<uint32_t>(lhs) - static_cast<uint32_t>(rhs);
    return static_cast<T>(wrapped);
}
1214
// Multiplies two 32-bit signed values and keeps only the low 32 bits of the product.
inline int32_t WrappingMul(int32_t lhs, int32_t rhs)
{
    // The product of two 32-bit values always fits in 64 bits, so this cannot overflow.
    const int64_t fullProduct = static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
    // Discard the high-order 32 bits to get the wrapped result.
    const int64_t lowWord = fullProduct & 0xffffffffll;
    // Narrow back to the 32-bit signed result type.
    return static_cast<int32_t>(lowWord);
}
1227
// Scales a screen-space extent to normalized device coordinates: 2 * dimension / viewport size.
inline float scaleScreenDimensionToNdc(float dimensionScreen, float viewportDimension)
{
    const float doubledDimension = 2.0f * dimensionScreen;
    return doubledDimension / viewportDimension;
}
1232
// Maps a screen-space coordinate to normalized device coordinates: the coordinate is divided by
// the viewport size, shifted by -0.5 and doubled, so 0 maps to -1 and the viewport size maps to 1.
inline float scaleScreenCoordinateToNdc(float coordinateScreen, float viewportDimension)
{
    const float centered = coordinateScreen / viewportDimension - 0.5f;
    return 2.0f * centered;
}
1238
1239 } // namespace gl
1240
1241 namespace rx
1242 {
1243
// Rounds |value| up to the next multiple of |alignment|; a value that is already a multiple is
// returned unchanged. Works for any alignment, not just powers of two.
template <typename T>
T roundUp(const T value, const T alignment)
{
    // Overshoot by (alignment - 1), then drop whatever exceeds the last full multiple.
    const auto bumped = value + alignment - static_cast<T>(1);
    const auto excess = bumped % alignment;
    return bumped - excess;
}
1250
1251 template <typename T>
1252 constexpr T roundUpPow2(const T value, const T alignment)
1253 {
1254 ASSERT(gl::isPow2(alignment));
1255 return (value + alignment - 1) & ~(alignment - 1);
1256 }
1257
1258 template <typename T>
1259 angle::CheckedNumeric<T> CheckedRoundUp(const T value, const T alignment)
1260 {
1261 angle::CheckedNumeric<T> checkedValue(value);
1262 angle::CheckedNumeric<T> checkedAlignment(alignment);
1263 return roundUp(checkedValue, checkedAlignment);
1264 }
1265
// Divides |value| by |divisor| and rounds the quotient up. Uses the remainder rather than
// (value + divisor - 1) / divisor, so the intermediate sum cannot wrap.
inline constexpr unsigned int UnsignedCeilDivide(unsigned int value, unsigned int divisor)
{
    const unsigned int quotient  = value / divisor;
    const unsigned int remainder = value % divisor;
    return (remainder == 0) ? quotient : (quotient + 1);
}
1271
// ANGLE_HAS_BUILTIN(x) forwards to __has_builtin(x) when the compiler provides it and evaluates
// to 0 otherwise, so it can be used in #if expressions on any compiler.
#if !defined(__has_builtin)
#    define ANGLE_HAS_BUILTIN(x) 0
#else
#    define ANGLE_HAS_BUILTIN(x) __has_builtin(x)
#endif
1277
// Pick the implementation behind the ANGLE_ROTL, ANGLE_ROTL64 and ANGLE_ROTR16 rotate macros.
#if defined(_MSC_VER)

// _MSC_VER builds map the macros onto _rotl, _rotl64 and _rotr16.
#    define ANGLE_ROTL(x, y) _rotl(x, y)
#    define ANGLE_ROTL64(x, y) _rotl64(x, y)
#    define ANGLE_ROTR16(x, y) _rotr16(x, y)

#elif defined(__clang__) && ANGLE_HAS_BUILTIN(__builtin_rotateleft32) && \
    ANGLE_HAS_BUILTIN(__builtin_rotateleft64) && ANGLE_HAS_BUILTIN(__builtin_rotateright16)

// Clang builds use the rotate builtins, but only when all three are reported as available.
#    define ANGLE_ROTL(x, y) __builtin_rotateleft32(x, y)
#    define ANGLE_ROTL64(x, y) __builtin_rotateleft64(x, y)
#    define ANGLE_ROTR16(x, y) __builtin_rotateright16(x, y)

// Every other configuration uses the portable shift-based functions that follow.
#else
1292
// Portable 32-bit rotate-left. The count is reduced modulo 32 so that a count of 0 (or any
// multiple of 32) returns |x| unchanged instead of shifting by the full bit width, which is
// undefined behavior.
inline uint32_t RotL(uint32_t x, int8_t r)
{
    const uint32_t count = static_cast<uint32_t>(r) & 31u;
    // (32 - count) & 31 maps count == 0 to a right shift of 0 rather than 32.
    return (x << count) | (x >> ((32u - count) & 31u));
}
1297
// Portable 64-bit rotate-left. The count is reduced modulo 64 so that a count of 0 (or any
// multiple of 64) returns |x| unchanged instead of shifting by the full bit width, which is
// undefined behavior.
inline uint64_t RotL64(uint64_t x, int8_t r)
{
    const uint32_t count = static_cast<uint32_t>(r) & 63u;
    // (64 - count) & 63 maps count == 0 to a right shift of 0 rather than 64.
    return (x << count) | (x >> ((64u - count) & 63u));
}
1302
// Portable 16-bit rotate-right. The shifts are done on an unsigned 32-bit copy so the promoted
// operand can never overflow a signed int, and the count is reduced modulo 16 so a count of 0
// (or any multiple of 16) returns |x| unchanged.
inline uint16_t RotR16(uint16_t x, int8_t r)
{
    const uint32_t count = static_cast<uint32_t>(r) & 15u;
    const uint32_t wide  = x;
    // Bits shifted above bit 15 are discarded by the final narrowing cast.
    return static_cast<uint16_t>((wide >> count) | (wide << ((16u - count) & 15u)));
}
1307
// Fallback: route the rotate macros to the portable rx::RotL, rx::RotL64 and rx::RotR16
// functions.
#    define ANGLE_ROTL(x, y) ::rx::RotL(x, y)
#    define ANGLE_ROTL64(x, y) ::rx::RotL64(x, y)
#    define ANGLE_ROTR16(x, y) ::rx::RotR16(x, y)

#endif  // defined(_MSC_VER)
1313
// Returns floor(log2(bytes)) for bytes >= 1, computed by repeated halving.
// An input of 0 returns 0 instead of recursing without end (0 / 2 stays 0).
constexpr unsigned int Log2(unsigned int bytes)
{
    return bytes <= 1 ? 0 : (1 + Log2(bytes / 2));
}
1318 } // namespace rx
1319
1320 #endif // COMMON_MATHUTIL_H_
1321