// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef sw_Math_hpp
#define sw_Math_hpp

// The declarations in this header only need uint64_t, which <cstdint> provides.
// Types.hpp is still included whenever it can be found, so code that relies on
// it being pulled in through this header keeps working, while the header also
// compiles on its own.
#if defined(__has_include)
	#if __has_include("Types.hpp")
		#include "Types.hpp"
	#endif
#else
	#include "Types.hpp"
#endif

#include <cmath>
#include <cstdint>
#include <cstring>
#if defined(_MSC_VER)
	#include <intrin.h>
#endif

namespace sw
{
	using std::abs;

	// Some platform headers define min/max as macros, which would break the
	// function templates below.
	#undef min
	#undef max

	// Larger of two values (returns b when they compare equal).
	template<class T>
	inline T max(T a, T b)
	{
		return a > b ? a : b;
	}

	// Smaller of two values (returns b when they compare equal).
	template<class T>
	inline T min(T a, T b)
	{
		return a < b ? a : b;
	}

	// Largest of three values.
	template<class T>
	inline T max(T a, T b, T c)
	{
		return max(max(a, b), c);
	}

	// Smallest of three values.
	template<class T>
	inline T min(T a, T b, T c)
	{
		return min(min(a, b), c);
	}

	// Largest of four values.
	template<class T>
	inline T max(T a, T b, T c, T d)
	{
		return max(max(a, b), max(c, d));
	}

	// Smallest of four values.
	template<class T>
	inline T min(T a, T b, T c, T d)
	{
		return min(min(a, b), min(c, d));
	}

	// Exchanges the values of a and b through a temporary copy.
	template<class T>
	inline void swap(T &a, T &b)
	{
		T t = a;
		a = b;
		b = t;
	}

	// Rounds to the nearest integer; halfway cases round towards +infinity.
	inline int iround(float x)
	{
		return static_cast<int>(std::floor(x + 0.5f));
	//	return _mm_cvtss_si32(_mm_load_ss(&x));   // FIXME: Demands SSE support
	}

	// Largest integer not greater than x.
	inline int ifloor(float x)
	{
		return static_cast<int>(std::floor(x));
	}

	// Rounds x up to the next multiple of 16 (clears the low four bits after
	// adding 15).
	inline int ceilFix4(int x)
	{
		return (x + 0xF) & 0xFFFFFFF0;
	}

	// Divides x by 16, rounding up.
	inline int ceilInt4(int x)
	{
		return (x + 0xF) >> 4;
	}

	// Number of significant bits of a 32-bit value: the 1-based position of the
	// highest set bit, or 0 when x is 0. Each term tests whether any of the top
	// k bits is set, so exactly (highest bit index + 1) terms evaluate to 1.
	// Usable in constant expressions.
	#define BITS(x)    ( \
	!!((x) & 0x80000000) + \
	!!((x) & 0xC0000000) + \
	!!((x) & 0xE0000000) + \
	!!((x) & 0xF0000000) + \
	!!((x) & 0xF8000000) + \
	!!((x) & 0xFC000000) + \
	!!((x) & 0xFE000000) + \
	!!((x) & 0xFF000000) + \
	!!((x) & 0xFF800000) + \
	!!((x) & 0xFFC00000) + \
	!!((x) & 0xFFE00000) + \
	!!((x) & 0xFFF00000) + \
	!!((x) & 0xFFF80000) + \
	!!((x) & 0xFFFC0000) + \
	!!((x) & 0xFFFE0000) + \
	!!((x) & 0xFFFF0000) + \
	!!((x) & 0xFFFF8000) + \
	!!((x) & 0xFFFFC000) + \
	!!((x) & 0xFFFFE000) + \
	!!((x) & 0xFFFFF000) + \
	!!((x) & 0xFFFFF800) + \
	!!((x) & 0xFFFFFC00) + \
	!!((x) & 0xFFFFFE00) + \
	!!((x) & 0xFFFFFF00) + \
	!!((x) & 0xFFFFFF80) + \
	!!((x) & 0xFFFFFFC0) + \
	!!((x) & 0xFFFFFFE0) + \
	!!((x) & 0xFFFFFFF0) + \
	!!((x) & 0xFFFFFFF8) + \
	!!((x) & 0xFFFFFFFC) + \
	!!((x) & 0xFFFFFFFE) + \
	!!((x) & 0xFFFFFFFF))

	// Macro forms of max/min. Note that each argument is evaluated twice.
	#define MAX(x, y) ((x) > (y) ? (x) : (y))
	#define MIN(x, y) ((x) < (y) ? (x) : (y))

	// 2 raised to the power x.
	inline float exp2(float x)
	{
		return exp2f(x);
	}

	// 2 raised to the power x, computed as a shift; x must be a valid shift
	// count for int.
	inline int exp2(int x)
	{
		return 1 << x;
	}

	// Index of the highest set bit of x, i.e. floor(log2(x)).
	// x must be non-zero: neither bit-scan intrinsic defines a result for 0.
	inline unsigned long log2(int x)
	{
		#if defined(_MSC_VER)
			unsigned long y;
			_BitScanReverse(&y, x);
			return y;
		#else
			return 31 - __builtin_clz(x);
		#endif
	}

	// Unbiased binary exponent taken directly from the IEEE-754 encoding of x,
	// which equals floor(log2(|x|)) for normalized values.
	inline int ilog2(float x)
	{
		// memcpy is the well-defined way to inspect the bit pattern; casting the
		// pointer would violate strict aliasing.
		unsigned int y;
		std::memcpy(&y, &x, sizeof(y));

		// Subtract in int so that exponents below the bias give a negative result
		// without going through unsigned wrap-around.
		return static_cast<int>((y & 0x7F800000) >> 23) - 127;
	}

	// Base-2 logarithm of x.
	inline float log2(float x)
	{
		return logf(x) * 1.44269504f;   // 1.0 / log[e](2)
	}

	// True when x has at most one bit set. Note that this also holds for 0.
	inline bool isPow2(int x)
	{
		return (x & -x) == x;
	}

	// Limits x to the range [a, b].
	template<class T>
	inline T clamp(T x, T a, T b)
	{
		if(x < a) x = a;
		if(x > b) x = b;

		return x;
	}

	// Limits x to the range [0, 1].
	inline float clamp01(float x)
	{
		return clamp(x, 0.0f, 1.0f);
	}

	// Smallest power of two that is not less than x (1 for x <= 1).
	// NOTE(review): for x above 2^30 the doubling overflows int - callers are
	// presumably expected to stay below that; confirm.
	inline int ceilPow2(int x)
	{
		int i = 1;

		while(i < x)
		{
			i <<= 1;
		}

		return i;
	}

	// Division rounding towards -infinity. The correction term is the sign of
	// the remainder, so this is only meaningful for a positive divisor b.
	inline int floorDiv(int a, int b)
	{
		return a / b + ((a % b) >> 31);
	}

	// Remainder matching floorDiv: a negative remainder has b added to it.
	inline int floorMod(int a, int b)
	{
		int r = a % b;
		return r + ((r >> 31) & b);
	}

	// Division rounding up when the remainder is positive.
	inline int ceilDiv(int a, int b)
	{
		return a / b - (-(a % b) >> 31);
	}

	// Remainder matching ceilDiv: a positive remainder has b subtracted from it.
	inline int ceilMod(int a, int b)
	{
		int r = a % b;
		return r - ((-r >> 31) & b);
	}

	// Converts x in [0, 1] to an n-bit unsigned normalized integer, rounding to
	// nearest and saturating outside that range.
	template<const int n>
	inline unsigned int unorm(float x)
	{
		static const unsigned int max = 0xFFFFFFFF >> (32 - n);
		static const float maxf = static_cast<float>(max);

		if(x >= 1.0f)
		{
			return max;
		}
		else if(x <= 0.0f)
		{
			return 0;
		}
		else
		{
			return static_cast<unsigned int>(maxf * x + 0.5f);
		}
	}

	// Converts x in [-1, 1] to an n-bit signed normalized integer, returned as
	// an n-bit two's complement pattern in the low bits of the result.
	// Values at or below -1 map to the most negative n-bit pattern.
	template<const int n>
	inline int snorm(float x)
	{
		static const unsigned int min = 0x80000000 >> (32 - n);
		static const unsigned int max = 0xFFFFFFFF >> (32 - n + 1);
		static const float maxf = static_cast<float>(max);
		static const unsigned int range = 0xFFFFFFFF >> (32 - n);

		if(x >= 0.0f)
		{
			if(x >= 1.0f)
			{
				return max;
			}
			else
			{
				return static_cast<int>(maxf * x + 0.5f);
			}
		}
		else
		{
			if(x <= -1.0f)
			{
				return min;
			}
			else
			{
				// Mask to n bits to obtain the two's complement pattern.
				return static_cast<int>(maxf * x - 0.5f) & range;
			}
		}
	}

	// Converts x to an n-bit unsigned integer, rounding to nearest and
	// saturating to [0, 2^n - 1].
	template<const int n>
	inline unsigned int ucast(float x)
	{
		static const unsigned int max = 0xFFFFFFFF >> (32 - n);
		static const float maxf = static_cast<float>(max);

		if(x >= maxf)
		{
			return max;
		}
		else if(x <= 0.0f)
		{
			return 0;
		}
		else
		{
			return static_cast<unsigned int>(x + 0.5f);
		}
	}

	// Converts x to an n-bit signed integer, rounding to nearest and saturating
	// to [-2^(n-1), 2^(n-1) - 1]. The result is returned as an n-bit two's
	// complement pattern in the low bits.
	template<const int n>
	inline int scast(float x)
	{
		static const unsigned int min = 0x80000000 >> (32 - n);
		static const unsigned int max = 0xFFFFFFFF >> (32 - n + 1);
		static const float maxf = static_cast<float>(max);
		static const float minf = static_cast<float>(min);
		static const unsigned int range = 0xFFFFFFFF >> (32 - n);

		if(x > 0.0f)
		{
			if(x >= maxf)
			{
				return max;
			}
			else
			{
				// A cast keeps the magnitude; unlike snorm there is no scaling.
				return static_cast<int>(x + 0.5f);
			}
		}
		else
		{
			// Saturate at the most negative representable value, -2^(n-1).
			if(x <= -minf)
			{
				return min;
			}
			else
			{
				// Mask to n bits to obtain the two's complement pattern.
				return static_cast<int>(x - 0.5f) & range;
			}
		}
	}

	// Decodes an sRGB-encoded component to linear.
	inline float sRGBtoLinear(float c)
	{
		if(c <= 0.04045f)
		{
			return c * 0.07739938f;   // 1.0f / 12.92f;
		}
		else
		{
			return powf((c + 0.055f) * 0.9478673f, 2.4f);   // 1.0f / 1.055f
		}
	}

	// Encodes a linear component as sRGB.
	inline float linearToSRGB(float c)
	{
		if(c <= 0.0031308f)
		{
			return c * 12.92f;
		}
		else
		{
			return 1.055f * powf(c, 0.4166667f) - 0.055f;   // 1.0f / 2.4f
		}
	}

	unsigned char sRGB8toLinear8(unsigned char value);

	uint64_t FNV_1a(const unsigned char *data, int size);   // Fowler-Noll-Vo hash function

	// Round up to the next multiple of alignment
	inline unsigned int align(unsigned int value, unsigned int alignment)
	{
		return ((value + alignment - 1) / alignment) * alignment;
	}

	// Converts to int, saturating at the largest positive int value.
	inline int clampToSignedInt(unsigned int x)
	{
		return static_cast<int>(min(x, 0x7FFFFFFFu));
	}

	// Packed shared-exponent color: three 9-bit mantissas and one 5-bit exponent.
	class RGB9E5Data
	{
		unsigned int R : 9;
		unsigned int G : 9;
		unsigned int B : 9;
		unsigned int E : 5;

	public:
		// Writes the decoded red, green and blue values to rgb[0..2].
		void toRGBFloats(float* rgb) const
		{
			static const int Offset = -24;   // Exponent Bias (15) + Number of mantissa bits per component (9) = 24

			// ldexp produces the power of two exactly.
			const float factor = std::ldexp(1.0f, static_cast<int>(E) + Offset);
			rgb[0] = static_cast<float>(R) * factor;
			rgb[1] = static_cast<float>(G) * factor;
			rgb[2] = static_cast<float>(B) * factor;
		}
	};

	// Packed color made of two 11-bit and one 10-bit unsigned floating-point
	// components.
	class R11G11B10FData
	{
		unsigned int R : 11;
		unsigned int G : 11;
		unsigned int B : 10;

		// Expands an 11-bit float (5-bit exponent, 6-bit mantissa, no sign) to
		// a 32-bit float.
		static inline float float11ToFloat32(unsigned short fp11)
		{
			// The exponent is kept signed: normalizing a denormal takes it below
			// zero, and re-biasing must not depend on integer wrap-around.
			int exponent = (fp11 >> 6) & 0x1F;
			unsigned int mantissa = fp11 & 0x3F;

			unsigned int output;
			if(exponent == 0x1F)
			{
				// INF or NAN
				output = 0x7f800000 | (mantissa << 17);
			}
			else
			{
				if(exponent != 0)
				{
					// normalized
				}
				else if(mantissa != 0)
				{
					// The value is denormalized: shift the mantissa up until its
					// implicit leading one appears, lowering the exponent to match.
					exponent = 1;

					do
					{
						exponent--;
						mantissa <<= 1;
					} while((mantissa & 0x40) == 0);

					mantissa = mantissa & 0x3F;
				}
				else   // The value is zero
				{
					exponent = -112;   // Cancels the re-bias below, giving an all-zero exponent field
				}

				// Re-bias the exponent (127 - 15 = 112) and move the 6-bit mantissa
				// to the top of the 23-bit fraction.
				output = (static_cast<unsigned int>(exponent + 112) << 23) | (mantissa << 17);
			}

			// Reinterpret the bit pattern without violating strict aliasing.
			float result;
			std::memcpy(&result, &output, sizeof(result));
			return result;
		}

		// Expands a 10-bit float (5-bit exponent, 5-bit mantissa, no sign) to
		// a 32-bit float.
		static inline float float10ToFloat32(unsigned short fp10)
		{
			// See float11ToFloat32 for why the exponent is signed.
			int exponent = (fp10 >> 5) & 0x1F;
			unsigned int mantissa = fp10 & 0x1F;

			unsigned int output;
			if(exponent == 0x1F)
			{
				// INF or NAN. The 5-bit mantissa is aligned the same way as on the
				// finite path below.
				output = 0x7f800000 | (mantissa << 18);
			}
			else
			{
				if(exponent != 0)
				{
					// normalized
				}
				else if(mantissa != 0)
				{
					// The value is denormalized: shift the mantissa up until its
					// implicit leading one appears, lowering the exponent to match.
					exponent = 1;

					do
					{
						exponent--;
						mantissa <<= 1;
					} while((mantissa & 0x20) == 0);

					mantissa = mantissa & 0x1F;
				}
				else   // The value is zero
				{
					exponent = -112;   // Cancels the re-bias below, giving an all-zero exponent field
				}

				// Re-bias the exponent (127 - 15 = 112) and move the 5-bit mantissa
				// to the top of the 23-bit fraction.
				output = (static_cast<unsigned int>(exponent + 112) << 23) | (mantissa << 18);
			}

			// Reinterpret the bit pattern without violating strict aliasing.
			float result;
			std::memcpy(&result, &output, sizeof(result));
			return result;
		}

	public:
		// Writes the decoded red, green and blue values to rgb[0..2].
		void toRGBFloats(float* rgb) const
		{
			rgb[0] = float11ToFloat32(R);
			rgb[1] = float11ToFloat32(G);
			rgb[2] = float10ToFloat32(B);
		}
	};
}

#endif   // sw_Math_hpp