1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "ShaderCore.hpp" 16 17 #include "Renderer/Renderer.hpp" 18 #include "Common/Debug.hpp" 19 20 #include <limits.h> 21 22 namespace sw 23 { 24 extern TranscendentalPrecision logPrecision; 25 extern TranscendentalPrecision expPrecision; 26 extern TranscendentalPrecision rcpPrecision; 27 extern TranscendentalPrecision rsqPrecision; 28 Vector4s()29 Vector4s::Vector4s() 30 { 31 } 32 Vector4s(unsigned short x,unsigned short y,unsigned short z,unsigned short w)33 Vector4s::Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w) 34 { 35 this->x = Short4(x); 36 this->y = Short4(y); 37 this->z = Short4(z); 38 this->w = Short4(w); 39 } 40 Vector4s(const Vector4s & rhs)41 Vector4s::Vector4s(const Vector4s &rhs) 42 { 43 x = rhs.x; 44 y = rhs.y; 45 z = rhs.z; 46 w = rhs.w; 47 } 48 operator =(const Vector4s & rhs)49 Vector4s &Vector4s::operator=(const Vector4s &rhs) 50 { 51 x = rhs.x; 52 y = rhs.y; 53 z = rhs.z; 54 w = rhs.w; 55 56 return *this; 57 } 58 operator [](int i)59 Short4 &Vector4s::operator[](int i) 60 { 61 switch(i) 62 { 63 case 0: return x; 64 case 1: return y; 65 case 2: return z; 66 case 3: return w; 67 } 68 69 return x; 70 } 71 Vector4f()72 Vector4f::Vector4f() 73 { 74 } 75 Vector4f(float x,float y,float z,float w)76 Vector4f::Vector4f(float x, float y, float z, float w) 77 { 78 this->x = Float4(x); 79 this->y = Float4(y); 80 this->z = Float4(z); 81 this->w = Float4(w); 82 } 83 Vector4f(const Vector4f & rhs)84 Vector4f::Vector4f(const Vector4f &rhs) 85 { 86 x = rhs.x; 87 y = rhs.y; 88 z = rhs.z; 89 w = rhs.w; 90 } 91 operator =(const Vector4f & rhs)92 Vector4f &Vector4f::operator=(const Vector4f &rhs) 93 { 94 x = rhs.x; 95 y = rhs.y; 96 z = rhs.z; 97 w = rhs.w; 98 99 return *this; 100 } 101 operator [](int i)102 Float4 &Vector4f::operator[](int i) 103 { 104 switch(i) 105 { 106 case 0: return x; 107 case 1: return y; 108 case 2: return z; 109 case 3: return w; 110 } 111 112 return x; 113 } 114 exponential2(RValue<Float4> x,bool pp)115 Float4 exponential2(RValue<Float4> x, bool pp) 116 { 117 Float4 x0; 118 Float4 x1; 119 Int4 x2; 120 121 x0 = x; 122 123 x0 = Min(x0, As<Float4>(Int4(0x43010000))); // 129.00000e+0f 124 x0 = Max(x0, As<Float4>(Int4(0xC2FDFFFF))); // -126.99999e+0f 125 x1 = x0; 126 x1 -= Float4(0.5f); 127 x2 = RoundInt(x1); 128 x1 = Float4(x2); 129 x2 += Int4(0x0000007F); // 127 130 x2 = x2 << 23; 131 x0 -= x1; 132 x1 = As<Float4>(Int4(0x3AF61905)); // 1.8775767e-3f 133 x1 *= x0; 134 x1 += As<Float4>(Int4(0x3C134806)); // 8.9893397e-3f 135 x1 *= x0; 136 x1 += As<Float4>(Int4(0x3D64AA23)); // 5.5826318e-2f 137 x1 *= x0; 138 x1 += As<Float4>(Int4(0x3E75EAD4)); // 2.4015361e-1f 139 x1 *= x0; 140 x1 += As<Float4>(Int4(0x3F31727B)); // 6.9315308e-1f 141 x1 *= x0; 142 x1 += As<Float4>(Int4(0x3F7FFFFF)); // 9.9999994e-1f 143 x1 *= As<Float4>(x2); 144 145 return x1; 146 } 147 logarithm2(RValue<Float4> x,bool absolute,bool pp)148 Float4 logarithm2(RValue<Float4> x, bool absolute, bool pp) 149 { 150 Float4 x0; 151 Float4 x1; 152 Float4 x2; 153 Float4 x3; 154 155 x0 = x; 156 157 x1 = As<Float4>(As<Int4>(x0) & Int4(0x7F800000)); 158 x1 = As<Float4>(As<UInt4>(x1) >> 8); 159 x1 = As<Float4>(As<Int4>(x1) | As<Int4>(Float4(1.0f))); 160 x1 = (x1 - Float4(1.4960938f)) * Float4(256.0f); // FIXME: (x1 - 1.4960938f) * 256.0f; 161 x0 = As<Float4>((As<Int4>(x0) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); 162 163 x2 = (Float4(9.5428179e-2f) * x0 + Float4(4.7779095e-1f)) * x0 + Float4(1.9782813e-1f); 164 x3 = ((Float4(1.6618466e-2f) * x0 + Float4(2.0350508e-1f)) * x0 + Float4(2.7382900e-1f)) * x0 + Float4(4.0496687e-2f); 165 x2 /= x3; 166 167 x1 += (x0 - Float4(1.0f)) * x2; 168 169 return x1; 170 } 171 exponential(RValue<Float4> x,bool pp)172 Float4 exponential(RValue<Float4> x, bool pp) 173 { 174 // FIXME: Propagate the constant 175 return exponential2(Float4(1.44269541f) * x, pp); // 1/ln(2) 176 } 177 logarithm(RValue<Float4> x,bool absolute,bool pp)178 Float4 logarithm(RValue<Float4> x, bool absolute, bool pp) 179 { 180 // FIXME: Propagate the constant 181 return Float4(6.93147181e-1f) * logarithm2(x, absolute, pp); // ln(2) 182 } 183 power(RValue<Float4> x,RValue<Float4> y,bool pp)184 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp) 185 { 186 Float4 log = logarithm2(x, true, pp); 187 log *= y; 188 return exponential2(log, pp); 189 } 190 reciprocal(RValue<Float4> x,bool pp,bool finite,bool exactAtPow2)191 Float4 reciprocal(RValue<Float4> x, bool pp, bool finite, bool exactAtPow2) 192 { 193 Float4 rcp; 194 195 if(!pp && rcpPrecision >= WHQL) 196 { 197 rcp = Float4(1.0f) / x; 198 } 199 else 200 { 201 rcp = Rcp_pp(x, exactAtPow2); 202 203 if(!pp) 204 { 205 rcp = (rcp + rcp) - (x * rcp * rcp); 206 } 207 } 208 209 if(finite) 210 { 211 int big = 0x7F7FFFFF; 212 rcp = Min(rcp, Float4((float&)big)); 213 } 214 215 return rcp; 216 } 217 reciprocalSquareRoot(RValue<Float4> x,bool absolute,bool pp)218 Float4 reciprocalSquareRoot(RValue<Float4> x, bool absolute, bool pp) 219 { 220 Float4 abs = x; 221 222 if(absolute) 223 { 224 abs = Abs(abs); 225 } 226 227 Float4 rsq; 228 229 if(!pp && rsqPrecision >= IEEE) 230 { 231 rsq = Float4(1.0f) / Sqrt(abs); 232 } 233 else 234 { 235 rsq = RcpSqrt_pp(abs); 236 237 if(!pp) 238 { 239 rsq = rsq * (Float4(3.0f) - rsq * rsq * abs) * Float4(0.5f); 240 } 241 } 242 243 int big = 0x7F7FFFFF; 244 rsq = Min(rsq, Float4((float&)big)); 245 246 return rsq; 247 } 248 modulo(RValue<Float4> x,RValue<Float4> y)249 Float4 modulo(RValue<Float4> x, RValue<Float4> y) 250 { 251 return x - y * Floor(x / y); 252 } 253 sine_pi(RValue<Float4> x,bool pp)254 Float4 sine_pi(RValue<Float4> x, bool pp) 255 { 256 const Float4 A = Float4(-4.05284734e-1f); // -4/pi^2 257 const Float4 B = Float4(1.27323954e+0f); // 4/pi 258 const Float4 C = Float4(7.75160950e-1f); 259 const Float4 D = Float4(2.24839049e-1f); 260 261 // Parabola approximating sine 262 Float4 sin = x * (Abs(x) * A + B); 263 264 // Improve precision from 0.06 to 0.001 265 if(true) 266 { 267 sin = sin * (Abs(sin) * D + C); 268 } 269 270 return sin; 271 } 272 cosine_pi(RValue<Float4> x,bool pp)273 Float4 cosine_pi(RValue<Float4> x, bool pp) 274 { 275 // cos(x) = sin(x + pi/2) 276 Float4 y = x + Float4(1.57079632e+0f); 277 278 // Wrap around 279 y -= As<Float4>(CmpNLT(y, Float4(3.14159265e+0f)) & As<Int4>(Float4(6.28318530e+0f))); 280 281 return sine_pi(y, pp); 282 } 283 sine(RValue<Float4> x,bool pp)284 Float4 sine(RValue<Float4> x, bool pp) 285 { 286 // Reduce to [-0.5, 0.5] range 287 Float4 y = x * Float4(1.59154943e-1f); // 1/2pi 288 y = y - Round(y); 289 290 const Float4 A = Float4(-16.0f); 291 const Float4 B = Float4(8.0f); 292 const Float4 C = Float4(7.75160950e-1f); 293 const Float4 D = Float4(2.24839049e-1f); 294 295 // Parabola approximating sine 296 Float4 sin = y * (Abs(y) * A + B); 297 298 // Improve precision from 0.06 to 0.001 299 if(true) 300 { 301 sin = sin * (Abs(sin) * D + C); 302 } 303 304 return sin; 305 } 306 cosine(RValue<Float4> x,bool pp)307 Float4 cosine(RValue<Float4> x, bool pp) 308 { 309 // cos(x) = sin(x + pi/2) 310 Float4 y = x + Float4(1.57079632e+0f); 311 return sine(y, pp); 312 } 313 tangent(RValue<Float4> x,bool pp)314 Float4 tangent(RValue<Float4> x, bool pp) 315 { 316 return sine(x, pp) / cosine(x, pp); 317 } 318 arccos(RValue<Float4> x,bool pp)319 Float4 arccos(RValue<Float4> x, bool pp) 320 { 321 // pi/2 - arcsin(x) 322 return Float4(1.57079632e+0f) - arcsin(x); 323 } 324 arcsin(RValue<Float4> x,bool pp)325 Float4 arcsin(RValue<Float4> x, bool pp) 326 { 327 // x*(pi/2-sqrt(1-x*x)*pi/5) 328 return x * (Float4(1.57079632e+0f) - Sqrt(Float4(1.0f) - x*x) * Float4(6.28318531e-1f)); 329 } 330 arctan(RValue<Float4> x,bool pp)331 Float4 arctan(RValue<Float4> x, bool pp) 332 { 333 Int4 O = CmpNLT(Abs(x), Float4(1.0f)); 334 Float4 y = As<Float4>((O & As<Int4>(Float4(1.0f) / x)) | (~O & As<Int4>(x))); // FIXME: Vector select 335 336 // Approximation of atan in [-1..1] 337 Float4 theta = y * (Float4(-0.27f) * Abs(y) + Float4(1.05539816f)); 338 339 // +/-pi/2 depending on sign of x 340 Float4 sgnPi_2 = As<Float4>(As<Int4>(Float4(1.57079632e+0f)) ^ (As<Int4>(x) & Int4(0x80000000))); 341 342 theta = As<Float4>((O & As<Int4>(sgnPi_2 - theta)) | (~O & As<Int4>(theta))); // FIXME: Vector select 343 344 return theta; 345 } 346 arctan(RValue<Float4> y,RValue<Float4> x,bool pp)347 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp) 348 { 349 // Rotate to upper semicircle when in lower semicircle 350 Int4 S = CmpLT(y, Float4(0.0f)); 351 Float4 theta = As<Float4>(S & As<Int4>(Float4(-3.14159265e+0f))); // -pi 352 Float4 x0 = As<Float4>((As<Int4>(y) & Int4(0x80000000)) ^ As<Int4>(x)); 353 Float4 y0 = Abs(y); 354 355 // Rotate to right quadrant when in left quadrant 356 Int4 Q = CmpLT(x0, Float4(0.0f)); 357 theta += As<Float4>(Q & As<Int4>(Float4(1.57079632e+0f))); // pi/2 358 Float4 x1 = As<Float4>((Q & As<Int4>(y0)) | (~Q & As<Int4>(x0))); // FIXME: Vector select 359 Float4 y1 = As<Float4>((Q & As<Int4>(-x0)) | (~Q & As<Int4>(y0))); // FIXME: Vector select 360 361 // Rotate to first octant when in second octant 362 Int4 O = CmpNLT(y1, x1); 363 theta += As<Float4>(O & As<Int4>(Float4(7.85398163e-1f))); // pi/4 364 Float4 x2 = As<Float4>((O & As<Int4>(Float4(7.07106781e-1f) * x1 + Float4(7.07106781e-1f) * y1)) | (~O & As<Int4>(x1))); // sqrt(2)/2 // FIXME: Vector select 365 Float4 y2 = As<Float4>((O & As<Int4>(Float4(7.07106781e-1f) * y1 - Float4(7.07106781e-1f) * x1)) | (~O & As<Int4>(y1))); // FIXME: Vector select 366 367 // Approximation of atan in [0..1] 368 Float4 y_x = y2 / x2; 369 theta += y_x * (Float4(-0.27f) * y_x + Float4(1.05539816f)); 370 371 return theta; 372 } 373 sineh(RValue<Float4> x,bool pp)374 Float4 sineh(RValue<Float4> x, bool pp) 375 { 376 return (exponential(x, pp) - exponential(-x, pp)) * Float4(0.5f); 377 } 378 cosineh(RValue<Float4> x,bool pp)379 Float4 cosineh(RValue<Float4> x, bool pp) 380 { 381 return (exponential(x, pp) + exponential(-x, pp)) * Float4(0.5f); 382 } 383 tangenth(RValue<Float4> x,bool pp)384 Float4 tangenth(RValue<Float4> x, bool pp) 385 { 386 Float4 e_x = exponential(x, pp); 387 Float4 e_minus_x = exponential(-x, pp); 388 return (e_x - e_minus_x) / (e_x + e_minus_x); 389 } 390 arccosh(RValue<Float4> x,bool pp)391 Float4 arccosh(RValue<Float4> x, bool pp) 392 { 393 return logarithm(x + Sqrt(x + Float4(1.0f)) * Sqrt(x - Float4(1.0f)), pp); 394 } 395 arcsinh(RValue<Float4> x,bool pp)396 Float4 arcsinh(RValue<Float4> x, bool pp) 397 { 398 return logarithm(x + Sqrt(x * x + Float4(1.0f)), pp); 399 } 400 arctanh(RValue<Float4> x,bool pp)401 Float4 arctanh(RValue<Float4> x, bool pp) 402 { 403 return logarithm((Float4(1.0f) + x) / (Float4(1.0f) - x), pp) * Float4(0.5f); 404 } 405 dot2(const Vector4f & v0,const Vector4f & v1)406 Float4 dot2(const Vector4f &v0, const Vector4f &v1) 407 { 408 return v0.x * v1.x + v0.y * v1.y; 409 } 410 dot3(const Vector4f & v0,const Vector4f & v1)411 Float4 dot3(const Vector4f &v0, const Vector4f &v1) 412 { 413 return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; 414 } 415 dot4(const Vector4f & v0,const Vector4f & v1)416 Float4 dot4(const Vector4f &v0, const Vector4f &v1) 417 { 418 return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z + v0.w * v1.w; 419 } 420 transpose4x4(Short4 & row0,Short4 & row1,Short4 & row2,Short4 & row3)421 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3) 422 { 423 Int2 tmp0 = UnpackHigh(row0, row1); 424 Int2 tmp1 = UnpackHigh(row2, row3); 425 Int2 tmp2 = UnpackLow(row0, row1); 426 Int2 tmp3 = UnpackLow(row2, row3); 427 428 row0 = UnpackLow(tmp2, tmp3); 429 row1 = UnpackHigh(tmp2, tmp3); 430 row2 = UnpackLow(tmp0, tmp1); 431 row3 = UnpackHigh(tmp0, tmp1); 432 } 433 transpose4x4(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)434 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 435 { 436 Float4 tmp0 = UnpackLow(row0, row1); 437 Float4 tmp1 = UnpackLow(row2, row3); 438 Float4 tmp2 = UnpackHigh(row0, row1); 439 Float4 tmp3 = UnpackHigh(row2, row3); 440 441 row0 = Float4(tmp0.xy, tmp1.xy); 442 row1 = Float4(tmp0.zw, tmp1.zw); 443 row2 = Float4(tmp2.xy, tmp3.xy); 444 row3 = Float4(tmp2.zw, tmp3.zw); 445 } 446 transpose4x3(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)447 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 448 { 449 Float4 tmp0 = UnpackLow(row0, row1); 450 Float4 tmp1 = UnpackLow(row2, row3); 451 Float4 tmp2 = UnpackHigh(row0, row1); 452 Float4 tmp3 = UnpackHigh(row2, row3); 453 454 row0 = Float4(tmp0.xy, tmp1.xy); 455 row1 = Float4(tmp0.zw, tmp1.zw); 456 row2 = Float4(tmp2.xy, tmp3.xy); 457 } 458 transpose4x2(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)459 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 460 { 461 Float4 tmp0 = UnpackLow(row0, row1); 462 Float4 tmp1 = UnpackLow(row2, row3); 463 464 row0 = Float4(tmp0.xy, tmp1.xy); 465 row1 = Float4(tmp0.zw, tmp1.zw); 466 } 467 transpose4x1(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)468 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 469 { 470 Float4 tmp0 = UnpackLow(row0, row1); 471 Float4 tmp1 = UnpackLow(row2, row3); 472 473 row0 = Float4(tmp0.xy, tmp1.xy); 474 } 475 transpose2x4(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3)476 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3) 477 { 478 Float4 tmp01 = UnpackLow(row0, row1); 479 Float4 tmp23 = UnpackHigh(row0, row1); 480 481 row0 = tmp01; 482 row1 = Float4(tmp01.zw, row1.zw); 483 row2 = tmp23; 484 row3 = Float4(tmp23.zw, row3.zw); 485 } 486 transpose4xN(Float4 & row0,Float4 & row1,Float4 & row2,Float4 & row3,int N)487 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N) 488 { 489 switch(N) 490 { 491 case 1: transpose4x1(row0, row1, row2, row3); break; 492 case 2: transpose4x2(row0, row1, row2, row3); break; 493 case 3: transpose4x3(row0, row1, row2, row3); break; 494 case 4: transpose4x4(row0, row1, row2, row3); break; 495 } 496 } 497 mov(Vector4f & dst,const Vector4f & src,bool integerDestination)498 void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination) 499 { 500 if(integerDestination) 501 { 502 dst.x = As<Float4>(RoundInt(src.x)); 503 dst.y = As<Float4>(RoundInt(src.y)); 504 dst.z = As<Float4>(RoundInt(src.z)); 505 dst.w = As<Float4>(RoundInt(src.w)); 506 } 507 else 508 { 509 dst = src; 510 } 511 } 512 neg(Vector4f & dst,const Vector4f & src)513 void ShaderCore::neg(Vector4f &dst, const Vector4f &src) 514 { 515 dst.x = -src.x; 516 dst.y = -src.y; 517 dst.z = -src.z; 518 dst.w = -src.w; 519 } 520 ineg(Vector4f & dst,const Vector4f & src)521 void ShaderCore::ineg(Vector4f &dst, const Vector4f &src) 522 { 523 dst.x = As<Float4>(-As<Int4>(src.x)); 524 dst.y = As<Float4>(-As<Int4>(src.y)); 525 dst.z = As<Float4>(-As<Int4>(src.z)); 526 dst.w = As<Float4>(-As<Int4>(src.w)); 527 } 528 f2b(Vector4f & dst,const Vector4f & src)529 void ShaderCore::f2b(Vector4f &dst, const Vector4f &src) 530 { 531 dst.x = As<Float4>(CmpNEQ(src.x, Float4(0.0f))); 532 dst.y = As<Float4>(CmpNEQ(src.y, Float4(0.0f))); 533 dst.z = As<Float4>(CmpNEQ(src.z, Float4(0.0f))); 534 dst.w = As<Float4>(CmpNEQ(src.w, Float4(0.0f))); 535 } 536 b2f(Vector4f & dst,const Vector4f & src)537 void ShaderCore::b2f(Vector4f &dst, const Vector4f &src) 538 { 539 dst.x = As<Float4>(As<Int4>(src.x) & As<Int4>(Float4(1.0f))); 540 dst.y = As<Float4>(As<Int4>(src.y) & As<Int4>(Float4(1.0f))); 541 dst.z = As<Float4>(As<Int4>(src.z) & As<Int4>(Float4(1.0f))); 542 dst.w = As<Float4>(As<Int4>(src.w) & As<Int4>(Float4(1.0f))); 543 } 544 f2i(Vector4f & dst,const Vector4f & src)545 void ShaderCore::f2i(Vector4f &dst, const Vector4f &src) 546 { 547 dst.x = As<Float4>(Int4(src.x)); 548 dst.y = As<Float4>(Int4(src.y)); 549 dst.z = As<Float4>(Int4(src.z)); 550 dst.w = As<Float4>(Int4(src.w)); 551 } 552 i2f(Vector4f & dst,const Vector4f & src)553 void ShaderCore::i2f(Vector4f &dst, const Vector4f &src) 554 { 555 dst.x = Float4(As<Int4>(src.x)); 556 dst.y = Float4(As<Int4>(src.y)); 557 dst.z = Float4(As<Int4>(src.z)); 558 dst.w = Float4(As<Int4>(src.w)); 559 } 560 f2u(Vector4f & dst,const Vector4f & src)561 void ShaderCore::f2u(Vector4f &dst, const Vector4f &src) 562 { 563 dst.x = As<Float4>(UInt4(src.x)); 564 dst.y = As<Float4>(UInt4(src.y)); 565 dst.z = As<Float4>(UInt4(src.z)); 566 dst.w = As<Float4>(UInt4(src.w)); 567 } 568 u2f(Vector4f & dst,const Vector4f & src)569 void ShaderCore::u2f(Vector4f &dst, const Vector4f &src) 570 { 571 dst.x = Float4(As<UInt4>(src.x)); 572 dst.y = Float4(As<UInt4>(src.y)); 573 dst.z = Float4(As<UInt4>(src.z)); 574 dst.w = Float4(As<UInt4>(src.w)); 575 } 576 i2b(Vector4f & dst,const Vector4f & src)577 void ShaderCore::i2b(Vector4f &dst, const Vector4f &src) 578 { 579 dst.x = As<Float4>(CmpNEQ(As<Int4>(src.x), Int4(0))); 580 dst.y = As<Float4>(CmpNEQ(As<Int4>(src.y), Int4(0))); 581 dst.z = As<Float4>(CmpNEQ(As<Int4>(src.z), Int4(0))); 582 dst.w = As<Float4>(CmpNEQ(As<Int4>(src.w), Int4(0))); 583 } 584 b2i(Vector4f & dst,const Vector4f & src)585 void ShaderCore::b2i(Vector4f &dst, const Vector4f &src) 586 { 587 dst.x = As<Float4>(As<Int4>(src.x) & Int4(1)); 588 dst.y = As<Float4>(As<Int4>(src.y) & Int4(1)); 589 dst.z = As<Float4>(As<Int4>(src.z) & Int4(1)); 590 dst.w = As<Float4>(As<Int4>(src.w) & Int4(1)); 591 } 592 add(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)593 void ShaderCore::add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 594 { 595 dst.x = src0.x + src1.x; 596 dst.y = src0.y + src1.y; 597 dst.z = src0.z + src1.z; 598 dst.w = src0.w + src1.w; 599 } 600 iadd(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)601 void ShaderCore::iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 602 { 603 dst.x = As<Float4>(As<Int4>(src0.x) + As<Int4>(src1.x)); 604 dst.y = As<Float4>(As<Int4>(src0.y) + As<Int4>(src1.y)); 605 dst.z = As<Float4>(As<Int4>(src0.z) + As<Int4>(src1.z)); 606 dst.w = As<Float4>(As<Int4>(src0.w) + As<Int4>(src1.w)); 607 } 608 sub(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)609 void ShaderCore::sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 610 { 611 dst.x = src0.x - src1.x; 612 dst.y = src0.y - src1.y; 613 dst.z = src0.z - src1.z; 614 dst.w = src0.w - src1.w; 615 } 616 isub(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)617 void ShaderCore::isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 618 { 619 dst.x = As<Float4>(As<Int4>(src0.x) - As<Int4>(src1.x)); 620 dst.y = As<Float4>(As<Int4>(src0.y) - As<Int4>(src1.y)); 621 dst.z = As<Float4>(As<Int4>(src0.z) - As<Int4>(src1.z)); 622 dst.w = As<Float4>(As<Int4>(src0.w) - As<Int4>(src1.w)); 623 } 624 mad(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)625 void ShaderCore::mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 626 { 627 dst.x = src0.x * src1.x + src2.x; 628 dst.y = src0.y * src1.y + src2.y; 629 dst.z = src0.z * src1.z + src2.z; 630 dst.w = src0.w * src1.w + src2.w; 631 } 632 imad(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)633 void ShaderCore::imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 634 { 635 dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x) + As<Int4>(src2.x)); 636 dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y) + As<Int4>(src2.y)); 637 dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z) + As<Int4>(src2.z)); 638 dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w) + As<Int4>(src2.w)); 639 } 640 mul(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)641 void ShaderCore::mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 642 { 643 dst.x = src0.x * src1.x; 644 dst.y = src0.y * src1.y; 645 dst.z = src0.z * src1.z; 646 dst.w = src0.w * src1.w; 647 } 648 imul(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)649 void ShaderCore::imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 650 { 651 dst.x = As<Float4>(As<Int4>(src0.x) * As<Int4>(src1.x)); 652 dst.y = As<Float4>(As<Int4>(src0.y) * As<Int4>(src1.y)); 653 dst.z = As<Float4>(As<Int4>(src0.z) * As<Int4>(src1.z)); 654 dst.w = As<Float4>(As<Int4>(src0.w) * As<Int4>(src1.w)); 655 } 656 rcpx(Vector4f & dst,const Vector4f & src,bool pp)657 void ShaderCore::rcpx(Vector4f &dst, const Vector4f &src, bool pp) 658 { 659 Float4 rcp = reciprocal(src.x, pp, true); 660 661 dst.x = rcp; 662 dst.y = rcp; 663 dst.z = rcp; 664 dst.w = rcp; 665 } 666 div(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)667 void ShaderCore::div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 668 { 669 dst.x = src0.x / src1.x; 670 dst.y = src0.y / src1.y; 671 dst.z = src0.z / src1.z; 672 dst.w = src0.w / src1.w; 673 } 674 idiv(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)675 void ShaderCore::idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 676 { 677 Float4 intMax(As<Float4>(Int4(INT_MAX))); 678 cmp0i(dst.x, src1.x, intMax, src1.x); 679 dst.x = As<Float4>(As<Int4>(src0.x) / As<Int4>(dst.x)); 680 cmp0i(dst.y, src1.y, intMax, src1.y); 681 dst.y = As<Float4>(As<Int4>(src0.y) / As<Int4>(dst.y)); 682 cmp0i(dst.z, src1.z, intMax, src1.z); 683 dst.z = As<Float4>(As<Int4>(src0.z) / As<Int4>(dst.z)); 684 cmp0i(dst.w, src1.w, intMax, src1.w); 685 dst.w = As<Float4>(As<Int4>(src0.w) / As<Int4>(dst.w)); 686 } 687 udiv(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)688 void ShaderCore::udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 689 { 690 Float4 uintMax(As<Float4>(UInt4(UINT_MAX))); 691 cmp0i(dst.x, src1.x, uintMax, src1.x); 692 dst.x = As<Float4>(As<UInt4>(src0.x) / As<UInt4>(dst.x)); 693 cmp0i(dst.y, src1.y, uintMax, src1.y); 694 dst.y = As<Float4>(As<UInt4>(src0.y) / As<UInt4>(dst.y)); 695 cmp0i(dst.z, src1.z, uintMax, src1.z); 696 dst.z = As<Float4>(As<UInt4>(src0.z) / As<UInt4>(dst.z)); 697 cmp0i(dst.w, src1.w, uintMax, src1.w); 698 dst.w = As<Float4>(As<UInt4>(src0.w) / As<UInt4>(dst.w)); 699 } 700 mod(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)701 void ShaderCore::mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 702 { 703 dst.x = modulo(src0.x, src1.x); 704 dst.y = modulo(src0.y, src1.y); 705 dst.z = modulo(src0.z, src1.z); 706 dst.w = modulo(src0.w, src1.w); 707 } 708 imod(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)709 void ShaderCore::imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 710 { 711 cmp0i(dst.x, src1.x, src0.x, src1.x); 712 dst.x = As<Float4>(As<Int4>(src0.x) % As<Int4>(dst.x)); 713 cmp0i(dst.y, src1.y, src0.y, src1.y); 714 dst.y = As<Float4>(As<Int4>(src0.y) % As<Int4>(dst.y)); 715 cmp0i(dst.z, src1.z, src0.z, src1.z); 716 dst.z = As<Float4>(As<Int4>(src0.z) % As<Int4>(dst.z)); 717 cmp0i(dst.w, src1.w, src0.w, src1.w); 718 dst.w = As<Float4>(As<Int4>(src0.w) % As<Int4>(dst.w)); 719 } umod(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)720 void ShaderCore::umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 721 { 722 cmp0i(dst.x, src1.x, src0.x, src1.x); 723 dst.x = As<Float4>(As<UInt4>(src0.x) % As<UInt4>(dst.x)); 724 cmp0i(dst.y, src1.y, src0.y, src1.y); 725 dst.y = As<Float4>(As<UInt4>(src0.y) % As<UInt4>(dst.y)); 726 cmp0i(dst.z, src1.z, src0.z, src1.z); 727 dst.z = As<Float4>(As<UInt4>(src0.z) % As<UInt4>(dst.z)); 728 cmp0i(dst.w, src1.w, src0.w, src1.w); 729 dst.w = As<Float4>(As<UInt4>(src0.w) % As<UInt4>(dst.w)); 730 } 731 shl(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)732 void ShaderCore::shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 733 { 734 dst.x = As<Float4>(As<Int4>(src0.x) << As<Int4>(src1.x)); 735 dst.y = As<Float4>(As<Int4>(src0.y) << As<Int4>(src1.y)); 736 dst.z = As<Float4>(As<Int4>(src0.z) << As<Int4>(src1.z)); 737 dst.w = As<Float4>(As<Int4>(src0.w) << As<Int4>(src1.w)); 738 } 739 ishr(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)740 void ShaderCore::ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 741 { 742 dst.x = As<Float4>(As<Int4>(src0.x) >> As<Int4>(src1.x)); 743 dst.y = As<Float4>(As<Int4>(src0.y) >> As<Int4>(src1.y)); 744 dst.z = As<Float4>(As<Int4>(src0.z) >> As<Int4>(src1.z)); 745 dst.w = As<Float4>(As<Int4>(src0.w) >> As<Int4>(src1.w)); 746 } 747 ushr(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)748 void ShaderCore::ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 749 { 750 dst.x = As<Float4>(As<UInt4>(src0.x) >> As<UInt4>(src1.x)); 751 dst.y = As<Float4>(As<UInt4>(src0.y) >> As<UInt4>(src1.y)); 752 dst.z = As<Float4>(As<UInt4>(src0.z) >> As<UInt4>(src1.z)); 753 dst.w = As<Float4>(As<UInt4>(src0.w) >> As<UInt4>(src1.w)); 754 } 755 rsqx(Vector4f & dst,const Vector4f & src,bool pp)756 void ShaderCore::rsqx(Vector4f &dst, const Vector4f &src, bool pp) 757 { 758 Float4 rsq = reciprocalSquareRoot(src.x, true, pp); 759 760 dst.x = rsq; 761 dst.y = rsq; 762 dst.z = rsq; 763 dst.w = rsq; 764 } 765 sqrt(Vector4f & dst,const Vector4f & src,bool pp)766 void ShaderCore::sqrt(Vector4f &dst, const Vector4f &src, bool pp) 767 { 768 dst.x = Sqrt(src.x); 769 dst.y = Sqrt(src.y); 770 dst.z = Sqrt(src.z); 771 dst.w = Sqrt(src.w); 772 } 773 rsq(Vector4f & dst,const Vector4f & src,bool pp)774 void ShaderCore::rsq(Vector4f &dst, const Vector4f &src, bool pp) 775 { 776 dst.x = reciprocalSquareRoot(src.x, false, pp); 777 dst.y = reciprocalSquareRoot(src.y, false, pp); 778 dst.z = reciprocalSquareRoot(src.z, false, pp); 779 dst.w = reciprocalSquareRoot(src.w, false, pp); 780 } 781 len2(Float4 & dst,const Vector4f & src,bool pp)782 void ShaderCore::len2(Float4 &dst, const Vector4f &src, bool pp) 783 { 784 dst = Sqrt(dot2(src, src)); 785 } 786 len3(Float4 & dst,const Vector4f & src,bool pp)787 void ShaderCore::len3(Float4 &dst, const Vector4f &src, bool pp) 788 { 789 dst = Sqrt(dot3(src, src)); 790 } 791 len4(Float4 & dst,const Vector4f & src,bool pp)792 void ShaderCore::len4(Float4 &dst, const Vector4f &src, bool pp) 793 { 794 dst = Sqrt(dot4(src, src)); 795 } 796 dist1(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)797 void ShaderCore::dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 798 { 799 dst = Abs(src0.x - src1.x); 800 } 801 dist2(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)802 void ShaderCore::dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 803 { 804 Float4 dx = src0.x - src1.x; 805 Float4 dy = src0.y - src1.y; 806 Float4 dot2 = dx * dx + dy * dy; 807 dst = Sqrt(dot2); 808 } 809 dist3(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)810 void ShaderCore::dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 811 { 812 Float4 dx = src0.x - src1.x; 813 Float4 dy = src0.y - src1.y; 814 Float4 dz = src0.z - src1.z; 815 Float4 dot3 = dx * dx + dy * dy + dz * dz; 816 dst = Sqrt(dot3); 817 } 818 dist4(Float4 & dst,const Vector4f & src0,const Vector4f & src1,bool pp)819 void ShaderCore::dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 820 { 821 Float4 dx = src0.x - src1.x; 822 Float4 dy = src0.y - src1.y; 823 Float4 dz = src0.z - src1.z; 824 Float4 dw = src0.w - src1.w; 825 Float4 dot4 = dx * dx + dy * dy + dz * dz + dw * dw; 826 dst = Sqrt(dot4); 827 } 828 dp1(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)829 void ShaderCore::dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 830 { 831 Float4 t = src0.x * src1.x; 832 833 dst.x = t; 834 dst.y = t; 835 dst.z = t; 836 dst.w = t; 837 } 838 dp2(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)839 void ShaderCore::dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 840 { 841 Float4 t = dot2(src0, src1); 842 843 dst.x = t; 844 dst.y = t; 845 dst.z = t; 846 dst.w = t; 847 } 848 dp2add(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)849 void ShaderCore::dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 850 { 851 Float4 t = dot2(src0, src1) + src2.x; 852 853 dst.x = t; 854 dst.y = t; 855 dst.z = t; 856 dst.w = t; 857 } 858 dp3(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)859 void ShaderCore::dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 860 { 861 Float4 dot = dot3(src0, src1); 862 863 dst.x = dot; 864 dst.y = dot; 865 dst.z = dot; 866 dst.w = dot; 867 } 868 dp4(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)869 void ShaderCore::dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 870 { 871 Float4 dot = dot4(src0, src1); 872 873 dst.x = dot; 874 dst.y = dot; 875 dst.z = dot; 876 dst.w = dot; 877 } 878 min(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)879 void ShaderCore::min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 880 { 881 dst.x = Min(src0.x, src1.x); 882 dst.y = Min(src0.y, src1.y); 883 dst.z = Min(src0.z, src1.z); 884 dst.w = Min(src0.w, src1.w); 885 } 886 imin(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)887 void ShaderCore::imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 888 { 889 dst.x = As<Float4>(Min(As<Int4>(src0.x), As<Int4>(src1.x))); 890 dst.y = As<Float4>(Min(As<Int4>(src0.y), As<Int4>(src1.y))); 891 dst.z = As<Float4>(Min(As<Int4>(src0.z), As<Int4>(src1.z))); 892 dst.w = As<Float4>(Min(As<Int4>(src0.w), As<Int4>(src1.w))); 893 } 894 umin(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)895 void ShaderCore::umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 896 { 897 dst.x = As<Float4>(Min(As<UInt4>(src0.x), As<UInt4>(src1.x))); 898 dst.y = As<Float4>(Min(As<UInt4>(src0.y), As<UInt4>(src1.y))); 899 dst.z = As<Float4>(Min(As<UInt4>(src0.z), As<UInt4>(src1.z))); 900 dst.w = As<Float4>(Min(As<UInt4>(src0.w), As<UInt4>(src1.w))); 901 } 902 max(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)903 void ShaderCore::max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 904 { 905 dst.x = Max(src0.x, src1.x); 906 dst.y = Max(src0.y, src1.y); 907 dst.z = Max(src0.z, src1.z); 908 dst.w = Max(src0.w, src1.w); 909 } 910 imax(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)911 void ShaderCore::imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 912 { 913 dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x))); 914 dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y))); 915 dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z))); 916 dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w))); 917 } 918 umax(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)919 void ShaderCore::umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 920 { 921 dst.x = As<Float4>(Max(As<Int4>(src0.x), As<Int4>(src1.x))); 922 dst.y = As<Float4>(Max(As<Int4>(src0.y), As<Int4>(src1.y))); 923 dst.z = As<Float4>(Max(As<Int4>(src0.z), As<Int4>(src1.z))); 924 dst.w = As<Float4>(Max(As<Int4>(src0.w), As<Int4>(src1.w))); 925 } 926 slt(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)927 void ShaderCore::slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 928 { 929 dst.x = As<Float4>(As<Int4>(CmpLT(src0.x, src1.x)) & As<Int4>(Float4(1.0f))); 930 dst.y = As<Float4>(As<Int4>(CmpLT(src0.y, src1.y)) & As<Int4>(Float4(1.0f))); 931 dst.z = As<Float4>(As<Int4>(CmpLT(src0.z, src1.z)) & As<Int4>(Float4(1.0f))); 932 dst.w = As<Float4>(As<Int4>(CmpLT(src0.w, src1.w)) & As<Int4>(Float4(1.0f))); 933 } 934 step(Vector4f & dst,const Vector4f & edge,const Vector4f & x)935 void ShaderCore::step(Vector4f &dst, const Vector4f &edge, const Vector4f &x) 936 { 937 dst.x = As<Float4>(CmpNLT(x.x, edge.x) & As<Int4>(Float4(1.0f))); 938 dst.y = As<Float4>(CmpNLT(x.y, edge.y) & As<Int4>(Float4(1.0f))); 939 dst.z = As<Float4>(CmpNLT(x.z, edge.z) & As<Int4>(Float4(1.0f))); 940 dst.w = As<Float4>(CmpNLT(x.w, edge.w) & As<Int4>(Float4(1.0f))); 941 } 942 exp2x(Vector4f & dst,const Vector4f & src,bool pp)943 void ShaderCore::exp2x(Vector4f &dst, const Vector4f &src, bool pp) 944 { 945 Float4 exp = exponential2(src.x, pp); 946 947 dst.x = exp; 948 dst.y = exp; 949 dst.z = exp; 950 dst.w = exp; 951 } 952 exp2(Vector4f & dst,const Vector4f & src,bool pp)953 void ShaderCore::exp2(Vector4f &dst, const Vector4f &src, bool pp) 954 { 955 dst.x = exponential2(src.x, pp); 956 dst.y = exponential2(src.y, pp); 957 dst.z = exponential2(src.z, pp); 958 dst.w = exponential2(src.w, pp); 959 } 960 exp(Vector4f & dst,const Vector4f & src,bool pp)961 void ShaderCore::exp(Vector4f &dst, const Vector4f &src, bool pp) 962 { 963 dst.x = exponential(src.x, pp); 964 dst.y = exponential(src.y, pp); 965 dst.z = exponential(src.z, pp); 966 dst.w = exponential(src.w, pp); 967 } 968 log2x(Vector4f & dst,const Vector4f & src,bool pp)969 void ShaderCore::log2x(Vector4f &dst, const Vector4f &src, bool pp) 970 { 971 Float4 log = logarithm2(src.x, true, pp); 972 973 dst.x = log; 974 dst.y = log; 975 dst.z = log; 976 dst.w = log; 977 } 978 log2(Vector4f & dst,const Vector4f & src,bool pp)979 void ShaderCore::log2(Vector4f &dst, const Vector4f &src, bool pp) 980 { 981 dst.x = logarithm2(src.x, false, pp); 982 dst.y = logarithm2(src.y, false, pp); 983 dst.z = logarithm2(src.z, false, pp); 984 dst.w = logarithm2(src.w, false, pp); 985 } 986 log(Vector4f & dst,const Vector4f & src,bool pp)987 void ShaderCore::log(Vector4f &dst, const Vector4f &src, bool pp) 988 { 989 dst.x = logarithm(src.x, false, pp); 990 dst.y = logarithm(src.y, false, pp); 991 dst.z = logarithm(src.z, false, pp); 992 dst.w = logarithm(src.w, false, pp); 993 } 994 lit(Vector4f & dst,const Vector4f & src)995 void ShaderCore::lit(Vector4f &dst, const Vector4f &src) 996 { 997 dst.x = Float4(1.0f); 998 dst.y = Max(src.x, Float4(0.0f)); 999 1000 Float4 pow; 1001 1002 pow = src.w; 1003 pow = Min(pow, Float4(127.9961f)); 1004 pow = Max(pow, Float4(-127.9961f)); 1005 1006 dst.z = power(src.y, pow); 1007 dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.x, Float4(0.0f))); 1008 dst.z = As<Float4>(As<Int4>(dst.z) & CmpNLT(src.y, Float4(0.0f))); 1009 1010 dst.w = Float4(1.0f); 1011 } 1012 att(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1013 void ShaderCore::att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1014 { 1015 // Computes attenuation factors (1, d, d^2, 1/d) assuming src0 = d^2, src1 = 1/d 1016 dst.x = 1; 1017 dst.y = src0.y * src1.y; 1018 dst.z = src0.z; 1019 dst.w = src1.w; 1020 } 1021 lrp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1022 void ShaderCore::lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1023 { 1024 dst.x = src0.x * (src1.x - src2.x) + src2.x; 1025 dst.y = src0.y * (src1.y - src2.y) + src2.y; 1026 dst.z = src0.z * (src1.z - src2.z) + src2.z; 1027 dst.w = src0.w * (src1.w - src2.w) + src2.w; 1028 } 1029 smooth(Vector4f & dst,const Vector4f & edge0,const Vector4f & edge1,const Vector4f & x)1030 void ShaderCore::smooth(Vector4f &dst, const Vector4f &edge0, const Vector4f &edge1, const Vector4f &x) 1031 { 1032 Float4 tx = Min(Max((x.x - edge0.x) / (edge1.x - edge0.x), Float4(0.0f)), Float4(1.0f)); dst.x = tx * tx * (Float4(3.0f) - Float4(2.0f) * tx); 1033 Float4 ty = Min(Max((x.y - edge0.y) / (edge1.y - edge0.y), Float4(0.0f)), Float4(1.0f)); dst.y = ty * ty * (Float4(3.0f) - Float4(2.0f) * ty); 1034 Float4 tz = Min(Max((x.z - edge0.z) / (edge1.z - edge0.z), Float4(0.0f)), Float4(1.0f)); dst.z = tz * tz * (Float4(3.0f) - Float4(2.0f) * tz); 1035 Float4 tw = Min(Max((x.w - edge0.w) / (edge1.w - edge0.w), Float4(0.0f)), Float4(1.0f)); dst.w = tw * tw * (Float4(3.0f) - Float4(2.0f) * tw); 1036 } 1037 floatToHalfBits(Float4 & dst,const Float4 & floatBits,bool storeInUpperBits)1038 void ShaderCore::floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits) 1039 { 1040 static const uint32_t mask_sign = 0x80000000u; 1041 static const uint32_t mask_round = ~0xfffu; 1042 static const uint32_t c_f32infty = 255 << 23; 1043 static const uint32_t c_magic = 15 << 23; 1044 static const uint32_t c_nanbit = 0x200; 1045 static const uint32_t c_infty_as_fp16 = 0x7c00; 1046 static const uint32_t c_clamp = (31 << 23) - 0x1000; 1047 1048 UInt4 justsign = UInt4(mask_sign) & As<UInt4>(floatBits); 1049 UInt4 absf = As<UInt4>(floatBits) ^ justsign; 1050 UInt4 b_isnormal = CmpNLE(UInt4(c_f32infty), absf); 1051 1052 // Note: this version doesn't round to the nearest even in case of a tie as defined by IEEE 754-2008, it rounds to +inf 1053 // instead of nearest even, since that's fine for GLSL ES 3.0's needs (see section 2.1.1 Floating-Point Computation) 1054 UInt4 joined = ((((As<UInt4>(Min(As<Float4>(absf & UInt4(mask_round)) * As<Float4>(UInt4(c_magic)), 1055 As<Float4>(UInt4(c_clamp))))) - UInt4(mask_round)) >> 13) & b_isnormal) | 1056 ((b_isnormal ^ UInt4(0xFFFFFFFF)) & ((CmpNLE(absf, UInt4(c_f32infty)) & UInt4(c_nanbit)) | 1057 UInt4(c_infty_as_fp16))); 1058 1059 dst = As<Float4>(storeInUpperBits ? As<UInt4>(dst) | ((joined << 16) | justsign) : joined | (justsign >> 16)); 1060 } 1061 halfToFloatBits(Float4 & dst,const Float4 & halfBits)1062 void ShaderCore::halfToFloatBits(Float4& dst, const Float4& halfBits) 1063 { 1064 static const uint32_t mask_nosign = 0x7FFF; 1065 static const uint32_t magic = (254 - 15) << 23; 1066 static const uint32_t was_infnan = 0x7BFF; 1067 static const uint32_t exp_infnan = 255 << 23; 1068 1069 UInt4 expmant = As<UInt4>(halfBits) & UInt4(mask_nosign); 1070 dst = As<Float4>(As<UInt4>(As<Float4>(expmant << 13) * As<Float4>(UInt4(magic))) | 1071 ((As<UInt4>(halfBits) ^ UInt4(expmant)) << 16) | 1072 (CmpNLE(As<UInt4>(expmant), UInt4(was_infnan)) & UInt4(exp_infnan))); 1073 } 1074 packHalf2x16(Vector4f & d,const Vector4f & s0)1075 void ShaderCore::packHalf2x16(Vector4f &d, const Vector4f &s0) 1076 { 1077 // half2 | half1 1078 floatToHalfBits(d.x, s0.x, false); 1079 floatToHalfBits(d.x, s0.y, true); 1080 } 1081 unpackHalf2x16(Vector4f & dst,const Vector4f & s0)1082 void ShaderCore::unpackHalf2x16(Vector4f &dst, const Vector4f &s0) 1083 { 1084 // half2 | half1 1085 halfToFloatBits(dst.x, As<Float4>(As<UInt4>(s0.x) & UInt4(0x0000FFFF))); 1086 halfToFloatBits(dst.y, As<Float4>((As<UInt4>(s0.x) & UInt4(0xFFFF0000)) >> 16)); 1087 } 1088 packSnorm2x16(Vector4f & d,const Vector4f & s0)1089 void ShaderCore::packSnorm2x16(Vector4f &d, const Vector4f &s0) 1090 { 1091 // round(clamp(c, -1.0, 1.0) * 32767.0) 1092 d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) | 1093 ((Int4(Round(Min(Max(s0.y, Float4(-1.0f)), Float4(1.0f)) * Float4(32767.0f))) & Int4(0xFFFF)) << 16)); 1094 } 1095 packUnorm2x16(Vector4f & d,const Vector4f & s0)1096 void ShaderCore::packUnorm2x16(Vector4f &d, const Vector4f &s0) 1097 { 1098 // round(clamp(c, 0.0, 1.0) * 65535.0) 1099 d.x = As<Float4>((Int4(Round(Min(Max(s0.x, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) | 1100 ((Int4(Round(Min(Max(s0.y, Float4(0.0f)), Float4(1.0f)) * Float4(65535.0f))) & Int4(0xFFFF)) << 16)); 1101 } 1102 unpackSnorm2x16(Vector4f & dst,const Vector4f & s0)1103 void ShaderCore::unpackSnorm2x16(Vector4f &dst, const Vector4f &s0) 1104 { 1105 // clamp(f / 32727.0, -1.0, 1.0) 1106 dst.x = Min(Max(Float4(As<Int4>((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16)) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f)); 1107 dst.y = Min(Max(Float4(As<Int4>(As<UInt4>(s0.x) & UInt4(0xFFFF0000))) * Float4(1.0f / float(0x7FFF0000)), Float4(-1.0f)), Float4(1.0f)); 1108 } 1109 unpackUnorm2x16(Vector4f & dst,const Vector4f & s0)1110 void ShaderCore::unpackUnorm2x16(Vector4f &dst, const Vector4f &s0) 1111 { 1112 // f / 65535.0 1113 dst.x = Float4((As<UInt4>(s0.x) & UInt4(0x0000FFFF)) << 16) * Float4(1.0f / float(0xFFFF0000)); 1114 dst.y = Float4(As<UInt4>(s0.x) & UInt4(0xFFFF0000)) * Float4(1.0f / float(0xFFFF0000)); 1115 } 1116 det2(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1117 void ShaderCore::det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1118 { 1119 dst.x = src0.x * src1.y - src0.y * src1.x; 1120 dst.y = dst.z = dst.w = dst.x; 1121 } 1122 det3(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1123 void ShaderCore::det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1124 { 1125 crs(dst, src1, src2); 1126 dp3(dst, dst, src0); 1127 } 1128 det4(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2,const Vector4f & src3)1129 void ShaderCore::det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3) 1130 { 1131 dst.x = src2.z * src3.w - src2.w * src3.z; 1132 dst.y = src1.w * src3.z - src1.z * src3.w; 1133 dst.z = src1.z * src2.w - src1.w * src2.z; 1134 dst.x = src0.x * (src1.y * dst.x + src2.y * dst.y + src3.y * dst.z) - 1135 src0.y * (src1.x * dst.x + src2.x * dst.y + src3.x * dst.z) + 1136 src0.z * (src1.x * (src2.y * src3.w - src2.w * src3.y) + 1137 src2.x * (src1.w * src3.y - src1.y * src3.w) + 1138 src3.x * (src1.y * src2.w - src1.w * src2.y)) + 1139 src0.w * (src1.x * (src2.z * src3.y - src2.y * src3.z) + 1140 src2.x * (src1.y * src3.z - src1.z * src3.y) + 1141 src3.x * (src1.z * src2.y - src1.y * src2.z)); 1142 dst.y = dst.z = dst.w = dst.x; 1143 } 1144 frc(Vector4f & dst,const Vector4f & src)1145 void ShaderCore::frc(Vector4f &dst, const Vector4f &src) 1146 { 1147 dst.x = Frac(src.x); 1148 dst.y = Frac(src.y); 1149 dst.z = Frac(src.z); 1150 dst.w = Frac(src.w); 1151 } 1152 trunc(Vector4f & dst,const Vector4f & src)1153 void ShaderCore::trunc(Vector4f &dst, const Vector4f &src) 1154 { 1155 dst.x = Trunc(src.x); 1156 dst.y = Trunc(src.y); 1157 dst.z = Trunc(src.z); 1158 dst.w = Trunc(src.w); 1159 } 1160 floor(Vector4f & dst,const Vector4f & src)1161 void ShaderCore::floor(Vector4f &dst, const Vector4f &src) 1162 { 1163 dst.x = Floor(src.x); 1164 dst.y = Floor(src.y); 1165 dst.z = Floor(src.z); 1166 dst.w = Floor(src.w); 1167 } 1168 round(Vector4f & dst,const Vector4f & src)1169 void ShaderCore::round(Vector4f &dst, const Vector4f &src) 1170 { 1171 dst.x = Round(src.x); 1172 dst.y = Round(src.y); 1173 dst.z = Round(src.z); 1174 dst.w = Round(src.w); 1175 } 1176 roundEven(Vector4f & dst,const Vector4f & src)1177 void ShaderCore::roundEven(Vector4f &dst, const Vector4f &src) 1178 { 1179 // dst = round(src) + ((round(src) < src) * 2 - 1) * (fract(src) == 0.5) * isOdd(round(src)); 1180 // ex.: 1.5: 2 + (0 * 2 - 1) * 1 * 0 = 2 1181 // 2.5: 3 + (0 * 2 - 1) * 1 * 1 = 2 1182 // -1.5: -2 + (1 * 2 - 1) * 1 * 0 = -2 1183 // -2.5: -3 + (1 * 2 - 1) * 1 * 1 = -2 1184 // Even if the round implementation rounds the other way: 1185 // 1.5: 1 + (1 * 2 - 1) * 1 * 1 = 2 1186 // 2.5: 2 + (1 * 2 - 1) * 1 * 0 = 2 1187 // -1.5: -1 + (0 * 2 - 1) * 1 * 1 = -2 1188 // -2.5: -2 + (0 * 2 - 1) * 1 * 0 = -2 1189 round(dst, src); 1190 dst.x += ((Float4(CmpLT(dst.x, src.x) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.x), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.x) & Int4(1)); 1191 dst.y += ((Float4(CmpLT(dst.y, src.y) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.y), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.y) & Int4(1)); 1192 dst.z += ((Float4(CmpLT(dst.z, src.z) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.z), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.z) & Int4(1)); 1193 dst.w += ((Float4(CmpLT(dst.w, src.w) & Int4(1)) * Float4(2.0f)) - Float4(1.0f)) * Float4(CmpEQ(Frac(src.w), Float4(0.5f)) & Int4(1)) * Float4(Int4(dst.w) & Int4(1)); 1194 } 1195 ceil(Vector4f & dst,const Vector4f & src)1196 void ShaderCore::ceil(Vector4f &dst, const Vector4f &src) 1197 { 1198 dst.x = Ceil(src.x); 1199 dst.y = Ceil(src.y); 1200 dst.z = Ceil(src.z); 1201 dst.w = Ceil(src.w); 1202 } 1203 powx(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,bool pp)1204 void ShaderCore::powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 1205 { 1206 Float4 pow = power(src0.x, src1.x, pp); 1207 1208 dst.x = pow; 1209 dst.y = pow; 1210 dst.z = pow; 1211 dst.w = pow; 1212 } 1213 pow(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,bool pp)1214 void ShaderCore::pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 1215 { 1216 dst.x = power(src0.x, src1.x, pp); 1217 dst.y = power(src0.y, src1.y, pp); 1218 dst.z = power(src0.z, src1.z, pp); 1219 dst.w = power(src0.w, src1.w, pp); 1220 } 1221 crs(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1222 void ShaderCore::crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1223 { 1224 dst.x = src0.y * src1.z - src0.z * src1.y; 1225 dst.y = src0.z * src1.x - src0.x * src1.z; 1226 dst.z = src0.x * src1.y - src0.y * src1.x; 1227 } 1228 forward1(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1229 void ShaderCore::forward1(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1230 { 1231 Int4 flip = CmpNLT(Nref.x * I.x, Float4(0.0f)) & Int4(0x80000000); 1232 1233 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1234 } 1235 forward2(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1236 void ShaderCore::forward2(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1237 { 1238 Int4 flip = CmpNLT(dot2(Nref, I), Float4(0.0f)) & Int4(0x80000000); 1239 1240 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1241 dst.y = As<Float4>(flip ^ As<Int4>(N.y)); 1242 } 1243 forward3(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1244 void ShaderCore::forward3(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1245 { 1246 Int4 flip = CmpNLT(dot3(Nref, I), Float4(0.0f)) & Int4(0x80000000); 1247 1248 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1249 dst.y = As<Float4>(flip ^ As<Int4>(N.y)); 1250 dst.z = As<Float4>(flip ^ As<Int4>(N.z)); 1251 } 1252 forward4(Vector4f & dst,const Vector4f & N,const Vector4f & I,const Vector4f & Nref)1253 void ShaderCore::forward4(Vector4f &dst, const Vector4f &N, const Vector4f &I, const Vector4f &Nref) 1254 { 1255 Int4 flip = CmpNLT(dot4(Nref, I), Float4(0.0f)) & Int4(0x80000000); 1256 1257 dst.x = As<Float4>(flip ^ As<Int4>(N.x)); 1258 dst.y = As<Float4>(flip ^ As<Int4>(N.y)); 1259 dst.z = As<Float4>(flip ^ As<Int4>(N.z)); 1260 dst.w = As<Float4>(flip ^ As<Int4>(N.w)); 1261 } 1262 reflect1(Vector4f & dst,const Vector4f & I,const Vector4f & N)1263 void ShaderCore::reflect1(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1264 { 1265 Float4 d = N.x * I.x; 1266 1267 dst.x = I.x - Float4(2.0f) * d * N.x; 1268 } 1269 reflect2(Vector4f & dst,const Vector4f & I,const Vector4f & N)1270 void ShaderCore::reflect2(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1271 { 1272 Float4 d = dot2(N, I); 1273 1274 dst.x = I.x - Float4(2.0f) * d * N.x; 1275 dst.y = I.y - Float4(2.0f) * d * N.y; 1276 } 1277 reflect3(Vector4f & dst,const Vector4f & I,const Vector4f & N)1278 void ShaderCore::reflect3(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1279 { 1280 Float4 d = dot3(N, I); 1281 1282 dst.x = I.x - Float4(2.0f) * d * N.x; 1283 dst.y = I.y - Float4(2.0f) * d * N.y; 1284 dst.z = I.z - Float4(2.0f) * d * N.z; 1285 } 1286 reflect4(Vector4f & dst,const Vector4f & I,const Vector4f & N)1287 void ShaderCore::reflect4(Vector4f &dst, const Vector4f &I, const Vector4f &N) 1288 { 1289 Float4 d = dot4(N, I); 1290 1291 dst.x = I.x - Float4(2.0f) * d * N.x; 1292 dst.y = I.y - Float4(2.0f) * d * N.y; 1293 dst.z = I.z - Float4(2.0f) * d * N.z; 1294 dst.w = I.w - Float4(2.0f) * d * N.w; 1295 } 1296 refract1(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1297 void ShaderCore::refract1(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1298 { 1299 Float4 d = N.x * I.x; 1300 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1301 Int4 pos = CmpNLT(k, Float4(0.0f)); 1302 Float4 t = (eta * d + Sqrt(k)); 1303 1304 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1305 } 1306 refract2(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1307 void ShaderCore::refract2(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1308 { 1309 Float4 d = dot2(N, I); 1310 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1311 Int4 pos = CmpNLT(k, Float4(0.0f)); 1312 Float4 t = (eta * d + Sqrt(k)); 1313 1314 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1315 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); 1316 } 1317 refract3(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1318 void ShaderCore::refract3(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1319 { 1320 Float4 d = dot3(N, I); 1321 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1322 Int4 pos = CmpNLT(k, Float4(0.0f)); 1323 Float4 t = (eta * d + Sqrt(k)); 1324 1325 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1326 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); 1327 dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z)); 1328 } 1329 refract4(Vector4f & dst,const Vector4f & I,const Vector4f & N,const Float4 & eta)1330 void ShaderCore::refract4(Vector4f &dst, const Vector4f &I, const Vector4f &N, const Float4 &eta) 1331 { 1332 Float4 d = dot4(N, I); 1333 Float4 k = Float4(1.0f) - eta * eta * (Float4(1.0f) - d * d); 1334 Int4 pos = CmpNLT(k, Float4(0.0f)); 1335 Float4 t = (eta * d + Sqrt(k)); 1336 1337 dst.x = As<Float4>(pos & As<Int4>(eta * I.x - t * N.x)); 1338 dst.y = As<Float4>(pos & As<Int4>(eta * I.y - t * N.y)); 1339 dst.z = As<Float4>(pos & As<Int4>(eta * I.z - t * N.z)); 1340 dst.w = As<Float4>(pos & As<Int4>(eta * I.w - t * N.w)); 1341 } 1342 sgn(Vector4f & dst,const Vector4f & src)1343 void ShaderCore::sgn(Vector4f &dst, const Vector4f &src) 1344 { 1345 sgn(dst.x, src.x); 1346 sgn(dst.y, src.y); 1347 sgn(dst.z, src.z); 1348 sgn(dst.w, src.w); 1349 } 1350 isgn(Vector4f & dst,const Vector4f & src)1351 void ShaderCore::isgn(Vector4f &dst, const Vector4f &src) 1352 { 1353 isgn(dst.x, src.x); 1354 isgn(dst.y, src.y); 1355 isgn(dst.z, src.z); 1356 isgn(dst.w, src.w); 1357 } 1358 abs(Vector4f & dst,const Vector4f & src)1359 void ShaderCore::abs(Vector4f &dst, const Vector4f &src) 1360 { 1361 dst.x = Abs(src.x); 1362 dst.y = Abs(src.y); 1363 dst.z = Abs(src.z); 1364 dst.w = Abs(src.w); 1365 } 1366 iabs(Vector4f & dst,const Vector4f & src)1367 void ShaderCore::iabs(Vector4f &dst, const Vector4f &src) 1368 { 1369 dst.x = As<Float4>(Abs(As<Int4>(src.x))); 1370 dst.y = As<Float4>(Abs(As<Int4>(src.y))); 1371 dst.z = As<Float4>(Abs(As<Int4>(src.z))); 1372 dst.w = As<Float4>(Abs(As<Int4>(src.w))); 1373 } 1374 nrm2(Vector4f & dst,const Vector4f & src,bool pp)1375 void ShaderCore::nrm2(Vector4f &dst, const Vector4f &src, bool pp) 1376 { 1377 Float4 dot = dot2(src, src); 1378 Float4 rsq = reciprocalSquareRoot(dot, false, pp); 1379 1380 dst.x = src.x * rsq; 1381 dst.y = src.y * rsq; 1382 dst.z = src.z * rsq; 1383 dst.w = src.w * rsq; 1384 } 1385 nrm3(Vector4f & dst,const Vector4f & src,bool pp)1386 void ShaderCore::nrm3(Vector4f &dst, const Vector4f &src, bool pp) 1387 { 1388 Float4 dot = dot3(src, src); 1389 Float4 rsq = reciprocalSquareRoot(dot, false, pp); 1390 1391 dst.x = src.x * rsq; 1392 dst.y = src.y * rsq; 1393 dst.z = src.z * rsq; 1394 dst.w = src.w * rsq; 1395 } 1396 nrm4(Vector4f & dst,const Vector4f & src,bool pp)1397 void ShaderCore::nrm4(Vector4f &dst, const Vector4f &src, bool pp) 1398 { 1399 Float4 dot = dot4(src, src); 1400 Float4 rsq = reciprocalSquareRoot(dot, false, pp); 1401 1402 dst.x = src.x * rsq; 1403 dst.y = src.y * rsq; 1404 dst.z = src.z * rsq; 1405 dst.w = src.w * rsq; 1406 } 1407 sincos(Vector4f & dst,const Vector4f & src,bool pp)1408 void ShaderCore::sincos(Vector4f &dst, const Vector4f &src, bool pp) 1409 { 1410 dst.x = cosine_pi(src.x, pp); 1411 dst.y = sine_pi(src.x, pp); 1412 } 1413 cos(Vector4f & dst,const Vector4f & src,bool pp)1414 void ShaderCore::cos(Vector4f &dst, const Vector4f &src, bool pp) 1415 { 1416 dst.x = cosine(src.x, pp); 1417 dst.y = cosine(src.y, pp); 1418 dst.z = cosine(src.z, pp); 1419 dst.w = cosine(src.w, pp); 1420 } 1421 sin(Vector4f & dst,const Vector4f & src,bool pp)1422 void ShaderCore::sin(Vector4f &dst, const Vector4f &src, bool pp) 1423 { 1424 dst.x = sine(src.x, pp); 1425 dst.y = sine(src.y, pp); 1426 dst.z = sine(src.z, pp); 1427 dst.w = sine(src.w, pp); 1428 } 1429 tan(Vector4f & dst,const Vector4f & src,bool pp)1430 void ShaderCore::tan(Vector4f &dst, const Vector4f &src, bool pp) 1431 { 1432 dst.x = tangent(src.x, pp); 1433 dst.y = tangent(src.y, pp); 1434 dst.z = tangent(src.z, pp); 1435 dst.w = tangent(src.w, pp); 1436 } 1437 acos(Vector4f & dst,const Vector4f & src,bool pp)1438 void ShaderCore::acos(Vector4f &dst, const Vector4f &src, bool pp) 1439 { 1440 dst.x = arccos(src.x, pp); 1441 dst.y = arccos(src.y, pp); 1442 dst.z = arccos(src.z, pp); 1443 dst.w = arccos(src.w, pp); 1444 } 1445 asin(Vector4f & dst,const Vector4f & src,bool pp)1446 void ShaderCore::asin(Vector4f &dst, const Vector4f &src, bool pp) 1447 { 1448 dst.x = arcsin(src.x, pp); 1449 dst.y = arcsin(src.y, pp); 1450 dst.z = arcsin(src.z, pp); 1451 dst.w = arcsin(src.w, pp); 1452 } 1453 atan(Vector4f & dst,const Vector4f & src,bool pp)1454 void ShaderCore::atan(Vector4f &dst, const Vector4f &src, bool pp) 1455 { 1456 dst.x = arctan(src.x, pp); 1457 dst.y = arctan(src.y, pp); 1458 dst.z = arctan(src.z, pp); 1459 dst.w = arctan(src.w, pp); 1460 } 1461 atan2(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,bool pp)1462 void ShaderCore::atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp) 1463 { 1464 dst.x = arctan(src0.x, src1.x, pp); 1465 dst.y = arctan(src0.y, src1.y, pp); 1466 dst.z = arctan(src0.z, src1.z, pp); 1467 dst.w = arctan(src0.w, src1.w, pp); 1468 } 1469 cosh(Vector4f & dst,const Vector4f & src,bool pp)1470 void ShaderCore::cosh(Vector4f &dst, const Vector4f &src, bool pp) 1471 { 1472 dst.x = cosineh(src.x, pp); 1473 dst.y = cosineh(src.y, pp); 1474 dst.z = cosineh(src.z, pp); 1475 dst.w = cosineh(src.w, pp); 1476 } 1477 sinh(Vector4f & dst,const Vector4f & src,bool pp)1478 void ShaderCore::sinh(Vector4f &dst, const Vector4f &src, bool pp) 1479 { 1480 dst.x = sineh(src.x, pp); 1481 dst.y = sineh(src.y, pp); 1482 dst.z = sineh(src.z, pp); 1483 dst.w = sineh(src.w, pp); 1484 } 1485 tanh(Vector4f & dst,const Vector4f & src,bool pp)1486 void ShaderCore::tanh(Vector4f &dst, const Vector4f &src, bool pp) 1487 { 1488 dst.x = tangenth(src.x, pp); 1489 dst.y = tangenth(src.y, pp); 1490 dst.z = tangenth(src.z, pp); 1491 dst.w = tangenth(src.w, pp); 1492 } 1493 acosh(Vector4f & dst,const Vector4f & src,bool pp)1494 void ShaderCore::acosh(Vector4f &dst, const Vector4f &src, bool pp) 1495 { 1496 dst.x = arccosh(src.x, pp); 1497 dst.y = arccosh(src.y, pp); 1498 dst.z = arccosh(src.z, pp); 1499 dst.w = arccosh(src.w, pp); 1500 } 1501 asinh(Vector4f & dst,const Vector4f & src,bool pp)1502 void ShaderCore::asinh(Vector4f &dst, const Vector4f &src, bool pp) 1503 { 1504 dst.x = arcsinh(src.x, pp); 1505 dst.y = arcsinh(src.y, pp); 1506 dst.z = arcsinh(src.z, pp); 1507 dst.w = arcsinh(src.w, pp); 1508 } 1509 atanh(Vector4f & dst,const Vector4f & src,bool pp)1510 void ShaderCore::atanh(Vector4f &dst, const Vector4f &src, bool pp) 1511 { 1512 dst.x = arctanh(src.x, pp); 1513 dst.y = arctanh(src.y, pp); 1514 dst.z = arctanh(src.z, pp); 1515 dst.w = arctanh(src.w, pp); 1516 } 1517 expp(Vector4f & dst,const Vector4f & src,unsigned short version)1518 void ShaderCore::expp(Vector4f &dst, const Vector4f &src, unsigned short version) 1519 { 1520 if(version < 0x0200) 1521 { 1522 Float4 frc = Frac(src.x); 1523 Float4 floor = src.x - frc; 1524 1525 dst.x = exponential2(floor, true); 1526 dst.y = frc; 1527 dst.z = exponential2(src.x, true); 1528 dst.w = Float4(1.0f); 1529 } 1530 else // Version >= 2.0 1531 { 1532 exp2x(dst, src, true); // FIXME: 10-bit precision suffices 1533 } 1534 } 1535 logp(Vector4f & dst,const Vector4f & src,unsigned short version)1536 void ShaderCore::logp(Vector4f &dst, const Vector4f &src, unsigned short version) 1537 { 1538 if(version < 0x0200) 1539 { 1540 Float4 tmp0; 1541 Float4 tmp1; 1542 Float4 t; 1543 Int4 r; 1544 1545 tmp0 = Abs(src.x); 1546 tmp1 = tmp0; 1547 1548 // X component 1549 r = As<Int4>(As<UInt4>(tmp0) >> 23) - Int4(127); 1550 dst.x = Float4(r); 1551 1552 // Y component 1553 dst.y = As<Float4>((As<Int4>(tmp1) & Int4(0x007FFFFF)) | As<Int4>(Float4(1.0f))); 1554 1555 // Z component 1556 dst.z = logarithm2(src.x, true, true); 1557 1558 // W component 1559 dst.w = 1.0f; 1560 } 1561 else 1562 { 1563 log2x(dst, src, true); 1564 } 1565 } 1566 cmp0(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1567 void ShaderCore::cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1568 { 1569 cmp0(dst.x, src0.x, src1.x, src2.x); 1570 cmp0(dst.y, src0.y, src1.y, src2.y); 1571 cmp0(dst.z, src0.z, src1.z, src2.z); 1572 cmp0(dst.w, src0.w, src1.w, src2.w); 1573 } 1574 select(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,const Vector4f & src2)1575 void ShaderCore::select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2) 1576 { 1577 select(dst.x, As<Int4>(src0.x), src1.x, src2.x); 1578 select(dst.y, As<Int4>(src0.y), src1.y, src2.y); 1579 select(dst.z, As<Int4>(src0.z), src1.z, src2.z); 1580 select(dst.w, As<Int4>(src0.w), src1.w, src2.w); 1581 } 1582 extract(Float4 & dst,const Vector4f & src0,const Float4 & src1)1583 void ShaderCore::extract(Float4 &dst, const Vector4f &src0, const Float4 &src1) 1584 { 1585 select(dst, CmpEQ(As<Int4>(src1), Int4(1)), src0.y, src0.x); 1586 select(dst, CmpEQ(As<Int4>(src1), Int4(2)), src0.z, dst); 1587 select(dst, CmpEQ(As<Int4>(src1), Int4(3)), src0.w, dst); 1588 } 1589 insert(Vector4f & dst,const Vector4f & src,const Float4 & element,const Float4 & index)1590 void ShaderCore::insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index) 1591 { 1592 select(dst.x, CmpEQ(As<Int4>(index), Int4(0)), element, src.x); 1593 select(dst.y, CmpEQ(As<Int4>(index), Int4(1)), element, src.y); 1594 select(dst.z, CmpEQ(As<Int4>(index), Int4(2)), element, src.z); 1595 select(dst.w, CmpEQ(As<Int4>(index), Int4(3)), element, src.w); 1596 } 1597 sgn(Float4 & dst,const Float4 & src)1598 void ShaderCore::sgn(Float4 &dst, const Float4 &src) 1599 { 1600 Int4 neg = As<Int4>(CmpLT(src, Float4(-0.0f))) & As<Int4>(Float4(-1.0f)); 1601 Int4 pos = As<Int4>(CmpNLE(src, Float4(+0.0f))) & As<Int4>(Float4(1.0f)); 1602 dst = As<Float4>(neg | pos); 1603 } 1604 isgn(Float4 & dst,const Float4 & src)1605 void ShaderCore::isgn(Float4 &dst, const Float4 &src) 1606 { 1607 Int4 neg = CmpLT(As<Int4>(src), Int4(0)) & Int4(-1); 1608 Int4 pos = CmpNLE(As<Int4>(src), Int4(0)) & Int4(1); 1609 dst = As<Float4>(neg | pos); 1610 } 1611 cmp0(Float4 & dst,const Float4 & src0,const Float4 & src1,const Float4 & src2)1612 void ShaderCore::cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2) 1613 { 1614 Int4 pos = CmpLE(Float4(0.0f), src0); 1615 select(dst, pos, src1, src2); 1616 } 1617 cmp0i(Float4 & dst,const Float4 & src0,const Float4 & src1,const Float4 & src2)1618 void ShaderCore::cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2) 1619 { 1620 Int4 pos = CmpEQ(Int4(0), As<Int4>(src0)); 1621 select(dst, pos, src1, src2); 1622 } 1623 select(Float4 & dst,RValue<Int4> src0,const Float4 & src1,const Float4 & src2)1624 void ShaderCore::select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2) 1625 { 1626 // FIXME: LLVM vector select 1627 dst = As<Float4>((src0 & As<Int4>(src1)) | (~src0 & As<Int4>(src2))); 1628 } 1629 cmp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,Control control)1630 void ShaderCore::cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control) 1631 { 1632 switch(control) 1633 { 1634 case Shader::CONTROL_GT: 1635 dst.x = As<Float4>(CmpNLE(src0.x, src1.x)); 1636 dst.y = As<Float4>(CmpNLE(src0.y, src1.y)); 1637 dst.z = As<Float4>(CmpNLE(src0.z, src1.z)); 1638 dst.w = As<Float4>(CmpNLE(src0.w, src1.w)); 1639 break; 1640 case Shader::CONTROL_EQ: 1641 dst.x = As<Float4>(CmpEQ(src0.x, src1.x)); 1642 dst.y = As<Float4>(CmpEQ(src0.y, src1.y)); 1643 dst.z = As<Float4>(CmpEQ(src0.z, src1.z)); 1644 dst.w = As<Float4>(CmpEQ(src0.w, src1.w)); 1645 break; 1646 case Shader::CONTROL_GE: 1647 dst.x = As<Float4>(CmpNLT(src0.x, src1.x)); 1648 dst.y = As<Float4>(CmpNLT(src0.y, src1.y)); 1649 dst.z = As<Float4>(CmpNLT(src0.z, src1.z)); 1650 dst.w = As<Float4>(CmpNLT(src0.w, src1.w)); 1651 break; 1652 case Shader::CONTROL_LT: 1653 dst.x = As<Float4>(CmpLT(src0.x, src1.x)); 1654 dst.y = As<Float4>(CmpLT(src0.y, src1.y)); 1655 dst.z = As<Float4>(CmpLT(src0.z, src1.z)); 1656 dst.w = As<Float4>(CmpLT(src0.w, src1.w)); 1657 break; 1658 case Shader::CONTROL_NE: 1659 dst.x = As<Float4>(CmpNEQ(src0.x, src1.x)); 1660 dst.y = As<Float4>(CmpNEQ(src0.y, src1.y)); 1661 dst.z = As<Float4>(CmpNEQ(src0.z, src1.z)); 1662 dst.w = As<Float4>(CmpNEQ(src0.w, src1.w)); 1663 break; 1664 case Shader::CONTROL_LE: 1665 dst.x = As<Float4>(CmpLE(src0.x, src1.x)); 1666 dst.y = As<Float4>(CmpLE(src0.y, src1.y)); 1667 dst.z = As<Float4>(CmpLE(src0.z, src1.z)); 1668 dst.w = As<Float4>(CmpLE(src0.w, src1.w)); 1669 break; 1670 default: 1671 ASSERT(false); 1672 } 1673 } 1674 icmp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,Control control)1675 void ShaderCore::icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control) 1676 { 1677 switch(control) 1678 { 1679 case Shader::CONTROL_GT: 1680 dst.x = As<Float4>(CmpNLE(As<Int4>(src0.x), As<Int4>(src1.x))); 1681 dst.y = As<Float4>(CmpNLE(As<Int4>(src0.y), As<Int4>(src1.y))); 1682 dst.z = As<Float4>(CmpNLE(As<Int4>(src0.z), As<Int4>(src1.z))); 1683 dst.w = As<Float4>(CmpNLE(As<Int4>(src0.w), As<Int4>(src1.w))); 1684 break; 1685 case Shader::CONTROL_EQ: 1686 dst.x = As<Float4>(CmpEQ(As<Int4>(src0.x), As<Int4>(src1.x))); 1687 dst.y = As<Float4>(CmpEQ(As<Int4>(src0.y), As<Int4>(src1.y))); 1688 dst.z = As<Float4>(CmpEQ(As<Int4>(src0.z), As<Int4>(src1.z))); 1689 dst.w = As<Float4>(CmpEQ(As<Int4>(src0.w), As<Int4>(src1.w))); 1690 break; 1691 case Shader::CONTROL_GE: 1692 dst.x = As<Float4>(CmpNLT(As<Int4>(src0.x), As<Int4>(src1.x))); 1693 dst.y = As<Float4>(CmpNLT(As<Int4>(src0.y), As<Int4>(src1.y))); 1694 dst.z = As<Float4>(CmpNLT(As<Int4>(src0.z), As<Int4>(src1.z))); 1695 dst.w = As<Float4>(CmpNLT(As<Int4>(src0.w), As<Int4>(src1.w))); 1696 break; 1697 case Shader::CONTROL_LT: 1698 dst.x = As<Float4>(CmpLT(As<Int4>(src0.x), As<Int4>(src1.x))); 1699 dst.y = As<Float4>(CmpLT(As<Int4>(src0.y), As<Int4>(src1.y))); 1700 dst.z = As<Float4>(CmpLT(As<Int4>(src0.z), As<Int4>(src1.z))); 1701 dst.w = As<Float4>(CmpLT(As<Int4>(src0.w), As<Int4>(src1.w))); 1702 break; 1703 case Shader::CONTROL_NE: 1704 dst.x = As<Float4>(CmpNEQ(As<Int4>(src0.x), As<Int4>(src1.x))); 1705 dst.y = As<Float4>(CmpNEQ(As<Int4>(src0.y), As<Int4>(src1.y))); 1706 dst.z = As<Float4>(CmpNEQ(As<Int4>(src0.z), As<Int4>(src1.z))); 1707 dst.w = As<Float4>(CmpNEQ(As<Int4>(src0.w), As<Int4>(src1.w))); 1708 break; 1709 case Shader::CONTROL_LE: 1710 dst.x = As<Float4>(CmpLE(As<Int4>(src0.x), As<Int4>(src1.x))); 1711 dst.y = As<Float4>(CmpLE(As<Int4>(src0.y), As<Int4>(src1.y))); 1712 dst.z = As<Float4>(CmpLE(As<Int4>(src0.z), As<Int4>(src1.z))); 1713 dst.w = As<Float4>(CmpLE(As<Int4>(src0.w), As<Int4>(src1.w))); 1714 break; 1715 default: 1716 ASSERT(false); 1717 } 1718 } 1719 ucmp(Vector4f & dst,const Vector4f & src0,const Vector4f & src1,Control control)1720 void ShaderCore::ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control) 1721 { 1722 switch(control) 1723 { 1724 case Shader::CONTROL_GT: 1725 dst.x = As<Float4>(CmpNLE(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1726 dst.y = As<Float4>(CmpNLE(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1727 dst.z = As<Float4>(CmpNLE(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1728 dst.w = As<Float4>(CmpNLE(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1729 break; 1730 case Shader::CONTROL_EQ: 1731 dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1732 dst.y = As<Float4>(CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1733 dst.z = As<Float4>(CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1734 dst.w = As<Float4>(CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1735 break; 1736 case Shader::CONTROL_GE: 1737 dst.x = As<Float4>(CmpNLT(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1738 dst.y = As<Float4>(CmpNLT(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1739 dst.z = As<Float4>(CmpNLT(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1740 dst.w = As<Float4>(CmpNLT(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1741 break; 1742 case Shader::CONTROL_LT: 1743 dst.x = As<Float4>(CmpLT(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1744 dst.y = As<Float4>(CmpLT(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1745 dst.z = As<Float4>(CmpLT(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1746 dst.w = As<Float4>(CmpLT(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1747 break; 1748 case Shader::CONTROL_NE: 1749 dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1750 dst.y = As<Float4>(CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1751 dst.z = As<Float4>(CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1752 dst.w = As<Float4>(CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1753 break; 1754 case Shader::CONTROL_LE: 1755 dst.x = As<Float4>(CmpLE(As<UInt4>(src0.x), As<UInt4>(src1.x))); 1756 dst.y = As<Float4>(CmpLE(As<UInt4>(src0.y), As<UInt4>(src1.y))); 1757 dst.z = As<Float4>(CmpLE(As<UInt4>(src0.z), As<UInt4>(src1.z))); 1758 dst.w = As<Float4>(CmpLE(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1759 break; 1760 default: 1761 ASSERT(false); 1762 } 1763 } 1764 all(Float4 & dst,const Vector4f & src)1765 void ShaderCore::all(Float4 &dst, const Vector4f &src) 1766 { 1767 dst = As<Float4>(As<Int4>(src.x) & As<Int4>(src.y) & As<Int4>(src.z) & As<Int4>(src.w)); 1768 } 1769 any(Float4 & dst,const Vector4f & src)1770 void ShaderCore::any(Float4 &dst, const Vector4f &src) 1771 { 1772 dst = As<Float4>(As<Int4>(src.x) | As<Int4>(src.y) | As<Int4>(src.z) | As<Int4>(src.w)); 1773 } 1774 bitwise_not(Vector4f & dst,const Vector4f & src)1775 void ShaderCore::bitwise_not(Vector4f &dst, const Vector4f &src) 1776 { 1777 dst.x = As<Float4>(As<Int4>(src.x) ^ Int4(0xFFFFFFFF)); 1778 dst.y = As<Float4>(As<Int4>(src.y) ^ Int4(0xFFFFFFFF)); 1779 dst.z = As<Float4>(As<Int4>(src.z) ^ Int4(0xFFFFFFFF)); 1780 dst.w = As<Float4>(As<Int4>(src.w) ^ Int4(0xFFFFFFFF)); 1781 } 1782 bitwise_or(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1783 void ShaderCore::bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1784 { 1785 dst.x = As<Float4>(As<Int4>(src0.x) | As<Int4>(src1.x)); 1786 dst.y = As<Float4>(As<Int4>(src0.y) | As<Int4>(src1.y)); 1787 dst.z = As<Float4>(As<Int4>(src0.z) | As<Int4>(src1.z)); 1788 dst.w = As<Float4>(As<Int4>(src0.w) | As<Int4>(src1.w)); 1789 } 1790 bitwise_xor(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1791 void ShaderCore::bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1792 { 1793 dst.x = As<Float4>(As<Int4>(src0.x) ^ As<Int4>(src1.x)); 1794 dst.y = As<Float4>(As<Int4>(src0.y) ^ As<Int4>(src1.y)); 1795 dst.z = As<Float4>(As<Int4>(src0.z) ^ As<Int4>(src1.z)); 1796 dst.w = As<Float4>(As<Int4>(src0.w) ^ As<Int4>(src1.w)); 1797 } 1798 bitwise_and(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1799 void ShaderCore::bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1800 { 1801 dst.x = As<Float4>(As<Int4>(src0.x) & As<Int4>(src1.x)); 1802 dst.y = As<Float4>(As<Int4>(src0.y) & As<Int4>(src1.y)); 1803 dst.z = As<Float4>(As<Int4>(src0.z) & As<Int4>(src1.z)); 1804 dst.w = As<Float4>(As<Int4>(src0.w) & As<Int4>(src1.w)); 1805 } 1806 equal(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1807 void ShaderCore::equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1808 { 1809 dst.x = As<Float4>(CmpEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) & 1810 CmpEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) & 1811 CmpEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) & 1812 CmpEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1813 dst.y = dst.x; 1814 dst.z = dst.x; 1815 dst.w = dst.x; 1816 } 1817 notEqual(Vector4f & dst,const Vector4f & src0,const Vector4f & src1)1818 void ShaderCore::notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1) 1819 { 1820 dst.x = As<Float4>(CmpNEQ(As<UInt4>(src0.x), As<UInt4>(src1.x)) | 1821 CmpNEQ(As<UInt4>(src0.y), As<UInt4>(src1.y)) | 1822 CmpNEQ(As<UInt4>(src0.z), As<UInt4>(src1.z)) | 1823 CmpNEQ(As<UInt4>(src0.w), As<UInt4>(src1.w))); 1824 dst.y = dst.x; 1825 dst.z = dst.x; 1826 dst.w = dst.x; 1827 } 1828 } 1829