1/// @ref gtx_simd_vec4 2/// @file glm/gtx/simd_vec4.inl 3 4namespace glm{ 5namespace detail{ 6 7////////////////////////////////////// 8// Implicit basic constructors 9 10#if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT) 11 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD() 12# ifdef GLM_FORCE_NO_CTOR_INIT 13 : Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)) 14# endif 15 {} 16#endif//!GLM_HAS_DEFAULTED_FUNCTIONS 17 18#if !GLM_HAS_DEFAULTED_FUNCTIONS 19 GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) : 20 Data(v.Data) 21 {} 22#endif//!GLM_HAS_DEFAULTED_FUNCTIONS 23 24GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) : 25 Data(Data) 26{} 27 28GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec4 const & v) : 29 Data(_mm_set_ps(v.w, v.z, v.y, v.x)) 30{} 31 32////////////////////////////////////// 33// Explicit basic constructors 34 35GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) : 36 Data(_mm_set1_ps(s)) 37{} 38 39GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) : 40// Data(_mm_setr_ps(x, y, z, w)) 41 Data(_mm_set_ps(w, z, y, x)) 42{} 43/* 44GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) : 45 Data(_mm_load_ps(v)) 46{} 47*/ 48////////////////////////////////////// 49// Swizzle constructors 50 51//fvec4SIMD(ref4<float> const & r); 52 53////////////////////////////////////// 54// Conversion vector constructors 55 56GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) : 57 Data(_mm_set_ps(s2, s1, v.y, v.x)) 58{} 59 60GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) : 61 Data(_mm_set_ps(s2, v.y, v.x, s1)) 62{} 63 64GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) : 65 Data(_mm_set_ps(v.y, v.x, s2, s1)) 66{} 67 68GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) : 69 Data(_mm_set_ps(s, v.z, v.y, v.x)) 70{} 71 72GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) : 73 Data(_mm_set_ps(v.z, v.y, v.x, s)) 74{} 75 76GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) : 77 Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x)) 78{} 79 80//GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) : 81// Data(_mm_cvtepi32_ps(v.Data)) 82//{} 83 84////////////////////////////////////// 85// Unary arithmetic operators 86 87#if !GLM_HAS_DEFAULTED_FUNCTIONS 88 GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v) 89 { 90 this->Data = v.Data; 91 return *this; 92 } 93#endif//!GLM_HAS_DEFAULTED_FUNCTIONS 94 95GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s) 96{ 97 this->Data = _mm_add_ps(Data, _mm_set_ps1(s)); 98 return *this; 99} 100 101GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v) 102{ 103 this->Data = _mm_add_ps(this->Data , v.Data); 104 return *this; 105} 106 107GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s) 108{ 109 this->Data = _mm_sub_ps(Data, _mm_set_ps1(s)); 110 return *this; 111} 112 113GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v) 114{ 115 this->Data = _mm_sub_ps(this->Data , v.Data); 116 return *this; 117} 118 119GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s) 120{ 121 this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); 122 return *this; 123} 124 125GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v) 126{ 127 this->Data = _mm_mul_ps(this->Data , v.Data); 128 return *this; 129} 130 131GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s) 132{ 133 this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); 134 return *this; 135} 136 137GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v) 138{ 139 this->Data = _mm_div_ps(this->Data , v.Data); 140 return *this; 141} 142 143GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++() 144{ 145 this->Data = _mm_add_ps(this->Data , glm::detail::one); 146 return *this; 147} 148 149GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--() 150{ 151 this->Data = _mm_sub_ps(this->Data, glm::detail::one); 152 return *this; 153} 154 155////////////////////////////////////// 156// Swizzle operators 157 158template <comp X_, comp Y_, comp Z_, comp W_> 159GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const 160{ 161 __m128 Data = _mm_shuffle_ps( 162 this->Data, this->Data, 163 shuffle_mask<(W_ << 6) | (Z_ << 4) | (Y_ << 2) | (X_ << 0)>::value); 164 return fvec4SIMD(Data); 165} 166 167template <comp X_, comp Y_, comp Z_, comp W_> 168GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle() 169{ 170 this->Data = _mm_shuffle_ps( 171 this->Data, this->Data, 172 shuffle_mask<(W_ << 6) | (Z_ << 4) | (Y_ << 2) | (X_ << 0)>::value); 173 return *this; 174} 175 176// operator+ 177GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s) 178{ 179 return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s))); 180} 181 182GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v) 183{ 184 return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data)); 185} 186 187GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2) 188{ 189 return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data)); 190} 191 192//operator- 193GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s) 194{ 195 return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s))); 196} 197 198GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v) 199{ 200 return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data)); 201} 202 203GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2) 204{ 205 return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data)); 206} 207 208//operator* 209GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s) 210{ 211 __m128 par0 = v.Data; 212 __m128 par1 = _mm_set1_ps(s); 213 return fvec4SIMD(_mm_mul_ps(par0, par1)); 214} 215 216GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v) 217{ 218 __m128 par0 = _mm_set1_ps(s); 219 __m128 par1 = v.Data; 220 return fvec4SIMD(_mm_mul_ps(par0, par1)); 221} 222 223GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2) 224{ 225 return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data)); 226} 227 228//operator/ 229GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s) 230{ 231 __m128 par0 = v.Data; 232 __m128 par1 = _mm_set1_ps(s); 233 return fvec4SIMD(_mm_div_ps(par0, par1)); 234} 235 236GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v) 237{ 238 __m128 par0 = _mm_set1_ps(s); 239 __m128 par1 = v.Data; 240 return fvec4SIMD(_mm_div_ps(par0, par1)); 241} 242 243GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2) 244{ 245 return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data)); 246} 247 248// Unary constant operators 249GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v) 250{ 251 return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data)); 252} 253 254GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int) 255{ 256 return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one)); 257} 258 259GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int) 260{ 261 return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one)); 262} 263 264}//namespace detail 265 266GLM_FUNC_QUALIFIER vec4 vec4_cast 267( 268 detail::fvec4SIMD const & x 269) 270{ 271 GLM_ALIGN(16) vec4 Result; 272 _mm_store_ps(&Result[0], x.Data); 273 return Result; 274} 275 276// Other possible implementation 277//float abs(float a) 278//{ 279// return max(-a, a); 280//} 281GLM_FUNC_QUALIFIER detail::fvec4SIMD abs 282( 283 detail::fvec4SIMD const & x 284) 285{ 286 return detail::sse_abs_ps(x.Data); 287} 288 289GLM_FUNC_QUALIFIER detail::fvec4SIMD sign 290( 291 detail::fvec4SIMD const & x 292) 293{ 294 return detail::sse_sgn_ps(x.Data); 295} 296 297GLM_FUNC_QUALIFIER detail::fvec4SIMD floor 298( 299 detail::fvec4SIMD const & x 300) 301{ 302 return detail::sse_flr_ps(x.Data); 303} 304 305GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc 306( 307 detail::fvec4SIMD const & x 308) 309{ 310 //return x < 0 ? -floor(-x) : floor(x); 311 312 __m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data)); 313 __m128 Sub0 = _mm_sub_ps(Flr0, x.Data); 314 __m128 Flr1 = detail::sse_flr_ps(x.Data); 315 316 __m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero); 317 __m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero); 318 319 __m128 And0 = _mm_and_ps(Sub0, Cmp0); 320 __m128 And1 = _mm_and_ps(Flr1, Cmp1); 321 322 return _mm_or_ps(And0, And1); 323} 324 325GLM_FUNC_QUALIFIER detail::fvec4SIMD round 326( 327 detail::fvec4SIMD const & x 328) 329{ 330 return detail::sse_rnd_ps(x.Data); 331} 332 333//GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven 334//( 335// detail::fvec4SIMD const & x 336//) 337//{ 338 339//} 340 341GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil 342( 343 detail::fvec4SIMD const & x 344) 345{ 346 return detail::sse_ceil_ps(x.Data); 347} 348 349GLM_FUNC_QUALIFIER detail::fvec4SIMD fract 350( 351 detail::fvec4SIMD const & x 352) 353{ 354 return detail::sse_frc_ps(x.Data); 355} 356 357GLM_FUNC_QUALIFIER detail::fvec4SIMD mod 358( 359 detail::fvec4SIMD const & x, 360 detail::fvec4SIMD const & y 361) 362{ 363 return detail::sse_mod_ps(x.Data, y.Data); 364} 365 366GLM_FUNC_QUALIFIER detail::fvec4SIMD mod 367( 368 detail::fvec4SIMD const & x, 369 float const & y 370) 371{ 372 return detail::sse_mod_ps(x.Data, _mm_set1_ps(y)); 373} 374 375//GLM_FUNC_QUALIFIER detail::fvec4SIMD modf 376//( 377// detail::fvec4SIMD const & x, 378// detail::fvec4SIMD & i 379//) 380//{ 381 382//} 383 384GLM_FUNC_QUALIFIER detail::fvec4SIMD min 385( 386 detail::fvec4SIMD const & x, 387 detail::fvec4SIMD const & y 388) 389{ 390 return _mm_min_ps(x.Data, y.Data); 391} 392 393GLM_FUNC_QUALIFIER detail::fvec4SIMD min 394( 395 detail::fvec4SIMD const & x, 396 float const & y 397) 398{ 399 return _mm_min_ps(x.Data, _mm_set1_ps(y)); 400} 401 402GLM_FUNC_QUALIFIER detail::fvec4SIMD max 403( 404 detail::fvec4SIMD const & x, 405 detail::fvec4SIMD const & y 406) 407{ 408 return _mm_max_ps(x.Data, y.Data); 409} 410 411GLM_FUNC_QUALIFIER detail::fvec4SIMD max 412( 413 detail::fvec4SIMD const & x, 414 float const & y 415) 416{ 417 return _mm_max_ps(x.Data, _mm_set1_ps(y)); 418} 419 420GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp 421( 422 detail::fvec4SIMD const & x, 423 detail::fvec4SIMD const & minVal, 424 detail::fvec4SIMD const & maxVal 425) 426{ 427 return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data); 428} 429 430GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp 431( 432 detail::fvec4SIMD const & x, 433 float const & minVal, 434 float const & maxVal 435) 436{ 437 return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal)); 438} 439 440GLM_FUNC_QUALIFIER detail::fvec4SIMD mix 441( 442 detail::fvec4SIMD const & x, 443 detail::fvec4SIMD const & y, 444 detail::fvec4SIMD const & a 445) 446{ 447 __m128 Sub0 = _mm_sub_ps(y.Data, x.Data); 448 __m128 Mul0 = _mm_mul_ps(a.Data, Sub0); 449 return _mm_add_ps(x.Data, Mul0); 450} 451 452GLM_FUNC_QUALIFIER detail::fvec4SIMD step 453( 454 detail::fvec4SIMD const & edge, 455 detail::fvec4SIMD const & x 456) 457{ 458 __m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data); 459 return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); 460} 461 462GLM_FUNC_QUALIFIER detail::fvec4SIMD step 463( 464 float const & edge, 465 detail::fvec4SIMD const & x 466) 467{ 468 __m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge)); 469 return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); 470} 471 472GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep 473( 474 detail::fvec4SIMD const & edge0, 475 detail::fvec4SIMD const & edge1, 476 detail::fvec4SIMD const & x 477) 478{ 479 return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data); 480} 481 482GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep 483( 484 float const & edge0, 485 float const & edge1, 486 detail::fvec4SIMD const & x 487) 488{ 489 return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data); 490} 491 492//GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x) 493//{ 494 495//} 496 497//GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x) 498//{ 499 500//} 501 502//GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt 503//( 504// detail::fvec4SIMD const & value 505//) 506//{ 507 508//} 509 510//GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat 511//( 512// detail::ivec4SIMD const & value 513//) 514//{ 515 516//} 517 518GLM_FUNC_QUALIFIER detail::fvec4SIMD fma 519( 520 detail::fvec4SIMD const & a, 521 detail::fvec4SIMD const & b, 522 detail::fvec4SIMD const & c 523) 524{ 525 return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); 526} 527 528GLM_FUNC_QUALIFIER float length 529( 530 detail::fvec4SIMD const & x 531) 532{ 533 detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); 534 detail::fvec4SIMD sqt0 = sqrt(dot0); 535 float Result = 0; 536 _mm_store_ss(&Result, sqt0.Data); 537 return Result; 538} 539 540GLM_FUNC_QUALIFIER float fastLength 541( 542 detail::fvec4SIMD const & x 543) 544{ 545 detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); 546 detail::fvec4SIMD sqt0 = fastSqrt(dot0); 547 float Result = 0; 548 _mm_store_ss(&Result, sqt0.Data); 549 return Result; 550} 551 552GLM_FUNC_QUALIFIER float niceLength 553( 554 detail::fvec4SIMD const & x 555) 556{ 557 detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); 558 detail::fvec4SIMD sqt0 = niceSqrt(dot0); 559 float Result = 0; 560 _mm_store_ss(&Result, sqt0.Data); 561 return Result; 562} 563 564GLM_FUNC_QUALIFIER detail::fvec4SIMD length4 565( 566 detail::fvec4SIMD const & x 567) 568{ 569 return sqrt(dot4(x, x)); 570} 571 572GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4 573( 574 detail::fvec4SIMD const & x 575) 576{ 577 return fastSqrt(dot4(x, x)); 578} 579 580GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4 581( 582 detail::fvec4SIMD const & x 583) 584{ 585 return niceSqrt(dot4(x, x)); 586} 587 588GLM_FUNC_QUALIFIER float distance 589( 590 detail::fvec4SIMD const & p0, 591 detail::fvec4SIMD const & p1 592) 593{ 594 float Result = 0; 595 _mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data)); 596 return Result; 597} 598 599GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4 600( 601 detail::fvec4SIMD const & p0, 602 detail::fvec4SIMD const & p1 603) 604{ 605 return detail::sse_dst_ps(p0.Data, p1.Data); 606} 607 608GLM_FUNC_QUALIFIER float dot 609( 610 detail::fvec4SIMD const & x, 611 detail::fvec4SIMD const & y 612) 613{ 614 float Result = 0; 615 _mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data)); 616 return Result; 617} 618 619GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4 620( 621 detail::fvec4SIMD const & x, 622 detail::fvec4SIMD const & y 623) 624{ 625 return detail::sse_dot_ps(x.Data, y.Data); 626} 627 628GLM_FUNC_QUALIFIER detail::fvec4SIMD cross 629( 630 detail::fvec4SIMD const & x, 631 detail::fvec4SIMD const & y 632) 633{ 634 return detail::sse_xpd_ps(x.Data, y.Data); 635} 636 637GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize 638( 639 detail::fvec4SIMD const & x 640) 641{ 642 __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); 643 __m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data; 644 __m128 mul0 = _mm_mul_ps(x.Data, isr0); 645 return mul0; 646} 647 648GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize 649( 650 detail::fvec4SIMD const & x 651) 652{ 653 __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); 654 __m128 isr0 = fastInversesqrt(dot0).Data; 655 __m128 mul0 = _mm_mul_ps(x.Data, isr0); 656 return mul0; 657} 658 659GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward 660( 661 detail::fvec4SIMD const & N, 662 detail::fvec4SIMD const & I, 663 detail::fvec4SIMD const & Nref 664) 665{ 666 return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); 667} 668 669GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect 670( 671 detail::fvec4SIMD const & I, 672 detail::fvec4SIMD const & N 673) 674{ 675 return detail::sse_rfe_ps(I.Data, N.Data); 676} 677 678GLM_FUNC_QUALIFIER detail::fvec4SIMD refract 679( 680 detail::fvec4SIMD const & I, 681 detail::fvec4SIMD const & N, 682 float const & eta 683) 684{ 685 return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); 686} 687 688GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x) 689{ 690 return _mm_mul_ps(inversesqrt(x).Data, x.Data); 691} 692 693GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x) 694{ 695 return _mm_sqrt_ps(x.Data); 696} 697 698GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x) 699{ 700 return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data); 701} 702 703// SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration 704// By Elan Ruskin, http://assemblyrequired.crashworks.org/ 705GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x) 706{ 707 GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load 708 GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; 709 710 __m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode 711 __m128 halfrecip = _mm_mul_ps(half, recip); 712 __m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip))); 713 return _mm_mul_ps(halfrecip, threeminus_xrr); 714} 715 716GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x) 717{ 718 return _mm_rsqrt_ps(x.Data); 719} 720 721}//namespace glm 722