1/////////////////////////////////////////////////////////////////////////////////////////////////// 2// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net) 3/////////////////////////////////////////////////////////////////////////////////////////////////// 4// Created : 2013-04-22 5// Updated : 2013-04-22 6// Licence : This source is under MIT License 7// File : glm/gtx/simd_quat.inl 8/////////////////////////////////////////////////////////////////////////////////////////////////// 9 10 11namespace glm{ 12namespace detail{ 13 14 15////////////////////////////////////// 16// Debugging 17#if 0 18void print(__m128 v) 19{ 20 GLM_ALIGN(16) float result[4]; 21 _mm_store_ps(result, v); 22 23 printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]); 24} 25 26void print(const fvec4SIMD &v) 27{ 28 printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w); 29} 30#endif 31 32 33////////////////////////////////////// 34// Implicit basic constructors 35 36GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD() 37#ifdef GLM_SIMD_ENABLE_DEFAULT_INIT 38 : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f)) 39#endif 40{} 41 42GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) : 43 Data(Data) 44{} 45 46GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) : 47 Data(q.Data) 48{} 49 50 51////////////////////////////////////// 52// Explicit basic constructors 53 54GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) : 55 Data(_mm_set_ps(w, z, y, x)) 56{} 57 58GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) : 59 Data(_mm_set_ps(q.w, q.z, q.y, q.x)) 60{} 61 62GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles) 63{ 64 vec3 c = glm::cos(eulerAngles * 0.5f); 65 vec3 s = glm::sin(eulerAngles * 0.5f); 66 67 Data = _mm_set_ps( 68 (c.x * c.y * c.z) + (s.x * s.y * s.z), 69 (c.x * c.y * s.z) - (s.x * s.y * c.z), 70 (c.x * s.y * c.z) + (s.x * c.y * s.z), 71 (s.x * c.y * c.z) - (c.x * s.y * s.z)); 72} 73 74 75////////////////////////////////////// 76// Unary arithmetic operators 77 78GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q) 79{ 80 this->Data = q.Data; 81 return *this; 82} 83 84GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s) 85{ 86 this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); 87 return *this; 88} 89 90GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s) 91{ 92 this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); 93 return *this; 94} 95 96 97 98// negate operator 99GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q) 100{ 101 return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f))); 102} 103 104// operator+ 105GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2) 106{ 107 return fquatSIMD(_mm_add_ps(q1.Data, q2.Data)); 108} 109 110//operator* 111GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2) 112{ 113 // SSE2 STATS: 114 // 11 shuffle 115 // 8 mul 116 // 8 add 117 118 // SSE4 STATS: 119 // 3 shuffle 120 // 4 mul 121 // 4 dpps 122 123 __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3))); 124 __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2))); 125 __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1))); 126 __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data); 127 128# if((GLM_ARCH & GLM_ARCH_SSE4)) 129 __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff); 130 __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff); 131 __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff); 132 __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff); 133# else 134 mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f)); 135 __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0)); 136 add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1)); 137 138 mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f)); 139 __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1)); 140 add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1)); 141 142 mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f)); 143 __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2)); 144 add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1)); 145 146 mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)); 147 __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3)); 148 add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1)); 149#endif 150 151 152 // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than 153 // the final code below. I'll keep this here for reference - maybe somebody else can do something better... 154 // 155 //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0)); 156 //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0)); 157 // 158 //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0)); 159 160 float x; 161 float y; 162 float z; 163 float w; 164 165 _mm_store_ss(&x, add0); 166 _mm_store_ss(&y, add1); 167 _mm_store_ss(&z, add2); 168 _mm_store_ss(&w, add3); 169 170 return detail::fquatSIMD(w, x, y, z); 171} 172 173GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v) 174{ 175 static const __m128 two = _mm_set1_ps(2.0f); 176 177 __m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); 178 __m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); 179 __m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); 180 __m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1)); 181 __m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2)); 182 183 __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); 184 __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); 185 __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); 186 __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); 187 188 189 uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); 190 uuv = _mm_mul_ps(uuv, two); 191 192 return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv)); 193} 194 195GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q) 196{ 197 return glm::inverse(q) * v; 198} 199 200GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s) 201{ 202 return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s))); 203} 204 205GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q) 206{ 207 return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data)); 208} 209 210 211//operator/ 212GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s) 213{ 214 return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s))); 215} 216 217 218}//namespace detail 219 220 221GLM_FUNC_QUALIFIER quat quat_cast 222( 223 detail::fquatSIMD const & x 224) 225{ 226 GLM_ALIGN(16) quat Result; 227 _mm_store_ps(&Result[0], x.Data); 228 229 return Result; 230} 231 232template <typename T> 233GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[]) 234{ 235 T trace = m0[0] + m1[1] + m2[2] + T(1.0); 236 if (trace > T(0)) 237 { 238 T s = static_cast<T>(0.5) / sqrt(trace); 239 240 return _mm_set_ps( 241 static_cast<float>(T(0.25) / s), 242 static_cast<float>((m0[1] - m1[0]) * s), 243 static_cast<float>((m2[0] - m0[2]) * s), 244 static_cast<float>((m1[2] - m2[1]) * s)); 245 } 246 else 247 { 248 if (m0[0] > m1[1]) 249 { 250 if (m0[0] > m2[2]) 251 { 252 // X is biggest. 253 T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5); 254 255 return _mm_set_ps( 256 static_cast<float>((m1[2] - m2[1]) * s), 257 static_cast<float>((m2[0] + m0[2]) * s), 258 static_cast<float>((m0[1] + m1[0]) * s), 259 static_cast<float>(T(0.5) * s)); 260 } 261 } 262 else 263 { 264 if (m1[1] > m2[2]) 265 { 266 // Y is biggest. 267 T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5); 268 269 return _mm_set_ps( 270 static_cast<float>((m2[0] - m0[2]) * s), 271 static_cast<float>((m1[2] + m2[1]) * s), 272 static_cast<float>(T(0.5) * s), 273 static_cast<float>((m0[1] + m1[0]) * s)); 274 } 275 } 276 277 // Z is biggest. 278 T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5); 279 280 return _mm_set_ps( 281 static_cast<float>((m0[1] - m1[0]) * s), 282 static_cast<float>(T(0.5) * s), 283 static_cast<float>((m1[2] + m2[1]) * s), 284 static_cast<float>((m2[0] + m0[2]) * s)); 285 } 286} 287 288GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 289( 290 detail::fmat4x4SIMD const & m 291) 292{ 293 // Scalar implementation for now. 294 GLM_ALIGN(16) float m0[4]; 295 GLM_ALIGN(16) float m1[4]; 296 GLM_ALIGN(16) float m2[4]; 297 298 _mm_store_ps(m0, m[0].Data); 299 _mm_store_ps(m1, m[1].Data); 300 _mm_store_ps(m2, m[2].Data); 301 302 return quatSIMD_cast_impl(m0, m1, m2); 303} 304 305template <typename T, precision P> 306GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 307( 308 detail::tmat4x4<T, P> const & m 309) 310{ 311 return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); 312} 313 314template <typename T, precision P> 315GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 316( 317 detail::tmat3x3<T, P> const & m 318) 319{ 320 return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); 321} 322 323 324GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast 325( 326 detail::fquatSIMD const & q 327) 328{ 329 detail::fmat4x4SIMD result; 330 331 __m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); 332 __m128 _xyzw = q.Data; 333 __m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); 334 __m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); 335 336 __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw); 337 __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2)); 338 __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1)); 339 340 __m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw)); 341 _tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw)); 342 343 __m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw); 344 _tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww)); 345 346 __m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw); 347 _tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww)); 348 349 350 // There's probably a better, more politically correct way of doing this... 351 result[0].Data = _mm_set_ps( 352 0.0f, 353 reinterpret_cast<float*>(&_tmp2)[0], 354 reinterpret_cast<float*>(&_tmp1)[0], 355 reinterpret_cast<float*>(&_tmp0)[0]); 356 357 result[1].Data = _mm_set_ps( 358 0.0f, 359 reinterpret_cast<float*>(&_tmp1)[1], 360 reinterpret_cast<float*>(&_tmp0)[1], 361 reinterpret_cast<float*>(&_tmp2)[1]); 362 363 result[2].Data = _mm_set_ps( 364 0.0f, 365 reinterpret_cast<float*>(&_tmp0)[2], 366 reinterpret_cast<float*>(&_tmp2)[2], 367 reinterpret_cast<float*>(&_tmp1)[2]); 368 369 result[3].Data = _mm_set_ps( 370 1.0f, 371 0.0f, 372 0.0f, 373 0.0f); 374 375 376 return result; 377} 378 379GLM_FUNC_QUALIFIER mat4 mat4_cast 380( 381 detail::fquatSIMD const & q 382) 383{ 384 return mat4_cast(mat4SIMD_cast(q)); 385} 386 387 388 389GLM_FUNC_QUALIFIER float length 390( 391 detail::fquatSIMD const & q 392) 393{ 394 return glm::sqrt(dot(q, q)); 395} 396 397GLM_FUNC_QUALIFIER detail::fquatSIMD normalize 398( 399 detail::fquatSIMD const & q 400) 401{ 402 return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q))); 403} 404 405GLM_FUNC_QUALIFIER float dot 406( 407 detail::fquatSIMD const & q1, 408 detail::fquatSIMD const & q2 409) 410{ 411 float result; 412 _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data)); 413 414 return result; 415} 416 417GLM_FUNC_QUALIFIER detail::fquatSIMD mix 418( 419 detail::fquatSIMD const & x, 420 detail::fquatSIMD const & y, 421 float const & a 422) 423{ 424 float cosTheta = dot(x, y); 425 426 if (cosTheta > 1.0f - glm::epsilon<float>()) 427 { 428 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 429 } 430 else 431 { 432 float angle = glm::acos(cosTheta); 433 434 435 float s0 = glm::sin((1.0f - a) * angle); 436 float s1 = glm::sin(a * angle); 437 float d = 1.0f / glm::sin(angle); 438 439 return (s0 * x + s1 * y) * d; 440 } 441} 442 443GLM_FUNC_QUALIFIER detail::fquatSIMD lerp 444( 445 detail::fquatSIMD const & x, 446 detail::fquatSIMD const & y, 447 float const & a 448) 449{ 450 // Lerp is only defined in [0, 1] 451 assert(a >= 0.0f); 452 assert(a <= 1.0f); 453 454 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 455} 456 457GLM_FUNC_QUALIFIER detail::fquatSIMD slerp 458( 459 detail::fquatSIMD const & x, 460 detail::fquatSIMD const & y, 461 float const & a 462) 463{ 464 detail::fquatSIMD z = y; 465 466 float cosTheta = dot(x, y); 467 468 // If cosTheta < 0, the interpolation will take the long way around the sphere. 469 // To fix this, one quat must be negated. 470 if (cosTheta < 0.0f) 471 { 472 z = -y; 473 cosTheta = -cosTheta; 474 } 475 476 // Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator 477 if(cosTheta > 1.0f - epsilon<float>()) 478 { 479 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 480 } 481 else 482 { 483 float angle = glm::acos(cosTheta); 484 485 486 float s0 = glm::sin((1.0f - a) * angle); 487 float s1 = glm::sin(a * angle); 488 float d = 1.0f / glm::sin(angle); 489 490 return (s0 * x + s1 * y) * d; 491 } 492} 493 494 495GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix 496( 497 detail::fquatSIMD const & x, 498 detail::fquatSIMD const & y, 499 float const & a 500) 501{ 502 float cosTheta = dot(x, y); 503 504 if (cosTheta > 1.0f - glm::epsilon<float>()) 505 { 506 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 507 } 508 else 509 { 510 float angle = glm::fastAcos(cosTheta); 511 512 513 __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); 514 515 __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); 516 __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); 517 __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); 518 519 return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); 520 } 521} 522 523GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp 524( 525 detail::fquatSIMD const & x, 526 detail::fquatSIMD const & y, 527 float const & a 528) 529{ 530 detail::fquatSIMD z = y; 531 532 float cosTheta = dot(x, y); 533 if (cosTheta < 0.0f) 534 { 535 z = -y; 536 cosTheta = -cosTheta; 537 } 538 539 540 if(cosTheta > 1.0f - epsilon<float>()) 541 { 542 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 543 } 544 else 545 { 546 float angle = glm::fastAcos(cosTheta); 547 548 549 __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); 550 551 __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); 552 __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); 553 __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); 554 555 return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); 556 } 557} 558 559 560 561GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate 562( 563 detail::fquatSIMD const & q 564) 565{ 566 return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f))); 567} 568 569GLM_FUNC_QUALIFIER detail::fquatSIMD inverse 570( 571 detail::fquatSIMD const & q 572) 573{ 574 return conjugate(q) / dot(q, q); 575} 576 577 578GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD 579( 580 float const & angle, 581 vec3 const & v 582) 583{ 584#ifdef GLM_FORCE_RADIANS 585 float a(angle); 586#else 587# pragma message("GLM: rotateZ function taking degrees as parameters is deprecated. #define GLM_FORCE_RADIANS before including GLM headers to remove this message.") 588 float a(glm::radians(angle)); 589#endif 590 float s = glm::sin(a * 0.5f); 591 592 return _mm_set_ps( 593 glm::cos(a * 0.5f), 594 v.z * s, 595 v.y * s, 596 v.x * s); 597} 598 599GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD 600( 601 float const & angle, 602 float const & x, 603 float const & y, 604 float const & z 605) 606{ 607 return angleAxisSIMD(angle, vec3(x, y, z)); 608} 609 610 611GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x) 612{ 613 static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f); 614 static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f); 615 static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f); 616 617 __m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x)); 618 __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x)); 619 __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x)); 620 621 __m128 y0 = _mm_mul_ps(x3, c0); 622 __m128 y1 = _mm_mul_ps(x5, c1); 623 __m128 y2 = _mm_mul_ps(x7, c2); 624 625 return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2); 626} 627 628 629}//namespace glm 630