1/// @ref gtx_simd_quat 2/// @file glm/gtx/simd_quat.inl 3 4namespace glm{ 5namespace detail{ 6 7 8////////////////////////////////////// 9// Debugging 10#if 0 11void print(__m128 v) 12{ 13 GLM_ALIGN(16) float result[4]; 14 _mm_store_ps(result, v); 15 16 printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]); 17} 18 19void print(const fvec4SIMD &v) 20{ 21 printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w); 22} 23#endif 24 25////////////////////////////////////// 26// Implicit basic constructors 27 28# if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT) 29 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD() 30# ifdef GLM_FORCE_NO_CTOR_INIT 31 : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f)) 32# endif 33 {} 34# endif 35 36# if !GLM_HAS_DEFAULTED_FUNCTIONS 37 GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) : 38 Data(q.Data) 39 {} 40# endif//!GLM_HAS_DEFAULTED_FUNCTIONS 41 42GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) : 43 Data(Data) 44{} 45 46////////////////////////////////////// 47// Explicit basic constructors 48 49GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) : 50 Data(_mm_set_ps(w, z, y, x)) 51{} 52 53GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) : 54 Data(_mm_set_ps(q.w, q.z, q.y, q.x)) 55{} 56 57GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles) 58{ 59 vec3 c = glm::cos(eulerAngles * 0.5f); 60 vec3 s = glm::sin(eulerAngles * 0.5f); 61 62 Data = _mm_set_ps( 63 (c.x * c.y * c.z) + (s.x * s.y * s.z), 64 (c.x * c.y * s.z) - (s.x * s.y * c.z), 65 (c.x * s.y * c.z) + (s.x * c.y * s.z), 66 (s.x * c.y * c.z) - (c.x * s.y * s.z)); 67} 68 69 70////////////////////////////////////// 71// Unary arithmetic operators 72 73#if !GLM_HAS_DEFAULTED_FUNCTIONS 74 GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q) 75 { 76 this->Data = q.Data; 77 return *this; 78 } 79#endif//!GLM_HAS_DEFAULTED_FUNCTIONS 80 81GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s) 82{ 83 this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); 84 return *this; 85} 86 87GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s) 88{ 89 this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); 90 return *this; 91} 92 93 94 95// negate operator 96GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q) 97{ 98 return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f))); 99} 100 101// operator+ 102GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2) 103{ 104 return fquatSIMD(_mm_add_ps(q1.Data, q2.Data)); 105} 106 107//operator* 108GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2) 109{ 110 // SSE2 STATS: 111 // 11 shuffle 112 // 8 mul 113 // 8 add 114 115 // SSE4 STATS: 116 // 3 shuffle 117 // 4 mul 118 // 4 dpps 119 120 __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3))); 121 __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2))); 122 __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1))); 123 __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data); 124 125# if(GLM_ARCH & GLM_ARCH_SSE41_BIT) 126 __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff); 127 __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff); 128 __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff); 129 __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff); 130# else 131 mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f)); 132 __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0)); 133 add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1)); 134 135 mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f)); 136 __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1)); 137 add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1)); 138 139 mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f)); 140 __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2)); 141 add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1)); 142 143 mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)); 144 __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3)); 145 add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1)); 146#endif 147 148 149 // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than 150 // the final code below. I'll keep this here for reference - maybe somebody else can do something better... 151 // 152 //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0)); 153 //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0)); 154 // 155 //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0)); 156 157 float x; 158 float y; 159 float z; 160 float w; 161 162 _mm_store_ss(&x, add0); 163 _mm_store_ss(&y, add1); 164 _mm_store_ss(&z, add2); 165 _mm_store_ss(&w, add3); 166 167 return detail::fquatSIMD(w, x, y, z); 168} 169 170GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v) 171{ 172 static const __m128 two = _mm_set1_ps(2.0f); 173 174 __m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); 175 __m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); 176 __m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); 177 __m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1)); 178 __m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2)); 179 180 __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); 181 __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); 182 __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); 183 __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); 184 185 186 uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); 187 uuv = _mm_mul_ps(uuv, two); 188 189 return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv)); 190} 191 192GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q) 193{ 194 return glm::inverse(q) * v; 195} 196 197GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s) 198{ 199 return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s))); 200} 201 202GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q) 203{ 204 return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data)); 205} 206 207 208//operator/ 209GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s) 210{ 211 return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s))); 212} 213 214 215}//namespace detail 216 217 218GLM_FUNC_QUALIFIER quat quat_cast 219( 220 detail::fquatSIMD const & x 221) 222{ 223 GLM_ALIGN(16) quat Result; 224 _mm_store_ps(&Result[0], x.Data); 225 226 return Result; 227} 228 229template <typename T> 230GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[]) 231{ 232 T trace = m0[0] + m1[1] + m2[2] + T(1.0); 233 if (trace > T(0)) 234 { 235 T s = static_cast<T>(0.5) / sqrt(trace); 236 237 return _mm_set_ps( 238 static_cast<float>(T(0.25) / s), 239 static_cast<float>((m0[1] - m1[0]) * s), 240 static_cast<float>((m2[0] - m0[2]) * s), 241 static_cast<float>((m1[2] - m2[1]) * s)); 242 } 243 else 244 { 245 if (m0[0] > m1[1]) 246 { 247 if (m0[0] > m2[2]) 248 { 249 // X is biggest. 250 T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5); 251 252 return _mm_set_ps( 253 static_cast<float>((m1[2] - m2[1]) * s), 254 static_cast<float>((m2[0] + m0[2]) * s), 255 static_cast<float>((m0[1] + m1[0]) * s), 256 static_cast<float>(T(0.5) * s)); 257 } 258 } 259 else 260 { 261 if (m1[1] > m2[2]) 262 { 263 // Y is biggest. 264 T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5); 265 266 return _mm_set_ps( 267 static_cast<float>((m2[0] - m0[2]) * s), 268 static_cast<float>((m1[2] + m2[1]) * s), 269 static_cast<float>(T(0.5) * s), 270 static_cast<float>((m0[1] + m1[0]) * s)); 271 } 272 } 273 274 // Z is biggest. 275 T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5); 276 277 return _mm_set_ps( 278 static_cast<float>((m0[1] - m1[0]) * s), 279 static_cast<float>(T(0.5) * s), 280 static_cast<float>((m1[2] + m2[1]) * s), 281 static_cast<float>((m2[0] + m0[2]) * s)); 282 } 283} 284 285GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 286( 287 detail::fmat4x4SIMD const & m 288) 289{ 290 // Scalar implementation for now. 291 GLM_ALIGN(16) float m0[4]; 292 GLM_ALIGN(16) float m1[4]; 293 GLM_ALIGN(16) float m2[4]; 294 295 _mm_store_ps(m0, m[0].Data); 296 _mm_store_ps(m1, m[1].Data); 297 _mm_store_ps(m2, m[2].Data); 298 299 return quatSIMD_cast_impl(m0, m1, m2); 300} 301 302template <typename T, precision P> 303GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 304( 305 tmat4x4<T, P> const & m 306) 307{ 308 return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); 309} 310 311template <typename T, precision P> 312GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast 313( 314 tmat3x3<T, P> const & m 315) 316{ 317 return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); 318} 319 320 321GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast 322( 323 detail::fquatSIMD const & q 324) 325{ 326 detail::fmat4x4SIMD result; 327 328 __m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); 329 __m128 _xyzw = q.Data; 330 __m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); 331 __m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); 332 333 __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw); 334 __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2)); 335 __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1)); 336 337 __m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw)); 338 _tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw)); 339 340 __m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw); 341 _tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww)); 342 343 __m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw); 344 _tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww)); 345 346 347 // There's probably a better, more politically correct way of doing this... 348 result[0].Data = _mm_set_ps( 349 0.0f, 350 reinterpret_cast<float*>(&_tmp2)[0], 351 reinterpret_cast<float*>(&_tmp1)[0], 352 reinterpret_cast<float*>(&_tmp0)[0]); 353 354 result[1].Data = _mm_set_ps( 355 0.0f, 356 reinterpret_cast<float*>(&_tmp1)[1], 357 reinterpret_cast<float*>(&_tmp0)[1], 358 reinterpret_cast<float*>(&_tmp2)[1]); 359 360 result[2].Data = _mm_set_ps( 361 0.0f, 362 reinterpret_cast<float*>(&_tmp0)[2], 363 reinterpret_cast<float*>(&_tmp2)[2], 364 reinterpret_cast<float*>(&_tmp1)[2]); 365 366 result[3].Data = _mm_set_ps( 367 1.0f, 368 0.0f, 369 0.0f, 370 0.0f); 371 372 373 return result; 374} 375 376GLM_FUNC_QUALIFIER mat4 mat4_cast 377( 378 detail::fquatSIMD const & q 379) 380{ 381 return mat4_cast(mat4SIMD_cast(q)); 382} 383 384 385 386GLM_FUNC_QUALIFIER float length 387( 388 detail::fquatSIMD const & q 389) 390{ 391 return glm::sqrt(dot(q, q)); 392} 393 394GLM_FUNC_QUALIFIER detail::fquatSIMD normalize 395( 396 detail::fquatSIMD const & q 397) 398{ 399 return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q))); 400} 401 402GLM_FUNC_QUALIFIER float dot 403( 404 detail::fquatSIMD const & q1, 405 detail::fquatSIMD const & q2 406) 407{ 408 float result; 409 _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data)); 410 411 return result; 412} 413 414GLM_FUNC_QUALIFIER detail::fquatSIMD mix 415( 416 detail::fquatSIMD const & x, 417 detail::fquatSIMD const & y, 418 float const & a 419) 420{ 421 float cosTheta = dot(x, y); 422 423 if (cosTheta > 1.0f - glm::epsilon<float>()) 424 { 425 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 426 } 427 else 428 { 429 float angle = glm::acos(cosTheta); 430 431 432 float s0 = glm::sin((1.0f - a) * angle); 433 float s1 = glm::sin(a * angle); 434 float d = 1.0f / glm::sin(angle); 435 436 return (s0 * x + s1 * y) * d; 437 } 438} 439 440GLM_FUNC_QUALIFIER detail::fquatSIMD lerp 441( 442 detail::fquatSIMD const & x, 443 detail::fquatSIMD const & y, 444 float const & a 445) 446{ 447 // Lerp is only defined in [0, 1] 448 assert(a >= 0.0f); 449 assert(a <= 1.0f); 450 451 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 452} 453 454GLM_FUNC_QUALIFIER detail::fquatSIMD slerp 455( 456 detail::fquatSIMD const & x, 457 detail::fquatSIMD const & y, 458 float const & a 459) 460{ 461 detail::fquatSIMD z = y; 462 463 float cosTheta = dot(x, y); 464 465 // If cosTheta < 0, the interpolation will take the long way around the sphere. 466 // To fix this, one quat must be negated. 467 if (cosTheta < 0.0f) 468 { 469 z = -y; 470 cosTheta = -cosTheta; 471 } 472 473 // Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator 474 if(cosTheta > 1.0f - epsilon<float>()) 475 { 476 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 477 } 478 else 479 { 480 float angle = glm::acos(cosTheta); 481 482 483 float s0 = glm::sin((1.0f - a) * angle); 484 float s1 = glm::sin(a * angle); 485 float d = 1.0f / glm::sin(angle); 486 487 return (s0 * x + s1 * y) * d; 488 } 489} 490 491 492GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix 493( 494 detail::fquatSIMD const & x, 495 detail::fquatSIMD const & y, 496 float const & a 497) 498{ 499 float cosTheta = dot(x, y); 500 501 if (cosTheta > 1.0f - glm::epsilon<float>()) 502 { 503 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 504 } 505 else 506 { 507 float angle = glm::fastAcos(cosTheta); 508 509 510 __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); 511 512 __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); 513 __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); 514 __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); 515 516 return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); 517 } 518} 519 520GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp 521( 522 detail::fquatSIMD const & x, 523 detail::fquatSIMD const & y, 524 float const & a 525) 526{ 527 detail::fquatSIMD z = y; 528 529 float cosTheta = dot(x, y); 530 if (cosTheta < 0.0f) 531 { 532 z = -y; 533 cosTheta = -cosTheta; 534 } 535 536 537 if(cosTheta > 1.0f - epsilon<float>()) 538 { 539 return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); 540 } 541 else 542 { 543 float angle = glm::fastAcos(cosTheta); 544 545 546 __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); 547 548 __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); 549 __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); 550 __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); 551 552 return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); 553 } 554} 555 556 557 558GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate 559( 560 detail::fquatSIMD const & q 561) 562{ 563 return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f))); 564} 565 566GLM_FUNC_QUALIFIER detail::fquatSIMD inverse 567( 568 detail::fquatSIMD const & q 569) 570{ 571 return conjugate(q) / dot(q, q); 572} 573 574 575GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD 576( 577 float const & angle, 578 vec3 const & v 579) 580{ 581 float s = glm::sin(angle * 0.5f); 582 583 return _mm_set_ps( 584 glm::cos(angle * 0.5f), 585 v.z * s, 586 v.y * s, 587 v.x * s); 588} 589 590GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD 591( 592 float const & angle, 593 float const & x, 594 float const & y, 595 float const & z 596) 597{ 598 return angleAxisSIMD(angle, vec3(x, y, z)); 599} 600 601 602GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x) 603{ 604 static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f); 605 static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f); 606 static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f); 607 608 __m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x)); 609 __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x)); 610 __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x)); 611 612 __m128 y0 = _mm_mul_ps(x3, c0); 613 __m128 y1 = _mm_mul_ps(x5, c1); 614 __m128 y2 = _mm_mul_ps(x7, c2); 615 616 return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2); 617} 618 619 620}//namespace glm 621