1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14
15
16
17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
19
20
21 #include "btVector3.h"
22 #include "btQuadWord.h"
23
24
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btQuaternionData btQuaternionDoubleData
27 #define btQuaternionDataName "btQuaternionDoubleData"
28 #else
29 #define btQuaternionData btQuaternionFloatData
30 #define btQuaternionDataName "btQuaternionFloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32
33
34
35 #ifdef BT_USE_SSE
36
37 //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
38 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
39
40 #endif
41
42 #if defined(BT_USE_SSE)
43
44 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
45 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
46
47 #elif defined(BT_USE_NEON)
48
49 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
50 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
51
52 #endif
53
54 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
55 class btQuaternion : public btQuadWord {
56 public:
57 /**@brief No initialization constructor */
btQuaternion()58 btQuaternion() {}
59
60 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
61 // Set Vector
btQuaternion(const btSimdFloat4 vec)62 SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
63 {
64 mVec128 = vec;
65 }
66
67 // Copy constructor
btQuaternion(const btQuaternion & rhs)68 SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
69 {
70 mVec128 = rhs.mVec128;
71 }
72
73 // Assignment Operator
74 SIMD_FORCE_INLINE btQuaternion&
75 operator=(const btQuaternion& v)
76 {
77 mVec128 = v.mVec128;
78
79 return *this;
80 }
81
82 #endif
83
84 // template <typename btScalar>
85 // explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
86 /**@brief Constructor from scalars */
btQuaternion(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)87 btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
88 : btQuadWord(_x, _y, _z, _w)
89 {}
90 /**@brief Axis angle Constructor
91 * @param axis The axis which the rotation is around
92 * @param angle The magnitude of the rotation around the angle (Radians) */
btQuaternion(const btVector3 & _axis,const btScalar & _angle)93 btQuaternion(const btVector3& _axis, const btScalar& _angle)
94 {
95 setRotation(_axis, _angle);
96 }
97 /**@brief Constructor from Euler angles
98 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
99 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
100 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
btQuaternion(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)101 btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
102 {
103 #ifndef BT_EULER_DEFAULT_ZYX
104 setEuler(yaw, pitch, roll);
105 #else
106 setEulerZYX(yaw, pitch, roll);
107 #endif
108 }
109 /**@brief Set the rotation using axis angle notation
110 * @param axis The axis around which to rotate
111 * @param angle The magnitude of the rotation in Radians */
setRotation(const btVector3 & axis,const btScalar & _angle)112 void setRotation(const btVector3& axis, const btScalar& _angle)
113 {
114 btScalar d = axis.length();
115 btAssert(d != btScalar(0.0));
116 btScalar s = btSin(_angle * btScalar(0.5)) / d;
117 setValue(axis.x() * s, axis.y() * s, axis.z() * s,
118 btCos(_angle * btScalar(0.5)));
119 }
120 /**@brief Set the quaternion using Euler angles
121 * @param yaw Angle around Y
122 * @param pitch Angle around X
123 * @param roll Angle around Z */
setEuler(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)124 void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
125 {
126 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
127 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
128 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
129 btScalar cosYaw = btCos(halfYaw);
130 btScalar sinYaw = btSin(halfYaw);
131 btScalar cosPitch = btCos(halfPitch);
132 btScalar sinPitch = btSin(halfPitch);
133 btScalar cosRoll = btCos(halfRoll);
134 btScalar sinRoll = btSin(halfRoll);
135 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
136 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
137 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
138 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
139 }
140 /**@brief Set the quaternion using euler angles
141 * @param yaw Angle around Z
142 * @param pitch Angle around Y
143 * @param roll Angle around X */
setEulerZYX(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)144 void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
145 {
146 btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
147 btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
148 btScalar halfRoll = btScalar(roll) * btScalar(0.5);
149 btScalar cosYaw = btCos(halfYaw);
150 btScalar sinYaw = btSin(halfYaw);
151 btScalar cosPitch = btCos(halfPitch);
152 btScalar sinPitch = btSin(halfPitch);
153 btScalar cosRoll = btCos(halfRoll);
154 btScalar sinRoll = btSin(halfRoll);
155 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
156 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
157 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
158 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
159 }
160 /**@brief Add two quaternions
161 * @param q The quaternion to add to this one */
162 SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
163 {
164 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
165 mVec128 = _mm_add_ps(mVec128, q.mVec128);
166 #elif defined(BT_USE_NEON)
167 mVec128 = vaddq_f32(mVec128, q.mVec128);
168 #else
169 m_floats[0] += q.x();
170 m_floats[1] += q.y();
171 m_floats[2] += q.z();
172 m_floats[3] += q.m_floats[3];
173 #endif
174 return *this;
175 }
176
177 /**@brief Subtract out a quaternion
178 * @param q The quaternion to subtract from this one */
179 btQuaternion& operator-=(const btQuaternion& q)
180 {
181 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
182 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
183 #elif defined(BT_USE_NEON)
184 mVec128 = vsubq_f32(mVec128, q.mVec128);
185 #else
186 m_floats[0] -= q.x();
187 m_floats[1] -= q.y();
188 m_floats[2] -= q.z();
189 m_floats[3] -= q.m_floats[3];
190 #endif
191 return *this;
192 }
193
194 /**@brief Scale this quaternion
195 * @param s The scalar to scale by */
196 btQuaternion& operator*=(const btScalar& s)
197 {
198 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
199 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
200 vs = bt_pshufd_ps(vs, 0); // (S S S S)
201 mVec128 = _mm_mul_ps(mVec128, vs);
202 #elif defined(BT_USE_NEON)
203 mVec128 = vmulq_n_f32(mVec128, s);
204 #else
205 m_floats[0] *= s;
206 m_floats[1] *= s;
207 m_floats[2] *= s;
208 m_floats[3] *= s;
209 #endif
210 return *this;
211 }
212
213 /**@brief Multiply this quaternion by q on the right
214 * @param q The other quaternion
215 * Equivilant to this = this * q */
216 btQuaternion& operator*=(const btQuaternion& q)
217 {
218 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
219 __m128 vQ2 = q.get128();
220
221 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
222 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
223
224 A1 = A1 * B1;
225
226 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
227 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
228
229 A2 = A2 * B2;
230
231 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
232 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
233
234 B1 = B1 * B2; // A3 *= B3
235
236 mVec128 = bt_splat_ps(mVec128, 3); // A0
237 mVec128 = mVec128 * vQ2; // A0 * B0
238
239 A1 = A1 + A2; // AB12
240 mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
241 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
242 mVec128 = mVec128+ A1; // AB03 + AB12
243
244 #elif defined(BT_USE_NEON)
245
246 float32x4_t vQ1 = mVec128;
247 float32x4_t vQ2 = q.get128();
248 float32x4_t A0, A1, B1, A2, B2, A3, B3;
249 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
250
251 {
252 float32x2x2_t tmp;
253 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
254 vQ1zx = tmp.val[0];
255
256 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
257 vQ2zx = tmp.val[0];
258 }
259 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
260
261 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
262
263 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
264 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
265
266 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
267 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
268
269 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
270 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
271
272 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
273 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
274
275 A1 = vmulq_f32(A1, B1);
276 A2 = vmulq_f32(A2, B2);
277 A3 = vmulq_f32(A3, B3); // A3 *= B3
278 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
279
280 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
281 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
282
283 // change the sign of the last element
284 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
285 A0 = vaddq_f32(A0, A1); // AB03 + AB12
286
287 mVec128 = A0;
288 #else
289 setValue(
290 m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
291 m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
292 m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
293 m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
294 #endif
295 return *this;
296 }
297 /**@brief Return the dot product between this quaternion and another
298 * @param q The other quaternion */
dot(const btQuaternion & q)299 btScalar dot(const btQuaternion& q) const
300 {
301 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
302 __m128 vd;
303
304 vd = _mm_mul_ps(mVec128, q.mVec128);
305
306 __m128 t = _mm_movehl_ps(vd, vd);
307 vd = _mm_add_ps(vd, t);
308 t = _mm_shuffle_ps(vd, vd, 0x55);
309 vd = _mm_add_ss(vd, t);
310
311 return _mm_cvtss_f32(vd);
312 #elif defined(BT_USE_NEON)
313 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
314 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
315 x = vpadd_f32(x, x);
316 return vget_lane_f32(x, 0);
317 #else
318 return m_floats[0] * q.x() +
319 m_floats[1] * q.y() +
320 m_floats[2] * q.z() +
321 m_floats[3] * q.m_floats[3];
322 #endif
323 }
324
325 /**@brief Return the length squared of the quaternion */
length2()326 btScalar length2() const
327 {
328 return dot(*this);
329 }
330
331 /**@brief Return the length of the quaternion */
length()332 btScalar length() const
333 {
334 return btSqrt(length2());
335 }
336
337 /**@brief Normalize the quaternion
338 * Such that x^2 + y^2 + z^2 +w^2 = 1 */
normalize()339 btQuaternion& normalize()
340 {
341 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
342 __m128 vd;
343
344 vd = _mm_mul_ps(mVec128, mVec128);
345
346 __m128 t = _mm_movehl_ps(vd, vd);
347 vd = _mm_add_ps(vd, t);
348 t = _mm_shuffle_ps(vd, vd, 0x55);
349 vd = _mm_add_ss(vd, t);
350
351 vd = _mm_sqrt_ss(vd);
352 vd = _mm_div_ss(vOnes, vd);
353 vd = bt_pshufd_ps(vd, 0); // splat
354 mVec128 = _mm_mul_ps(mVec128, vd);
355
356 return *this;
357 #else
358 return *this /= length();
359 #endif
360 }
361
362 /**@brief Return a scaled version of this quaternion
363 * @param s The scale factor */
364 SIMD_FORCE_INLINE btQuaternion
365 operator*(const btScalar& s) const
366 {
367 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
368 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
369 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
370
371 return btQuaternion(_mm_mul_ps(mVec128, vs));
372 #elif defined(BT_USE_NEON)
373 return btQuaternion(vmulq_n_f32(mVec128, s));
374 #else
375 return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
376 #endif
377 }
378
379 /**@brief Return an inversely scaled versionof this quaternion
380 * @param s The inverse scale factor */
381 btQuaternion operator/(const btScalar& s) const
382 {
383 btAssert(s != btScalar(0.0));
384 return *this * (btScalar(1.0) / s);
385 }
386
387 /**@brief Inversely scale this quaternion
388 * @param s The scale factor */
389 btQuaternion& operator/=(const btScalar& s)
390 {
391 btAssert(s != btScalar(0.0));
392 return *this *= btScalar(1.0) / s;
393 }
394
395 /**@brief Return a normalized version of this quaternion */
normalized()396 btQuaternion normalized() const
397 {
398 return *this / length();
399 }
400 /**@brief Return the ***half*** angle between this quaternion and the other
401 * @param q The other quaternion */
angle(const btQuaternion & q)402 btScalar angle(const btQuaternion& q) const
403 {
404 btScalar s = btSqrt(length2() * q.length2());
405 btAssert(s != btScalar(0.0));
406 return btAcos(dot(q) / s);
407 }
408
409 /**@brief Return the angle between this quaternion and the other along the shortest path
410 * @param q The other quaternion */
angleShortestPath(const btQuaternion & q)411 btScalar angleShortestPath(const btQuaternion& q) const
412 {
413 btScalar s = btSqrt(length2() * q.length2());
414 btAssert(s != btScalar(0.0));
415 if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
416 return btAcos(dot(-q) / s) * btScalar(2.0);
417 else
418 return btAcos(dot(q) / s) * btScalar(2.0);
419 }
420
421 /**@brief Return the angle of rotation represented by this quaternion */
getAngle()422 btScalar getAngle() const
423 {
424 btScalar s = btScalar(2.) * btAcos(m_floats[3]);
425 return s;
426 }
427
428 /**@brief Return the angle of rotation represented by this quaternion along the shortest path*/
getAngleShortestPath()429 btScalar getAngleShortestPath() const
430 {
431 btScalar s;
432 if (dot(*this) < 0)
433 s = btScalar(2.) * btAcos(m_floats[3]);
434 else
435 s = btScalar(2.) * btAcos(-m_floats[3]);
436
437 return s;
438 }
439
440
441 /**@brief Return the axis of the rotation represented by this quaternion */
getAxis()442 btVector3 getAxis() const
443 {
444 btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
445
446 if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
447 return btVector3(1.0, 0.0, 0.0); // Arbitrary
448 btScalar s = 1.f/btSqrt(s_squared);
449 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
450 }
451
452 /**@brief Return the inverse of this quaternion */
inverse()453 btQuaternion inverse() const
454 {
455 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
456 return btQuaternion(_mm_xor_ps(mVec128, vQInv));
457 #elif defined(BT_USE_NEON)
458 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
459 #else
460 return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
461 #endif
462 }
463
464 /**@brief Return the sum of this quaternion and the other
465 * @param q2 The other quaternion */
466 SIMD_FORCE_INLINE btQuaternion
467 operator+(const btQuaternion& q2) const
468 {
469 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
470 return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
471 #elif defined(BT_USE_NEON)
472 return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
473 #else
474 const btQuaternion& q1 = *this;
475 return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
476 #endif
477 }
478
479 /**@brief Return the difference between this quaternion and the other
480 * @param q2 The other quaternion */
481 SIMD_FORCE_INLINE btQuaternion
482 operator-(const btQuaternion& q2) const
483 {
484 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
485 return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
486 #elif defined(BT_USE_NEON)
487 return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
488 #else
489 const btQuaternion& q1 = *this;
490 return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
491 #endif
492 }
493
494 /**@brief Return the negative of this quaternion
495 * This simply negates each element */
496 SIMD_FORCE_INLINE btQuaternion operator-() const
497 {
498 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
499 return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
500 #elif defined(BT_USE_NEON)
501 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
502 #else
503 const btQuaternion& q2 = *this;
504 return btQuaternion( - q2.x(), - q2.y(), - q2.z(), - q2.m_floats[3]);
505 #endif
506 }
507 /**@todo document this and it's use */
farthest(const btQuaternion & qd)508 SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
509 {
510 btQuaternion diff,sum;
511 diff = *this - qd;
512 sum = *this + qd;
513 if( diff.dot(diff) > sum.dot(sum) )
514 return qd;
515 return (-qd);
516 }
517
518 /**@todo document this and it's use */
nearest(const btQuaternion & qd)519 SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
520 {
521 btQuaternion diff,sum;
522 diff = *this - qd;
523 sum = *this + qd;
524 if( diff.dot(diff) < sum.dot(sum) )
525 return qd;
526 return (-qd);
527 }
528
529
530 /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
531 * @param q The other quaternion to interpolate with
532 * @param t The ratio between this and q to interpolate. If t = 0 the result is this, if t=1 the result is q.
533 * Slerp interpolates assuming constant velocity. */
slerp(const btQuaternion & q,const btScalar & t)534 btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
535 {
536 btScalar magnitude = btSqrt(length2() * q.length2());
537 btAssert(magnitude > btScalar(0));
538
539 btScalar product = dot(q) / magnitude;
540 if (btFabs(product) < btScalar(1))
541 {
542 // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
543 const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
544
545 const btScalar theta = btAcos(sign * product);
546 const btScalar s1 = btSin(sign * t * theta);
547 const btScalar d = btScalar(1.0) / btSin(theta);
548 const btScalar s0 = btSin((btScalar(1.0) - t) * theta);
549
550 return btQuaternion(
551 (m_floats[0] * s0 + q.x() * s1) * d,
552 (m_floats[1] * s0 + q.y() * s1) * d,
553 (m_floats[2] * s0 + q.z() * s1) * d,
554 (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
555 }
556 else
557 {
558 return *this;
559 }
560 }
561
getIdentity()562 static const btQuaternion& getIdentity()
563 {
564 static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
565 return identityQuat;
566 }
567
getW()568 SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
569
570 SIMD_FORCE_INLINE void serialize(struct btQuaternionData& dataOut) const;
571
572 SIMD_FORCE_INLINE void deSerialize(const struct btQuaternionData& dataIn);
573
574 SIMD_FORCE_INLINE void serializeFloat(struct btQuaternionFloatData& dataOut) const;
575
576 SIMD_FORCE_INLINE void deSerializeFloat(const struct btQuaternionFloatData& dataIn);
577
578 SIMD_FORCE_INLINE void serializeDouble(struct btQuaternionDoubleData& dataOut) const;
579
580 SIMD_FORCE_INLINE void deSerializeDouble(const struct btQuaternionDoubleData& dataIn);
581
582 };
583
584
585
586
587
588 /**@brief Return the product of two quaternions */
589 SIMD_FORCE_INLINE btQuaternion
590 operator*(const btQuaternion& q1, const btQuaternion& q2)
591 {
592 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
593 __m128 vQ1 = q1.get128();
594 __m128 vQ2 = q2.get128();
595 __m128 A0, A1, B1, A2, B2;
596
597 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
598 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext
599
600 A1 = A1 * B1;
601
602 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
603 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup
604
605 A2 = A2 * B2;
606
607 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
608 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn
609
610 B1 = B1 * B2; // A3 *= B3
611
612 A0 = bt_splat_ps(vQ1, 3); // A0
613 A0 = A0 * vQ2; // A0 * B0
614
615 A1 = A1 + A2; // AB12
616 A0 = A0 - B1; // AB03 = AB0 - AB3
617
618 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
619 A0 = A0 + A1; // AB03 + AB12
620
621 return btQuaternion(A0);
622
623 #elif defined(BT_USE_NEON)
624
625 float32x4_t vQ1 = q1.get128();
626 float32x4_t vQ2 = q2.get128();
627 float32x4_t A0, A1, B1, A2, B2, A3, B3;
628 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
629
630 {
631 float32x2x2_t tmp;
632 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
633 vQ1zx = tmp.val[0];
634
635 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
636 vQ2zx = tmp.val[0];
637 }
638 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
639
640 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
641
642 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
643 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
644
645 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
646 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
647
648 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
649 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
650
651 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
652 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
653
654 A1 = vmulq_f32(A1, B1);
655 A2 = vmulq_f32(A2, B2);
656 A3 = vmulq_f32(A3, B3); // A3 *= B3
657 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0
658
659 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
660 A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3
661
662 // change the sign of the last element
663 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
664 A0 = vaddq_f32(A0, A1); // AB03 + AB12
665
666 return btQuaternion(A0);
667
668 #else
669 return btQuaternion(
670 q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
671 q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
672 q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
673 q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
674 #endif
675 }
676
677 SIMD_FORCE_INLINE btQuaternion
678 operator*(const btQuaternion& q, const btVector3& w)
679 {
680 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
681 __m128 vQ1 = q.get128();
682 __m128 vQ2 = w.get128();
683 __m128 A1, B1, A2, B2, A3, B3;
684
685 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
686 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
687
688 A1 = A1 * B1;
689
690 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
691 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
692
693 A2 = A2 * B2;
694
695 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
696 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
697
698 A3 = A3 * B3; // A3 *= B3
699
700 A1 = A1 + A2; // AB12
701 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
702 A1 = A1 - A3; // AB123 = AB12 - AB3
703
704 return btQuaternion(A1);
705
706 #elif defined(BT_USE_NEON)
707
708 float32x4_t vQ1 = q.get128();
709 float32x4_t vQ2 = w.get128();
710 float32x4_t A1, B1, A2, B2, A3, B3;
711 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
712
713 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
714 {
715 float32x2x2_t tmp;
716
717 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
718 vQ2zx = tmp.val[0];
719
720 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
721 vQ1zx = tmp.val[0];
722 }
723
724 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
725
726 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
727 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
728
729 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
730 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x
731
732 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
733 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
734
735 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
736 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
737
738 A1 = vmulq_f32(A1, B1);
739 A2 = vmulq_f32(A2, B2);
740 A3 = vmulq_f32(A3, B3); // A3 *= B3
741
742 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
743
744 // change the sign of the last element
745 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
746
747 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
748
749 return btQuaternion(A1);
750
751 #else
752 return btQuaternion(
753 q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
754 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
755 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
756 -q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
757 #endif
758 }
759
760 SIMD_FORCE_INLINE btQuaternion
761 operator*(const btVector3& w, const btQuaternion& q)
762 {
763 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
764 __m128 vQ1 = w.get128();
765 __m128 vQ2 = q.get128();
766 __m128 A1, B1, A2, B2, A3, B3;
767
768 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
769 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X
770
771 A1 = A1 * B1;
772
773 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
774 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
775
776 A2 = A2 *B2;
777
778 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
779 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
780
781 A3 = A3 * B3; // A3 *= B3
782
783 A1 = A1 + A2; // AB12
784 A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
785 A1 = A1 - A3; // AB123 = AB12 - AB3
786
787 return btQuaternion(A1);
788
789 #elif defined(BT_USE_NEON)
790
791 float32x4_t vQ1 = w.get128();
792 float32x4_t vQ2 = q.get128();
793 float32x4_t A1, B1, A2, B2, A3, B3;
794 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
795
796 {
797 float32x2x2_t tmp;
798
799 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
800 vQ1zx = tmp.val[0];
801
802 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
803 vQ2zx = tmp.val[0];
804 }
805 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
806
807 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
808
809 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
810 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
811
812 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
813 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X
814
815 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
816 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
817
818 A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
819 B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z
820
821 A1 = vmulq_f32(A1, B1);
822 A2 = vmulq_f32(A2, B2);
823 A3 = vmulq_f32(A3, B3); // A3 *= B3
824
825 A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
826
827 // change the sign of the last element
828 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
829
830 A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3
831
832 return btQuaternion(A1);
833
834 #else
835 return btQuaternion(
836 +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
837 +w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
838 +w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
839 -w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
840 #endif
841 }
842
843 /**@brief Calculate the dot product between two quaternions */
844 SIMD_FORCE_INLINE btScalar
dot(const btQuaternion & q1,const btQuaternion & q2)845 dot(const btQuaternion& q1, const btQuaternion& q2)
846 {
847 return q1.dot(q2);
848 }
849
850
851 /**@brief Return the length of a quaternion */
852 SIMD_FORCE_INLINE btScalar
length(const btQuaternion & q)853 length(const btQuaternion& q)
854 {
855 return q.length();
856 }
857
858 /**@brief Return the angle between two quaternions*/
859 SIMD_FORCE_INLINE btScalar
btAngle(const btQuaternion & q1,const btQuaternion & q2)860 btAngle(const btQuaternion& q1, const btQuaternion& q2)
861 {
862 return q1.angle(q2);
863 }
864
865 /**@brief Return the inverse of a quaternion*/
866 SIMD_FORCE_INLINE btQuaternion
inverse(const btQuaternion & q)867 inverse(const btQuaternion& q)
868 {
869 return q.inverse();
870 }
871
872 /**@brief Return the result of spherical linear interpolation betwen two quaternions
873 * @param q1 The first quaternion
874 * @param q2 The second quaternion
875 * @param t The ration between q1 and q2. t = 0 return q1, t=1 returns q2
876 * Slerp assumes constant velocity between positions. */
877 SIMD_FORCE_INLINE btQuaternion
slerp(const btQuaternion & q1,const btQuaternion & q2,const btScalar & t)878 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
879 {
880 return q1.slerp(q2, t);
881 }
882
883 SIMD_FORCE_INLINE btVector3
quatRotate(const btQuaternion & rotation,const btVector3 & v)884 quatRotate(const btQuaternion& rotation, const btVector3& v)
885 {
886 btQuaternion q = rotation * v;
887 q *= rotation.inverse();
888 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
889 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
890 #elif defined(BT_USE_NEON)
891 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
892 #else
893 return btVector3(q.getX(),q.getY(),q.getZ());
894 #endif
895 }
896
897 SIMD_FORCE_INLINE btQuaternion
shortestArcQuat(const btVector3 & v0,const btVector3 & v1)898 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
899 {
900 btVector3 c = v0.cross(v1);
901 btScalar d = v0.dot(v1);
902
903 if (d < -1.0 + SIMD_EPSILON)
904 {
905 btVector3 n,unused;
906 btPlaneSpace1(v0,n,unused);
907 return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
908 }
909
910 btScalar s = btSqrt((1.0f + d) * 2.0f);
911 btScalar rs = 1.0f / s;
912
913 return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
914 }
915
916 SIMD_FORCE_INLINE btQuaternion
shortestArcQuatNormalize2(btVector3 & v0,btVector3 & v1)917 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
918 {
919 v0.normalize();
920 v1.normalize();
921 return shortestArcQuat(v0,v1);
922 }
923
924
925
926
927 struct btQuaternionFloatData
928 {
929 float m_floats[4];
930 };
931
932 struct btQuaternionDoubleData
933 {
934 double m_floats[4];
935
936 };
937
serializeFloat(struct btQuaternionFloatData & dataOut)938 SIMD_FORCE_INLINE void btQuaternion::serializeFloat(struct btQuaternionFloatData& dataOut) const
939 {
940 ///could also do a memcpy, check if it is worth it
941 for (int i=0;i<4;i++)
942 dataOut.m_floats[i] = float(m_floats[i]);
943 }
944
deSerializeFloat(const struct btQuaternionFloatData & dataIn)945 SIMD_FORCE_INLINE void btQuaternion::deSerializeFloat(const struct btQuaternionFloatData& dataIn)
946 {
947 for (int i=0;i<4;i++)
948 m_floats[i] = btScalar(dataIn.m_floats[i]);
949 }
950
951
serializeDouble(struct btQuaternionDoubleData & dataOut)952 SIMD_FORCE_INLINE void btQuaternion::serializeDouble(struct btQuaternionDoubleData& dataOut) const
953 {
954 ///could also do a memcpy, check if it is worth it
955 for (int i=0;i<4;i++)
956 dataOut.m_floats[i] = double(m_floats[i]);
957 }
958
deSerializeDouble(const struct btQuaternionDoubleData & dataIn)959 SIMD_FORCE_INLINE void btQuaternion::deSerializeDouble(const struct btQuaternionDoubleData& dataIn)
960 {
961 for (int i=0;i<4;i++)
962 m_floats[i] = btScalar(dataIn.m_floats[i]);
963 }
964
965
serialize(struct btQuaternionData & dataOut)966 SIMD_FORCE_INLINE void btQuaternion::serialize(struct btQuaternionData& dataOut) const
967 {
968 ///could also do a memcpy, check if it is worth it
969 for (int i=0;i<4;i++)
970 dataOut.m_floats[i] = m_floats[i];
971 }
972
deSerialize(const struct btQuaternionData & dataIn)973 SIMD_FORCE_INLINE void btQuaternion::deSerialize(const struct btQuaternionData& dataIn)
974 {
975 for (int i=0;i<4;i++)
976 m_floats[i] = dataIn.m_floats[i];
977 }
978
979
980 #endif //BT_SIMD__QUATERNION_H_
981
982
983
984