• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 
17 #ifndef BT_SIMD__QUATERNION_H_
18 #define BT_SIMD__QUATERNION_H_
19 
20 
21 #include "btVector3.h"
22 #include "btQuadWord.h"
23 
24 
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btQuaternionData btQuaternionDoubleData
27 #define btQuaternionDataName "btQuaternionDoubleData"
28 #else
29 #define btQuaternionData btQuaternionFloatData
30 #define btQuaternionDataName "btQuaternionFloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32 
33 
34 
35 #ifdef BT_USE_SSE
36 
37 //const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
38 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
39 
40 #endif
41 
42 #if defined(BT_USE_SSE)
43 
44 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
45 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
46 
47 #elif defined(BT_USE_NEON)
48 
49 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
50 const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
51 
52 #endif
53 
54 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
55 class btQuaternion : public btQuadWord {
56 public:
57   /**@brief No initialization constructor */
btQuaternion()58 	btQuaternion() {}
59 
60 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
61 	// Set Vector
btQuaternion(const btSimdFloat4 vec)62 	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
63 	{
64 		mVec128 = vec;
65 	}
66 
67 	// Copy constructor
btQuaternion(const btQuaternion & rhs)68 	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
69 	{
70 		mVec128 = rhs.mVec128;
71 	}
72 
73 	// Assignment Operator
74 	SIMD_FORCE_INLINE btQuaternion&
75 	operator=(const btQuaternion& v)
76 	{
77 		mVec128 = v.mVec128;
78 
79 		return *this;
80 	}
81 
82 #endif
83 
84 	//		template <typename btScalar>
85 	//		explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
86   /**@brief Constructor from scalars */
btQuaternion(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)87 	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
88 		: btQuadWord(_x, _y, _z, _w)
89 	{}
90   /**@brief Axis angle Constructor
91    * @param axis The axis which the rotation is around
92    * @param angle The magnitude of the rotation around the angle (Radians) */
btQuaternion(const btVector3 & _axis,const btScalar & _angle)93 	btQuaternion(const btVector3& _axis, const btScalar& _angle)
94 	{
95 		setRotation(_axis, _angle);
96 	}
97   /**@brief Constructor from Euler angles
98    * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
99    * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX defined then Y
100    * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX defined then X */
btQuaternion(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)101 	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
102 	{
103 #ifndef BT_EULER_DEFAULT_ZYX
104 		setEuler(yaw, pitch, roll);
105 #else
106 		setEulerZYX(yaw, pitch, roll);
107 #endif
108 	}
109   /**@brief Set the rotation using axis angle notation
110    * @param axis The axis around which to rotate
111    * @param angle The magnitude of the rotation in Radians */
setRotation(const btVector3 & axis,const btScalar & _angle)112 	void setRotation(const btVector3& axis, const btScalar& _angle)
113 	{
114 		btScalar d = axis.length();
115 		btAssert(d != btScalar(0.0));
116 		btScalar s = btSin(_angle * btScalar(0.5)) / d;
117 		setValue(axis.x() * s, axis.y() * s, axis.z() * s,
118 			btCos(_angle * btScalar(0.5)));
119 	}
120   /**@brief Set the quaternion using Euler angles
121    * @param yaw Angle around Y
122    * @param pitch Angle around X
123    * @param roll Angle around Z */
setEuler(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)124 	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
125 	{
126 		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
127 		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
128 		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
129 		btScalar cosYaw = btCos(halfYaw);
130 		btScalar sinYaw = btSin(halfYaw);
131 		btScalar cosPitch = btCos(halfPitch);
132 		btScalar sinPitch = btSin(halfPitch);
133 		btScalar cosRoll = btCos(halfRoll);
134 		btScalar sinRoll = btSin(halfRoll);
135 		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
136 			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
137 			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
138 			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
139 	}
140   /**@brief Set the quaternion using euler angles
141    * @param yaw Angle around Z
142    * @param pitch Angle around Y
143    * @param roll Angle around X */
setEulerZYX(const btScalar & yaw,const btScalar & pitch,const btScalar & roll)144 	void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
145 	{
146 		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
147 		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
148 		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
149 		btScalar cosYaw = btCos(halfYaw);
150 		btScalar sinYaw = btSin(halfYaw);
151 		btScalar cosPitch = btCos(halfPitch);
152 		btScalar sinPitch = btSin(halfPitch);
153 		btScalar cosRoll = btCos(halfRoll);
154 		btScalar sinRoll = btSin(halfRoll);
155 		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
156                          cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
157                          cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
158                          cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
159 	}
160   /**@brief Add two quaternions
161    * @param q The quaternion to add to this one */
162 	SIMD_FORCE_INLINE	btQuaternion& operator+=(const btQuaternion& q)
163 	{
164 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
165 		mVec128 = _mm_add_ps(mVec128, q.mVec128);
166 #elif defined(BT_USE_NEON)
167 		mVec128 = vaddq_f32(mVec128, q.mVec128);
168 #else
169 		m_floats[0] += q.x();
170         m_floats[1] += q.y();
171         m_floats[2] += q.z();
172         m_floats[3] += q.m_floats[3];
173 #endif
174 		return *this;
175 	}
176 
177   /**@brief Subtract out a quaternion
178    * @param q The quaternion to subtract from this one */
179 	btQuaternion& operator-=(const btQuaternion& q)
180 	{
181 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
182 		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
183 #elif defined(BT_USE_NEON)
184 		mVec128 = vsubq_f32(mVec128, q.mVec128);
185 #else
186 		m_floats[0] -= q.x();
187         m_floats[1] -= q.y();
188         m_floats[2] -= q.z();
189         m_floats[3] -= q.m_floats[3];
190 #endif
191         return *this;
192 	}
193 
194   /**@brief Scale this quaternion
195    * @param s The scalar to scale by */
196 	btQuaternion& operator*=(const btScalar& s)
197 	{
198 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
199 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
200 		vs = bt_pshufd_ps(vs, 0);	//	(S S S S)
201 		mVec128 = _mm_mul_ps(mVec128, vs);
202 #elif defined(BT_USE_NEON)
203 		mVec128 = vmulq_n_f32(mVec128, s);
204 #else
205 		m_floats[0] *= s;
206         m_floats[1] *= s;
207         m_floats[2] *= s;
208         m_floats[3] *= s;
209 #endif
210 		return *this;
211 	}
212 
213   /**@brief Multiply this quaternion by q on the right
214    * @param q The other quaternion
215    * Equivilant to this = this * q */
216 	btQuaternion& operator*=(const btQuaternion& q)
217 	{
218 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
219 		__m128 vQ2 = q.get128();
220 
221 		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
222 		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
223 
224 		A1 = A1 * B1;
225 
226 		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
227 		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
228 
229 		A2 = A2 * B2;
230 
231 		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
232 		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
233 
234 		B1 = B1 * B2;	//	A3 *= B3
235 
236 		mVec128 = bt_splat_ps(mVec128, 3);	//	A0
237 		mVec128 = mVec128 * vQ2;	//	A0 * B0
238 
239 		A1 = A1 + A2;	//	AB12
240 		mVec128 = mVec128 - B1;	//	AB03 = AB0 - AB3
241 		A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
242 		mVec128 = mVec128+ A1;	//	AB03 + AB12
243 
244 #elif defined(BT_USE_NEON)
245 
246         float32x4_t vQ1 = mVec128;
247         float32x4_t vQ2 = q.get128();
248         float32x4_t A0, A1, B1, A2, B2, A3, B3;
249         float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
250 
251         {
252         float32x2x2_t tmp;
253         tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
254         vQ1zx = tmp.val[0];
255 
256         tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
257         vQ2zx = tmp.val[0];
258         }
259         vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
260 
261         vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
262 
263         vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
264         vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
265 
266         A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
267         B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
268 
269         A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
270         B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
271 
272         A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
273         B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
274 
275         A1 = vmulq_f32(A1, B1);
276         A2 = vmulq_f32(A2, B2);
277         A3 = vmulq_f32(A3, B3);	//	A3 *= B3
278         A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
279 
280         A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
281         A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3
282 
283         //	change the sign of the last element
284         A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
285         A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
286 
287         mVec128 = A0;
288 #else
289 		setValue(
290             m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
291 			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
292 			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
293 			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
294 #endif
295 		return *this;
296 	}
297   /**@brief Return the dot product between this quaternion and another
298    * @param q The other quaternion */
dot(const btQuaternion & q)299 	btScalar dot(const btQuaternion& q) const
300 	{
301 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
302 		__m128	vd;
303 
304 		vd = _mm_mul_ps(mVec128, q.mVec128);
305 
306         __m128 t = _mm_movehl_ps(vd, vd);
307 		vd = _mm_add_ps(vd, t);
308 		t = _mm_shuffle_ps(vd, vd, 0x55);
309 		vd = _mm_add_ss(vd, t);
310 
311         return _mm_cvtss_f32(vd);
312 #elif defined(BT_USE_NEON)
313 		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
314 		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
315 		x = vpadd_f32(x, x);
316 		return vget_lane_f32(x, 0);
317 #else
318 		return  m_floats[0] * q.x() +
319                 m_floats[1] * q.y() +
320                 m_floats[2] * q.z() +
321                 m_floats[3] * q.m_floats[3];
322 #endif
323 	}
324 
325   /**@brief Return the length squared of the quaternion */
length2()326 	btScalar length2() const
327 	{
328 		return dot(*this);
329 	}
330 
331   /**@brief Return the length of the quaternion */
length()332 	btScalar length() const
333 	{
334 		return btSqrt(length2());
335 	}
336 
337   /**@brief Normalize the quaternion
338    * Such that x^2 + y^2 + z^2 +w^2 = 1 */
normalize()339 	btQuaternion& normalize()
340 	{
341 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
342 		__m128	vd;
343 
344 		vd = _mm_mul_ps(mVec128, mVec128);
345 
346         __m128 t = _mm_movehl_ps(vd, vd);
347 		vd = _mm_add_ps(vd, t);
348 		t = _mm_shuffle_ps(vd, vd, 0x55);
349 		vd = _mm_add_ss(vd, t);
350 
351 		vd = _mm_sqrt_ss(vd);
352 		vd = _mm_div_ss(vOnes, vd);
353         vd = bt_pshufd_ps(vd, 0); // splat
354 		mVec128 = _mm_mul_ps(mVec128, vd);
355 
356 		return *this;
357 #else
358 		return *this /= length();
359 #endif
360 	}
361 
362   /**@brief Return a scaled version of this quaternion
363    * @param s The scale factor */
364 	SIMD_FORCE_INLINE btQuaternion
365 	operator*(const btScalar& s) const
366 	{
367 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
368 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
369 		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
370 
371 		return btQuaternion(_mm_mul_ps(mVec128, vs));
372 #elif defined(BT_USE_NEON)
373 		return btQuaternion(vmulq_n_f32(mVec128, s));
374 #else
375 		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
376 #endif
377 	}
378 
379   /**@brief Return an inversely scaled versionof this quaternion
380    * @param s The inverse scale factor */
381 	btQuaternion operator/(const btScalar& s) const
382 	{
383 		btAssert(s != btScalar(0.0));
384 		return *this * (btScalar(1.0) / s);
385 	}
386 
387   /**@brief Inversely scale this quaternion
388    * @param s The scale factor */
389 	btQuaternion& operator/=(const btScalar& s)
390 	{
391 		btAssert(s != btScalar(0.0));
392 		return *this *= btScalar(1.0) / s;
393 	}
394 
395   /**@brief Return a normalized version of this quaternion */
normalized()396 	btQuaternion normalized() const
397 	{
398 		return *this / length();
399 	}
400 	/**@brief Return the ***half*** angle between this quaternion and the other
401    * @param q The other quaternion */
angle(const btQuaternion & q)402 	btScalar angle(const btQuaternion& q) const
403 	{
404 		btScalar s = btSqrt(length2() * q.length2());
405 		btAssert(s != btScalar(0.0));
406 		return btAcos(dot(q) / s);
407 	}
408 
409 	/**@brief Return the angle between this quaternion and the other along the shortest path
410 	* @param q The other quaternion */
angleShortestPath(const btQuaternion & q)411 	btScalar angleShortestPath(const btQuaternion& q) const
412 	{
413 		btScalar s = btSqrt(length2() * q.length2());
414 		btAssert(s != btScalar(0.0));
415 		if (dot(q) < 0) // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
416 			return btAcos(dot(-q) / s) * btScalar(2.0);
417 		else
418 			return btAcos(dot(q) / s) * btScalar(2.0);
419 	}
420 
421   /**@brief Return the angle of rotation represented by this quaternion */
getAngle()422 	btScalar getAngle() const
423 	{
424 		btScalar s = btScalar(2.) * btAcos(m_floats[3]);
425 		return s;
426 	}
427 
428 	/**@brief Return the angle of rotation represented by this quaternion along the shortest path*/
getAngleShortestPath()429 	btScalar getAngleShortestPath() const
430 	{
431 		btScalar s;
432 		if (dot(*this) < 0)
433 			s = btScalar(2.) * btAcos(m_floats[3]);
434 		else
435 			s = btScalar(2.) * btAcos(-m_floats[3]);
436 
437 		return s;
438 	}
439 
440 
441 	/**@brief Return the axis of the rotation represented by this quaternion */
getAxis()442 	btVector3 getAxis() const
443 	{
444 		btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
445 
446 		if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
447 			return btVector3(1.0, 0.0, 0.0);  // Arbitrary
448 		btScalar s = 1.f/btSqrt(s_squared);
449 		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
450 	}
451 
452 	/**@brief Return the inverse of this quaternion */
inverse()453 	btQuaternion inverse() const
454 	{
455 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
456 		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
457 #elif defined(BT_USE_NEON)
458         return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
459 #else
460 		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
461 #endif
462 	}
463 
464   /**@brief Return the sum of this quaternion and the other
465    * @param q2 The other quaternion */
466 	SIMD_FORCE_INLINE btQuaternion
467 	operator+(const btQuaternion& q2) const
468 	{
469 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
470 		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
471 #elif defined(BT_USE_NEON)
472         return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
473 #else
474 		const btQuaternion& q1 = *this;
475 		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
476 #endif
477 	}
478 
479   /**@brief Return the difference between this quaternion and the other
480    * @param q2 The other quaternion */
481 	SIMD_FORCE_INLINE btQuaternion
482 	operator-(const btQuaternion& q2) const
483 	{
484 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
485 		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
486 #elif defined(BT_USE_NEON)
487         return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
488 #else
489 		const btQuaternion& q1 = *this;
490 		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
491 #endif
492 	}
493 
494   /**@brief Return the negative of this quaternion
495    * This simply negates each element */
496 	SIMD_FORCE_INLINE btQuaternion operator-() const
497 	{
498 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
499 		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
500 #elif defined(BT_USE_NEON)
501 		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
502 #else
503 		const btQuaternion& q2 = *this;
504 		return btQuaternion( - q2.x(), - q2.y(),  - q2.z(),  - q2.m_floats[3]);
505 #endif
506 	}
507   /**@todo document this and it's use */
farthest(const btQuaternion & qd)508 	SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const
509 	{
510 		btQuaternion diff,sum;
511 		diff = *this - qd;
512 		sum = *this + qd;
513 		if( diff.dot(diff) > sum.dot(sum) )
514 			return qd;
515 		return (-qd);
516 	}
517 
518 	/**@todo document this and it's use */
nearest(const btQuaternion & qd)519 	SIMD_FORCE_INLINE btQuaternion nearest( const btQuaternion& qd) const
520 	{
521 		btQuaternion diff,sum;
522 		diff = *this - qd;
523 		sum = *this + qd;
524 		if( diff.dot(diff) < sum.dot(sum) )
525 			return qd;
526 		return (-qd);
527 	}
528 
529 
530   /**@brief Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion
531    * @param q The other quaternion to interpolate with
532    * @param t The ratio between this and q to interpolate.  If t = 0 the result is this, if t=1 the result is q.
533    * Slerp interpolates assuming constant velocity.  */
slerp(const btQuaternion & q,const btScalar & t)534 	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
535 	{
536 	  btScalar magnitude = btSqrt(length2() * q.length2());
537 	  btAssert(magnitude > btScalar(0));
538 
539     btScalar product = dot(q) / magnitude;
540     if (btFabs(product) < btScalar(1))
541 		{
542       // Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
543       const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
544 
545       const btScalar theta = btAcos(sign * product);
546       const btScalar s1 = btSin(sign * t * theta);
547       const btScalar d = btScalar(1.0) / btSin(theta);
548       const btScalar s0 = btSin((btScalar(1.0) - t) * theta);
549 
550       return btQuaternion(
551           (m_floats[0] * s0 + q.x() * s1) * d,
552           (m_floats[1] * s0 + q.y() * s1) * d,
553           (m_floats[2] * s0 + q.z() * s1) * d,
554           (m_floats[3] * s0 + q.m_floats[3] * s1) * d);
555 		}
556 		else
557 		{
558 			return *this;
559 		}
560 	}
561 
getIdentity()562 	static const btQuaternion&	getIdentity()
563 	{
564 		static const btQuaternion identityQuat(btScalar(0.),btScalar(0.),btScalar(0.),btScalar(1.));
565 		return identityQuat;
566 	}
567 
getW()568 	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }
569 
570 	SIMD_FORCE_INLINE	void	serialize(struct	btQuaternionData& dataOut) const;
571 
572 	SIMD_FORCE_INLINE	void	deSerialize(const struct	btQuaternionData& dataIn);
573 
574 	SIMD_FORCE_INLINE	void	serializeFloat(struct	btQuaternionFloatData& dataOut) const;
575 
576 	SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btQuaternionFloatData& dataIn);
577 
578 	SIMD_FORCE_INLINE	void	serializeDouble(struct	btQuaternionDoubleData& dataOut) const;
579 
580 	SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btQuaternionDoubleData& dataIn);
581 
582 };
583 
584 
585 
586 
587 
588 /**@brief Return the product of two quaternions */
589 SIMD_FORCE_INLINE btQuaternion
590 operator*(const btQuaternion& q1, const btQuaternion& q2)
591 {
592 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
593 	__m128 vQ1 = q1.get128();
594 	__m128 vQ2 = q2.get128();
595 	__m128 A0, A1, B1, A2, B2;
596 
597 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y  z x     //      vtrn
598 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext
599 
600 	A1 = A1 * B1;
601 
602 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext
603 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup
604 
605 	A2 = A2 * B2;
606 
607 	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext
608 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
609 
610 	B1 = B1 * B2;	//	A3 *= B3
611 
612 	A0 = bt_splat_ps(vQ1, 3);	//	A0
613 	A0 = A0 * vQ2;	//	A0 * B0
614 
615 	A1 = A1 + A2;	//	AB12
616 	A0 =  A0 - B1;	//	AB03 = AB0 - AB3
617 
618     A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
619 	A0 = A0 + A1;	//	AB03 + AB12
620 
621 	return btQuaternion(A0);
622 
623 #elif defined(BT_USE_NEON)
624 
625 	float32x4_t vQ1 = q1.get128();
626 	float32x4_t vQ2 = q2.get128();
627 	float32x4_t A0, A1, B1, A2, B2, A3, B3;
628     float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
629 
630     {
631     float32x2x2_t tmp;
632     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
633     vQ1zx = tmp.val[0];
634 
635     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
636     vQ2zx = tmp.val[0];
637     }
638     vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
639 
640     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
641 
642     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
643     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
644 
645     A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
646     B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
647 
648 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
649     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
650 
651     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
652     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
653 
654 	A1 = vmulq_f32(A1, B1);
655 	A2 = vmulq_f32(A2, B2);
656 	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
657 	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
658 
659 	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
660 	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3
661 
662     //	change the sign of the last element
663     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
664 	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
665 
666 	return btQuaternion(A0);
667 
668 #else
669 	return btQuaternion(
670         q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
671 		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
672 		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
673 		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
674 #endif
675 }
676 
677 SIMD_FORCE_INLINE btQuaternion
678 operator*(const btQuaternion& q, const btVector3& w)
679 {
680 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
681 	__m128 vQ1 = q.get128();
682 	__m128 vQ2 = w.get128();
683 	__m128 A1, B1, A2, B2, A3, B3;
684 
685 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
686 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
687 
688 	A1 = A1 * B1;
689 
690 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
691 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
692 
693 	A2 = A2 * B2;
694 
695 	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
696 	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
697 
698 	A3 = A3 * B3;	//	A3 *= B3
699 
700 	A1 = A1 + A2;	//	AB12
701 	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
702     A1 = A1 - A3;	//	AB123 = AB12 - AB3
703 
704 	return btQuaternion(A1);
705 
706 #elif defined(BT_USE_NEON)
707 
708 	float32x4_t vQ1 = q.get128();
709 	float32x4_t vQ2 = w.get128();
710 	float32x4_t A1, B1, A2, B2, A3, B3;
711     float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
712 
713     vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
714     {
715     float32x2x2_t tmp;
716 
717     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
718     vQ2zx = tmp.val[0];
719 
720     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
721     vQ1zx = tmp.val[0];
722     }
723 
724     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
725 
726     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
727     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
728 
729     A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X
730     B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x
731 
732 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
733     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
734 
735     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
736     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
737 
738 	A1 = vmulq_f32(A1, B1);
739 	A2 = vmulq_f32(A2, B2);
740 	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
741 
742 	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
743 
744     //	change the sign of the last element
745     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
746 
747     A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
748 
749 	return btQuaternion(A1);
750 
751 #else
752 	return btQuaternion(
753          q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
754 		 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
755 		 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
756 		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
757 #endif
758 }
759 
760 SIMD_FORCE_INLINE btQuaternion
761 operator*(const btVector3& w, const btQuaternion& q)
762 {
763 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
764 	__m128 vQ1 = w.get128();
765 	__m128 vQ2 = q.get128();
766 	__m128 A1, B1, A2, B2, A3, B3;
767 
768 	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));  // X Y  z x
769 	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));  // W W  W X
770 
771 	A1 = A1 * B1;
772 
773 	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
774 	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
775 
776 	A2 = A2 *B2;
777 
778 	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
779 	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
780 
781 	A3 = A3 * B3;	//	A3 *= B3
782 
783 	A1 = A1 + A2;	//	AB12
784 	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
785 	A1 = A1 - A3;	//	AB123 = AB12 - AB3
786 
787 	return btQuaternion(A1);
788 
789 #elif defined(BT_USE_NEON)
790 
791 	float32x4_t vQ1 = w.get128();
792 	float32x4_t vQ2 = q.get128();
793 	float32x4_t  A1, B1, A2, B2, A3, B3;
794     float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
795 
796     {
797     float32x2x2_t tmp;
798 
799     tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
800     vQ1zx = tmp.val[0];
801 
802     tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
803     vQ2zx = tmp.val[0];
804     }
805     vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
806 
807     vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
808 
809     vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
810     vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
811 
812     A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x
813     B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X
814 
815 	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
816     B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
817 
818     A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
819     B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
820 
821 	A1 = vmulq_f32(A1, B1);
822 	A2 = vmulq_f32(A2, B2);
823 	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
824 
825 	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
826 
827     //	change the sign of the last element
828     A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
829 
830     A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
831 
832 	return btQuaternion(A1);
833 
834 #else
835 	return btQuaternion(
836         +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
837 		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
838 		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
839 		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
840 #endif
841 }
842 
843 /**@brief Calculate the dot product between two quaternions */
844 SIMD_FORCE_INLINE btScalar
dot(const btQuaternion & q1,const btQuaternion & q2)845 dot(const btQuaternion& q1, const btQuaternion& q2)
846 {
847 	return q1.dot(q2);
848 }
849 
850 
851 /**@brief Return the length of a quaternion */
852 SIMD_FORCE_INLINE btScalar
length(const btQuaternion & q)853 length(const btQuaternion& q)
854 {
855 	return q.length();
856 }
857 
858 /**@brief Return the angle between two quaternions*/
859 SIMD_FORCE_INLINE btScalar
btAngle(const btQuaternion & q1,const btQuaternion & q2)860 btAngle(const btQuaternion& q1, const btQuaternion& q2)
861 {
862 	return q1.angle(q2);
863 }
864 
865 /**@brief Return the inverse of a quaternion*/
866 SIMD_FORCE_INLINE btQuaternion
inverse(const btQuaternion & q)867 inverse(const btQuaternion& q)
868 {
869 	return q.inverse();
870 }
871 
872 /**@brief Return the result of spherical linear interpolation betwen two quaternions
873  * @param q1 The first quaternion
874  * @param q2 The second quaternion
875  * @param t The ration between q1 and q2.  t = 0 return q1, t=1 returns q2
876  * Slerp assumes constant velocity between positions. */
877 SIMD_FORCE_INLINE btQuaternion
slerp(const btQuaternion & q1,const btQuaternion & q2,const btScalar & t)878 slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
879 {
880 	return q1.slerp(q2, t);
881 }
882 
883 SIMD_FORCE_INLINE btVector3
quatRotate(const btQuaternion & rotation,const btVector3 & v)884 quatRotate(const btQuaternion& rotation, const btVector3& v)
885 {
886 	btQuaternion q = rotation * v;
887 	q *= rotation.inverse();
888 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
889 	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
890 #elif defined(BT_USE_NEON)
891     return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
892 #else
893 	return btVector3(q.getX(),q.getY(),q.getZ());
894 #endif
895 }
896 
897 SIMD_FORCE_INLINE btQuaternion
shortestArcQuat(const btVector3 & v0,const btVector3 & v1)898 shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
899 {
900 	btVector3 c = v0.cross(v1);
901 	btScalar  d = v0.dot(v1);
902 
903 	if (d < -1.0 + SIMD_EPSILON)
904 	{
905 		btVector3 n,unused;
906 		btPlaneSpace1(v0,n,unused);
907 		return btQuaternion(n.x(),n.y(),n.z(),0.0f); // just pick any vector that is orthogonal to v0
908 	}
909 
910 	btScalar  s = btSqrt((1.0f + d) * 2.0f);
911 	btScalar rs = 1.0f / s;
912 
913 	return btQuaternion(c.getX()*rs,c.getY()*rs,c.getZ()*rs,s * 0.5f);
914 }
915 
916 SIMD_FORCE_INLINE btQuaternion
shortestArcQuatNormalize2(btVector3 & v0,btVector3 & v1)917 shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
918 {
919 	v0.normalize();
920 	v1.normalize();
921 	return shortestArcQuat(v0,v1);
922 }
923 
924 
925 
926 
927 struct	btQuaternionFloatData
928 {
929 	float	m_floats[4];
930 };
931 
932 struct	btQuaternionDoubleData
933 {
934 	double	m_floats[4];
935 
936 };
937 
serializeFloat(struct btQuaternionFloatData & dataOut)938 SIMD_FORCE_INLINE	void	btQuaternion::serializeFloat(struct	btQuaternionFloatData& dataOut) const
939 {
940 	///could also do a memcpy, check if it is worth it
941 	for (int i=0;i<4;i++)
942 		dataOut.m_floats[i] = float(m_floats[i]);
943 }
944 
deSerializeFloat(const struct btQuaternionFloatData & dataIn)945 SIMD_FORCE_INLINE void	btQuaternion::deSerializeFloat(const struct	btQuaternionFloatData& dataIn)
946 {
947 	for (int i=0;i<4;i++)
948 		m_floats[i] = btScalar(dataIn.m_floats[i]);
949 }
950 
951 
serializeDouble(struct btQuaternionDoubleData & dataOut)952 SIMD_FORCE_INLINE	void	btQuaternion::serializeDouble(struct	btQuaternionDoubleData& dataOut) const
953 {
954 	///could also do a memcpy, check if it is worth it
955 	for (int i=0;i<4;i++)
956 		dataOut.m_floats[i] = double(m_floats[i]);
957 }
958 
deSerializeDouble(const struct btQuaternionDoubleData & dataIn)959 SIMD_FORCE_INLINE void	btQuaternion::deSerializeDouble(const struct	btQuaternionDoubleData& dataIn)
960 {
961 	for (int i=0;i<4;i++)
962 		m_floats[i] = btScalar(dataIn.m_floats[i]);
963 }
964 
965 
serialize(struct btQuaternionData & dataOut)966 SIMD_FORCE_INLINE	void	btQuaternion::serialize(struct	btQuaternionData& dataOut) const
967 {
968 	///could also do a memcpy, check if it is worth it
969 	for (int i=0;i<4;i++)
970 		dataOut.m_floats[i] = m_floats[i];
971 }
972 
deSerialize(const struct btQuaternionData & dataIn)973 SIMD_FORCE_INLINE void	btQuaternion::deSerialize(const struct	btQuaternionData& dataIn)
974 {
975 	for (int i=0;i<4;i++)
976 		m_floats[i] = dataIn.m_floats[i];
977 }
978 
979 
980 #endif //BT_SIMD__QUATERNION_H_
981 
982 
983 
984