• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 
17 #ifndef BT_VECTOR3_H
18 #define BT_VECTOR3_H
19 
20 //#include <stdint.h>
21 #include "btScalar.h"
22 #include "btMinMax.h"
23 #include "btAlignedAllocator.h"
24 
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btVector3Data btVector3DoubleData
27 #define btVector3DataName "btVector3DoubleData"
28 #else
29 #define btVector3Data btVector3FloatData
30 #define btVector3DataName "btVector3FloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32 
33 #if defined BT_USE_SSE
34 
35 //typedef  uint32_t __m128i __attribute__ ((vector_size(16)));
36 
37 #ifdef _MSC_VER
38 #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
39 #endif
40 
41 
42 #define BT_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))
43 //#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
44 #define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
45 #define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
46 #define bt_splat_ps( _a, _i )  bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
47 
48 #define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
49 #define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
50 #define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
51 #define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
52 #define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
53 #define btvxyzMaskf btvFFF0fMask
54 #define btvAbsfMask btCastiTo128f(btvAbsMask)
55 
56 //there is an issue with XCode 3.2 (LCx errors)
57 #define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
58 #define v1110		 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
59 #define vHalf		 (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
60 #define v1_5		 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))
61 
62 //const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
63 //const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
64 //const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
65 //const __m128 ATTRIBUTE_ALIGNED16(v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};
66 
67 #endif
68 
69 #ifdef BT_USE_NEON
70 
ATTRIBUTE_ALIGNED16(btvMzeroMask)71 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
ATTRIBUTE_ALIGNED16(btvFFF0Mask)72 const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
73 	static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
ATTRIBUTE_ALIGNED16(btvAbsMask)74 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
ATTRIBUTE_ALIGNED16(btv3AbsMask)75 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
76 
77 #endif
78 
79 /**@brief btVector3 can be used to represent 3D points and vectors.
80  * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
81  * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
82  */
ATTRIBUTE_ALIGNED16(class)83 ATTRIBUTE_ALIGNED16(class) btVector3
84 {
85 public:
86 
87 	BT_DECLARE_ALIGNED_ALLOCATOR();
88 
89 #if defined (__SPU__) && defined (__CELLOS_LV2__)
90 		btScalar	m_floats[4];
91 public:
92 	SIMD_FORCE_INLINE const vec_float4&	get128() const
93 	{
94 		return *((const vec_float4*)&m_floats[0]);
95 	}
96 public:
97 #else //__CELLOS_LV2__ __SPU__
98     #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
99         union {
100             btSimdFloat4      mVec128;
101             btScalar	m_floats[4];
102         };
103         SIMD_FORCE_INLINE	btSimdFloat4	get128() const
104         {
105             return mVec128;
106         }
107         SIMD_FORCE_INLINE	void	set128(btSimdFloat4 v128)
108         {
109             mVec128 = v128;
110         }
111     #else
112         btScalar	m_floats[4];
113     #endif
114 #endif //__CELLOS_LV2__ __SPU__
115 
116 	public:
117 
118   /**@brief No initialization constructor */
119 	SIMD_FORCE_INLINE btVector3()
120 	{
121 
122 	}
123 
124 
125 
126   /**@brief Constructor from scalars
127    * @param x X value
128    * @param y Y value
129    * @param z Z value
130    */
131 	SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
132 	{
133 		m_floats[0] = _x;
134 		m_floats[1] = _y;
135 		m_floats[2] = _z;
136 		m_floats[3] = btScalar(0.f);
137 	}
138 
139 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
140 	// Set Vector
141 	SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
142 	{
143 		mVec128 = v;
144 	}
145 
146 	// Copy constructor
147 	SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
148 	{
149 		mVec128 = rhs.mVec128;
150 	}
151 
152 	// Assignment Operator
153 	SIMD_FORCE_INLINE btVector3&
154 	operator=(const btVector3& v)
155 	{
156 		mVec128 = v.mVec128;
157 
158 		return *this;
159 	}
160 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
161 
162 /**@brief Add a vector to this one
163  * @param The vector to add to this one */
164 	SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
165 	{
166 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
167 		mVec128 = _mm_add_ps(mVec128, v.mVec128);
168 #elif defined(BT_USE_NEON)
169 		mVec128 = vaddq_f32(mVec128, v.mVec128);
170 #else
171 		m_floats[0] += v.m_floats[0];
172 		m_floats[1] += v.m_floats[1];
173 		m_floats[2] += v.m_floats[2];
174 #endif
175 		return *this;
176 	}
177 
178 
179   /**@brief Subtract a vector from this one
180    * @param The vector to subtract */
181 	SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
182 	{
183 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
184 		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
185 #elif defined(BT_USE_NEON)
186 		mVec128 = vsubq_f32(mVec128, v.mVec128);
187 #else
188 		m_floats[0] -= v.m_floats[0];
189 		m_floats[1] -= v.m_floats[1];
190 		m_floats[2] -= v.m_floats[2];
191 #endif
192 		return *this;
193 	}
194 
195   /**@brief Scale the vector
196    * @param s Scale factor */
197 	SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
198 	{
199 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
200 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
201 		vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
202 		mVec128 = _mm_mul_ps(mVec128, vs);
203 #elif defined(BT_USE_NEON)
204 		mVec128 = vmulq_n_f32(mVec128, s);
205 #else
206 		m_floats[0] *= s;
207 		m_floats[1] *= s;
208 		m_floats[2] *= s;
209 #endif
210 		return *this;
211 	}
212 
213   /**@brief Inversely scale the vector
214    * @param s Scale factor to divide by */
215 	SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
216 	{
217 		btFullAssert(s != btScalar(0.0));
218 
219 #if 0 //defined(BT_USE_SSE_IN_API)
220 // this code is not faster !
221 		__m128 vs = _mm_load_ss(&s);
222 		vs = _mm_div_ss(v1110, vs);
223 		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
224 
225 		mVec128 = _mm_mul_ps(mVec128, vs);
226 
227 		return *this;
228 #else
229 		return *this *= btScalar(1.0) / s;
230 #endif
231 	}
232 
233   /**@brief Return the dot product
234    * @param v The other vector in the dot product */
235 	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
236 	{
237 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
238 		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
239 		__m128 z = _mm_movehl_ps(vd, vd);
240 		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
241 		vd = _mm_add_ss(vd, y);
242 		vd = _mm_add_ss(vd, z);
243 		return _mm_cvtss_f32(vd);
244 #elif defined(BT_USE_NEON)
245 		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
246 		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
247 		x = vadd_f32(x, vget_high_f32(vd));
248 		return vget_lane_f32(x, 0);
249 #else
250 		return	m_floats[0] * v.m_floats[0] +
251 				m_floats[1] * v.m_floats[1] +
252 				m_floats[2] * v.m_floats[2];
253 #endif
254 	}
255 
256   /**@brief Return the length of the vector squared */
257 	SIMD_FORCE_INLINE btScalar length2() const
258 	{
259 		return dot(*this);
260 	}
261 
262   /**@brief Return the length of the vector */
263 	SIMD_FORCE_INLINE btScalar length() const
264 	{
265 		return btSqrt(length2());
266 	}
267 
268 	/**@brief Return the norm (length) of the vector */
269 	SIMD_FORCE_INLINE btScalar norm() const
270 	{
271 		return length();
272 	}
273 
274   /**@brief Return the distance squared between the ends of this and another vector
275    * This is symantically treating the vector like a point */
276 	SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
277 
278   /**@brief Return the distance between the ends of this and another vector
279    * This is symantically treating the vector like a point */
280 	SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
281 
282 	SIMD_FORCE_INLINE btVector3& safeNormalize()
283 	{
284 		btVector3 absVec = this->absolute();
285 		int maxIndex = absVec.maxAxis();
286 		if (absVec[maxIndex]>0)
287 		{
288 			*this /= absVec[maxIndex];
289 			return *this /= length();
290 		}
291 		setValue(1,0,0);
292 		return *this;
293 	}
294 
295   /**@brief Normalize this vector
296    * x^2 + y^2 + z^2 = 1 */
297 	SIMD_FORCE_INLINE btVector3& normalize()
298 	{
299 
300 		btAssert(!fuzzyZero());
301 
302 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
303         // dot product first
304 		__m128 vd = _mm_mul_ps(mVec128, mVec128);
305 		__m128 z = _mm_movehl_ps(vd, vd);
306 		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
307 		vd = _mm_add_ss(vd, y);
308 		vd = _mm_add_ss(vd, z);
309 
310         #if 0
311         vd = _mm_sqrt_ss(vd);
312 		vd = _mm_div_ss(v1110, vd);
313 		vd = bt_splat_ps(vd, 0x80);
314 		mVec128 = _mm_mul_ps(mVec128, vd);
315         #else
316 
317         // NR step 1/sqrt(x) - vd is x, y is output
318         y = _mm_rsqrt_ss(vd); // estimate
319 
320         //  one step NR
321         z = v1_5;
322         vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
323         //x2 = vd;
324         vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
325         vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
326         z = _mm_sub_ss(z, vd);  // 1.5 - vd * 0.5 * y0 * y0
327 
328         y = _mm_mul_ss(y, z);   // y0 * (1.5 - vd * 0.5 * y0 * y0)
329 
330 		y = bt_splat_ps(y, 0x80);
331 		mVec128 = _mm_mul_ps(mVec128, y);
332 
333         #endif
334 
335 
336 		return *this;
337 #else
338 		return *this /= length();
339 #endif
340 	}
341 
342   /**@brief Return a normalized version of this vector */
343 	SIMD_FORCE_INLINE btVector3 normalized() const;
344 
345   /**@brief Return a rotated version of this vector
346    * @param wAxis The axis to rotate about
347    * @param angle The angle to rotate by */
348 	SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
349 
350   /**@brief Return the angle between this and another vector
351    * @param v The other vector */
352 	SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
353 	{
354 		btScalar s = btSqrt(length2() * v.length2());
355 		btFullAssert(s != btScalar(0.0));
356 		return btAcos(dot(v) / s);
357 	}
358 
359   /**@brief Return a vector will the absolute values of each element */
360 	SIMD_FORCE_INLINE btVector3 absolute() const
361 	{
362 
363 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
364 		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
365 #elif defined(BT_USE_NEON)
366 		return btVector3(vabsq_f32(mVec128));
367 #else
368 		return btVector3(
369 			btFabs(m_floats[0]),
370 			btFabs(m_floats[1]),
371 			btFabs(m_floats[2]));
372 #endif
373 	}
374 
375   /**@brief Return the cross product between this and another vector
376    * @param v The other vector */
377 	SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
378 	{
379 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
380 		__m128	T, V;
381 
382 		T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
383 		V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
384 
385 		V = _mm_mul_ps(V, mVec128);
386 		T = _mm_mul_ps(T, v.mVec128);
387 		V = _mm_sub_ps(V, T);
388 
389 		V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
390 		return btVector3(V);
391 #elif defined(BT_USE_NEON)
392 		float32x4_t T, V;
393 		// form (Y, Z, X, _) of mVec128 and v.mVec128
394 		float32x2_t Tlow = vget_low_f32(mVec128);
395 		float32x2_t Vlow = vget_low_f32(v.mVec128);
396 		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
397 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
398 
399 		V = vmulq_f32(V, mVec128);
400 		T = vmulq_f32(T, v.mVec128);
401 		V = vsubq_f32(V, T);
402 		Vlow = vget_low_f32(V);
403 		// form (Y, Z, X, _);
404 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
405 		V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
406 
407 		return btVector3(V);
408 #else
409 		return btVector3(
410 			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
411 			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
412 			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
413 #endif
414 	}
415 
416 	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
417 	{
418 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
419 		// cross:
420 		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
421 		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
422 
423 		V = _mm_mul_ps(V, v1.mVec128);
424 		T = _mm_mul_ps(T, v2.mVec128);
425 		V = _mm_sub_ps(V, T);
426 
427 		V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
428 
429 		// dot:
430 		V = _mm_mul_ps(V, mVec128);
431 		__m128 z = _mm_movehl_ps(V, V);
432 		__m128 y = _mm_shuffle_ps(V, V, 0x55);
433 		V = _mm_add_ss(V, y);
434 		V = _mm_add_ss(V, z);
435 		return _mm_cvtss_f32(V);
436 
437 #elif defined(BT_USE_NEON)
438 		// cross:
439 		float32x4_t T, V;
440 		// form (Y, Z, X, _) of mVec128 and v.mVec128
441 		float32x2_t Tlow = vget_low_f32(v1.mVec128);
442 		float32x2_t Vlow = vget_low_f32(v2.mVec128);
443 		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
444 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
445 
446 		V = vmulq_f32(V, v1.mVec128);
447 		T = vmulq_f32(T, v2.mVec128);
448 		V = vsubq_f32(V, T);
449 		Vlow = vget_low_f32(V);
450 		// form (Y, Z, X, _);
451 		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
452 
453 		// dot:
454 		V = vmulq_f32(mVec128, V);
455 		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
456 		x = vadd_f32(x, vget_high_f32(V));
457 		return vget_lane_f32(x, 0);
458 #else
459 		return
460 			m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
461 			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
462 			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
463 #endif
464 	}
465 
466   /**@brief Return the axis with the smallest value
467    * Note return values are 0,1,2 for x, y, or z */
468 	SIMD_FORCE_INLINE int minAxis() const
469 	{
470 		return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
471 	}
472 
473   /**@brief Return the axis with the largest value
474    * Note return values are 0,1,2 for x, y, or z */
475 	SIMD_FORCE_INLINE int maxAxis() const
476 	{
477 		return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
478 	}
479 
480 	SIMD_FORCE_INLINE int furthestAxis() const
481 	{
482 		return absolute().minAxis();
483 	}
484 
485 	SIMD_FORCE_INLINE int closestAxis() const
486 	{
487 		return absolute().maxAxis();
488 	}
489 
490 
491 	SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
492 	{
493 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
494 		__m128	vrt = _mm_load_ss(&rt);	//	(rt 0 0 0)
495 		btScalar s = btScalar(1.0) - rt;
496 		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
497 		vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
498 		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
499 		vrt = bt_pshufd_ps(vrt, 0x80);	//	(rt rt rt 0.0)
500 		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
501 		__m128 tmp3 = _mm_add_ps(r0,r1);
502 		mVec128 = tmp3;
503 #elif defined(BT_USE_NEON)
504 		float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
505 		vl = vmulq_n_f32(vl, rt);
506 		mVec128 = vaddq_f32(vl, v0.mVec128);
507 #else
508 		btScalar s = btScalar(1.0) - rt;
509 		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
510 		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
511 		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
512 		//don't do the unused w component
513 		//		m_co[3] = s * v0[3] + rt * v1[3];
514 #endif
515 	}
516 
517   /**@brief Return the linear interpolation between this and another vector
518    * @param v The other vector
519    * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
520 	SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
521 	{
522 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
523 		__m128	vt = _mm_load_ss(&t);	//	(t 0 0 0)
524 		vt = bt_pshufd_ps(vt, 0x80);	//	(rt rt rt 0.0)
525 		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
526 		vl = _mm_mul_ps(vl, vt);
527 		vl = _mm_add_ps(vl, mVec128);
528 
529 		return btVector3(vl);
530 #elif defined(BT_USE_NEON)
531 		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
532 		vl = vmulq_n_f32(vl, t);
533 		vl = vaddq_f32(vl, mVec128);
534 
535 		return btVector3(vl);
536 #else
537 		return
538 			btVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
539 						m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
540 						m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
541 #endif
542 	}
543 
544   /**@brief Elementwise multiply this vector by the other
545    * @param v The other vector */
546 	SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
547 	{
548 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
549 		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
550 #elif defined(BT_USE_NEON)
551 		mVec128 = vmulq_f32(mVec128, v.mVec128);
552 #else
553 		m_floats[0] *= v.m_floats[0];
554 		m_floats[1] *= v.m_floats[1];
555 		m_floats[2] *= v.m_floats[2];
556 #endif
557 		return *this;
558 	}
559 
560 	 /**@brief Return the x value */
561 		SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
562   /**@brief Return the y value */
563 		SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
564   /**@brief Return the z value */
565 		SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
566   /**@brief Set the x value */
567 		SIMD_FORCE_INLINE void	setX(btScalar _x) { m_floats[0] = _x;};
568   /**@brief Set the y value */
569 		SIMD_FORCE_INLINE void	setY(btScalar _y) { m_floats[1] = _y;};
570   /**@brief Set the z value */
571 		SIMD_FORCE_INLINE void	setZ(btScalar _z) { m_floats[2] = _z;};
572   /**@brief Set the w value */
573 		SIMD_FORCE_INLINE void	setW(btScalar _w) { m_floats[3] = _w;};
574   /**@brief Return the x value */
575 		SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
576   /**@brief Return the y value */
577 		SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
578   /**@brief Return the z value */
579 		SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
580   /**@brief Return the w value */
581 		SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
582 
583 	//SIMD_FORCE_INLINE btScalar&       operator[](int i)       { return (&m_floats[0])[i];	}
584 	//SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
585 	///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
586 	SIMD_FORCE_INLINE	operator       btScalar *()       { return &m_floats[0]; }
587 	SIMD_FORCE_INLINE	operator const btScalar *() const { return &m_floats[0]; }
588 
589 	SIMD_FORCE_INLINE	bool	operator==(const btVector3& other) const
590 	{
591 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
592         return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
593 #else
594 		return ((m_floats[3]==other.m_floats[3]) &&
595                 (m_floats[2]==other.m_floats[2]) &&
596                 (m_floats[1]==other.m_floats[1]) &&
597                 (m_floats[0]==other.m_floats[0]));
598 #endif
599 	}
600 
601 	SIMD_FORCE_INLINE	bool	operator!=(const btVector3& other) const
602 	{
603 		return !(*this == other);
604 	}
605 
606   /**@brief Set each element to the max of the current values and the values of another btVector3
607    * @param other The other btVector3 to compare with
608    */
609 	SIMD_FORCE_INLINE void	setMax(const btVector3& other)
610 	{
611 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
612 		mVec128 = _mm_max_ps(mVec128, other.mVec128);
613 #elif defined(BT_USE_NEON)
614 		mVec128 = vmaxq_f32(mVec128, other.mVec128);
615 #else
616 		btSetMax(m_floats[0], other.m_floats[0]);
617 		btSetMax(m_floats[1], other.m_floats[1]);
618 		btSetMax(m_floats[2], other.m_floats[2]);
619 		btSetMax(m_floats[3], other.w());
620 #endif
621 	}
622 
623   /**@brief Set each element to the min of the current values and the values of another btVector3
624    * @param other The other btVector3 to compare with
625    */
626 	SIMD_FORCE_INLINE void	setMin(const btVector3& other)
627 	{
628 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
629 		mVec128 = _mm_min_ps(mVec128, other.mVec128);
630 #elif defined(BT_USE_NEON)
631 		mVec128 = vminq_f32(mVec128, other.mVec128);
632 #else
633 		btSetMin(m_floats[0], other.m_floats[0]);
634 		btSetMin(m_floats[1], other.m_floats[1]);
635 		btSetMin(m_floats[2], other.m_floats[2]);
636 		btSetMin(m_floats[3], other.w());
637 #endif
638 	}
639 
640 	SIMD_FORCE_INLINE void 	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
641 	{
642 		m_floats[0]=_x;
643 		m_floats[1]=_y;
644 		m_floats[2]=_z;
645 		m_floats[3] = btScalar(0.f);
646 	}
647 
648 	void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
649 	{
650 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
651 
652 		__m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
653 		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
654 		__m128 V2 = _mm_movelh_ps(V0, V);
655 
656 		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
657 
658         V0 = _mm_shuffle_ps(V0, V, 0xDB);
659 		V2 = _mm_shuffle_ps(V2, V, 0xF9);
660 
661 		v0->mVec128 = V0;
662 		v1->mVec128 = V1;
663 		v2->mVec128 = V2;
664 #else
665 		v0->setValue(0.		,-z()		,y());
666 		v1->setValue(z()	,0.			,-x());
667 		v2->setValue(-y()	,x()	,0.);
668 #endif
669 	}
670 
671 	void setZero()
672 	{
673 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
674 		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
675 #elif defined(BT_USE_NEON)
676 		int32x4_t vi = vdupq_n_s32(0);
677 		mVec128 = vreinterpretq_f32_s32(vi);
678 #else
679 		setValue(btScalar(0.),btScalar(0.),btScalar(0.));
680 #endif
681 	}
682 
683 	SIMD_FORCE_INLINE bool isZero() const
684 	{
685 		return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
686 	}
687 
688 
689 	SIMD_FORCE_INLINE bool fuzzyZero() const
690 	{
691 		return length2() < SIMD_EPSILON*SIMD_EPSILON;
692 	}
693 
694 	SIMD_FORCE_INLINE	void	serialize(struct	btVector3Data& dataOut) const;
695 
696 	SIMD_FORCE_INLINE	void	deSerialize(const struct	btVector3Data& dataIn);
697 
698 	SIMD_FORCE_INLINE	void	serializeFloat(struct	btVector3FloatData& dataOut) const;
699 
700 	SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btVector3FloatData& dataIn);
701 
702 	SIMD_FORCE_INLINE	void	serializeDouble(struct	btVector3DoubleData& dataOut) const;
703 
704 	SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btVector3DoubleData& dataIn);
705 
706         /**@brief returns index of maximum dot product between this and vectors in array[]
707          * @param array The other vectors
708          * @param array_count The number of other vectors
709          * @param dotOut The maximum dot product */
710         SIMD_FORCE_INLINE   long    maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
711 
712         /**@brief returns index of minimum dot product between this and vectors in array[]
713          * @param array The other vectors
714          * @param array_count The number of other vectors
715          * @param dotOut The minimum dot product */
716         SIMD_FORCE_INLINE   long    minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
717 
718     /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
719     SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
720     {
721 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
722 
723         __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
724         __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
725         __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
726         __m128 b0 = _mm_unpacklo_ps( a0, a1 );
727         __m128 b1 = _mm_unpackhi_ps( a0, a1 );
728         __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
729         __m128 r = _mm_movelh_ps( b0, b2 );
730         r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
731         a2 = _mm_and_ps( a2, btvxyzMaskf);
732         r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
733         return btVector3(r);
734 
735 #elif defined(BT_USE_NEON)
736         static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
737         float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
738         float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
739         float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
740         float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
741         a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
742         float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
743         float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
744         return btVector3( vcombine_f32(b0, b1) );
745 #else
746 		return btVector3( dot(v0), dot(v1), dot(v2));
747 #endif
748     }
749 };
750 
751 /**@brief Return the sum of two vectors (Point symantics)*/
752 SIMD_FORCE_INLINE btVector3
753 operator+(const btVector3& v1, const btVector3& v2)
754 {
755 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
756 	return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
757 #elif defined(BT_USE_NEON)
758 	return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
759 #else
760 	return btVector3(
761 			v1.m_floats[0] + v2.m_floats[0],
762 			v1.m_floats[1] + v2.m_floats[1],
763 			v1.m_floats[2] + v2.m_floats[2]);
764 #endif
765 }
766 
767 /**@brief Return the elementwise product of two vectors */
768 SIMD_FORCE_INLINE btVector3
769 operator*(const btVector3& v1, const btVector3& v2)
770 {
771 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
772 	return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
773 #elif defined(BT_USE_NEON)
774 	return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
775 #else
776 	return btVector3(
777 			v1.m_floats[0] * v2.m_floats[0],
778 			v1.m_floats[1] * v2.m_floats[1],
779 			v1.m_floats[2] * v2.m_floats[2]);
780 #endif
781 }
782 
783 /**@brief Return the difference between two vectors */
784 SIMD_FORCE_INLINE btVector3
785 operator-(const btVector3& v1, const btVector3& v2)
786 {
787 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
788 
789 	//	without _mm_and_ps this code causes slowdown in Concave moving
790 	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
791 	return btVector3(_mm_and_ps(r, btvFFF0fMask));
792 #elif defined(BT_USE_NEON)
793 	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
794 	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
795 #else
796 	return btVector3(
797 			v1.m_floats[0] - v2.m_floats[0],
798 			v1.m_floats[1] - v2.m_floats[1],
799 			v1.m_floats[2] - v2.m_floats[2]);
800 #endif
801 }
802 
803 /**@brief Return the negative of the vector */
804 SIMD_FORCE_INLINE btVector3
805 operator-(const btVector3& v)
806 {
807 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
808 	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
809 	return btVector3(_mm_and_ps(r, btvFFF0fMask));
810 #elif defined(BT_USE_NEON)
811 	return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
812 #else
813 	return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
814 #endif
815 }
816 
817 /**@brief Return the vector scaled by s */
818 SIMD_FORCE_INLINE btVector3
819 operator*(const btVector3& v, const btScalar& s)
820 {
821 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
822 	__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
823 	vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
824 	return btVector3(_mm_mul_ps(v.mVec128, vs));
825 #elif defined(BT_USE_NEON)
826 	float32x4_t r = vmulq_n_f32(v.mVec128, s);
827 	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
828 #else
829 	return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
830 #endif
831 }
832 
833 /**@brief Return the vector scaled by s */
834 SIMD_FORCE_INLINE btVector3
835 operator*(const btScalar& s, const btVector3& v)
836 {
837 	return v * s;
838 }
839 
840 /**@brief Return the vector inversely scaled by s */
841 SIMD_FORCE_INLINE btVector3
842 operator/(const btVector3& v, const btScalar& s)
843 {
844 	btFullAssert(s != btScalar(0.0));
845 #if 0 //defined(BT_USE_SSE_IN_API)
846 // this code is not faster !
847 	__m128 vs = _mm_load_ss(&s);
848     vs = _mm_div_ss(v1110, vs);
849 	vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
850 
851 	return btVector3(_mm_mul_ps(v.mVec128, vs));
852 #else
853 	return v * (btScalar(1.0) / s);
854 #endif
855 }
856 
857 /**@brief Return the vector inversely scaled by s */
858 SIMD_FORCE_INLINE btVector3
859 operator/(const btVector3& v1, const btVector3& v2)
860 {
861 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
862 	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
863 	vec = _mm_and_ps(vec, btvFFF0fMask);
864 	return btVector3(vec);
865 #elif defined(BT_USE_NEON)
866 	float32x4_t x, y, v, m;
867 
868 	x = v1.mVec128;
869 	y = v2.mVec128;
870 
871 	v = vrecpeq_f32(y);			// v ~ 1/y
872 	m = vrecpsq_f32(y, v);		// m = (2-v*y)
873 	v = vmulq_f32(v, m);		// vv = v*m ~~ 1/y
874 	m = vrecpsq_f32(y, v);		// mm = (2-vv*y)
875 	v = vmulq_f32(v, x);		// x*vv
876 	v = vmulq_f32(v, m);		// (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
877 
878 	return btVector3(v);
879 #else
880 	return btVector3(
881 			v1.m_floats[0] / v2.m_floats[0],
882 			v1.m_floats[1] / v2.m_floats[1],
883 			v1.m_floats[2] / v2.m_floats[2]);
884 #endif
885 }
886 
887 /**@brief Return the dot product between two vectors */
888 SIMD_FORCE_INLINE btScalar
btDot(const btVector3 & v1,const btVector3 & v2)889 btDot(const btVector3& v1, const btVector3& v2)
890 {
891 	return v1.dot(v2);
892 }
893 
894 
895 /**@brief Return the distance squared between two vectors */
896 SIMD_FORCE_INLINE btScalar
btDistance2(const btVector3 & v1,const btVector3 & v2)897 btDistance2(const btVector3& v1, const btVector3& v2)
898 {
899 	return v1.distance2(v2);
900 }
901 
902 
903 /**@brief Return the distance between two vectors */
904 SIMD_FORCE_INLINE btScalar
btDistance(const btVector3 & v1,const btVector3 & v2)905 btDistance(const btVector3& v1, const btVector3& v2)
906 {
907 	return v1.distance(v2);
908 }
909 
910 /**@brief Return the angle between two vectors */
911 SIMD_FORCE_INLINE btScalar
btAngle(const btVector3 & v1,const btVector3 & v2)912 btAngle(const btVector3& v1, const btVector3& v2)
913 {
914 	return v1.angle(v2);
915 }
916 
917 /**@brief Return the cross product of two vectors */
918 SIMD_FORCE_INLINE btVector3
btCross(const btVector3 & v1,const btVector3 & v2)919 btCross(const btVector3& v1, const btVector3& v2)
920 {
921 	return v1.cross(v2);
922 }
923 
924 SIMD_FORCE_INLINE btScalar
btTriple(const btVector3 & v1,const btVector3 & v2,const btVector3 & v3)925 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
926 {
927 	return v1.triple(v2, v3);
928 }
929 
930 /**@brief Return the linear interpolation between two vectors
931  * @param v1 One vector
932  * @param v2 The other vector
933  * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
934 SIMD_FORCE_INLINE btVector3
lerp(const btVector3 & v1,const btVector3 & v2,const btScalar & t)935 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
936 {
937 	return v1.lerp(v2, t);
938 }
939 
940 
941 
distance2(const btVector3 & v)942 SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
943 {
944 	return (v - *this).length2();
945 }
946 
distance(const btVector3 & v)947 SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
948 {
949 	return (v - *this).length();
950 }
951 
normalized()952 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
953 {
954 	btVector3 nrm = *this;
955 
956 	return nrm.normalize();
957 }
958 
rotate(const btVector3 & wAxis,const btScalar _angle)959 SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
960 {
961 	// wAxis must be a unit lenght vector
962 
963 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
964 
965     __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
966 	btScalar ssin = btSin( _angle );
967     __m128 C = wAxis.cross( mVec128 ).mVec128;
968 	O = _mm_and_ps(O, btvFFF0fMask);
969     btScalar scos = btCos( _angle );
970 
971 	__m128 vsin = _mm_load_ss(&ssin);	//	(S 0 0 0)
972     __m128 vcos = _mm_load_ss(&scos);	//	(S 0 0 0)
973 
974 	__m128 Y = bt_pshufd_ps(O, 0xC9);	//	(Y Z X 0)
975 	__m128 Z = bt_pshufd_ps(O, 0xD2);	//	(Z X Y 0)
976 	O = _mm_add_ps(O, Y);
977 	vsin = bt_pshufd_ps(vsin, 0x80);	//	(S S S 0)
978 	O = _mm_add_ps(O, Z);
979     vcos = bt_pshufd_ps(vcos, 0x80);	//	(S S S 0)
980 
981     vsin = vsin * C;
982 	O = O * wAxis.mVec128;
983 	__m128 X = mVec128 - O;
984 
985     O = O + vsin;
986 	vcos = vcos * X;
987 	O = O + vcos;
988 
989 	return btVector3(O);
990 #else
991 	btVector3 o = wAxis * wAxis.dot( *this );
992 	btVector3 _x = *this - o;
993 	btVector3 _y;
994 
995 	_y = wAxis.cross( *this );
996 
997 	return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
998 #endif
999 }
1000 
maxDot(const btVector3 * array,long array_count,btScalar & dotOut)1001 SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1002 {
1003 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1004     #if defined _WIN32 || defined (BT_USE_SSE)
1005         const long scalar_cutoff = 10;
1006         long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1007     #elif defined BT_USE_NEON
1008         const long scalar_cutoff = 4;
1009         extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1010     #endif
1011     if( array_count < scalar_cutoff )
1012 #endif
1013     {
1014         btScalar maxDot1 = -SIMD_INFINITY;
1015         int i = 0;
1016         int ptIndex = -1;
1017         for( i = 0; i < array_count; i++ )
1018         {
1019             btScalar dot = array[i].dot(*this);
1020 
1021             if( dot > maxDot1 )
1022             {
1023                 maxDot1 = dot;
1024                 ptIndex = i;
1025             }
1026         }
1027 
1028         dotOut = maxDot1;
1029         return ptIndex;
1030     }
1031 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1032     return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1033 #endif
1034 }
1035 
minDot(const btVector3 * array,long array_count,btScalar & dotOut)1036 SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1037 {
1038 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1039     #if defined BT_USE_SSE
1040         const long scalar_cutoff = 10;
1041         long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1042     #elif defined BT_USE_NEON
1043         const long scalar_cutoff = 4;
1044         extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1045     #else
1046         #error unhandled arch!
1047     #endif
1048 
1049     if( array_count < scalar_cutoff )
1050 #endif
1051     {
1052         btScalar  minDot = SIMD_INFINITY;
1053         int i = 0;
1054         int ptIndex = -1;
1055 
1056         for( i = 0; i < array_count; i++ )
1057         {
1058             btScalar dot = array[i].dot(*this);
1059 
1060             if( dot < minDot )
1061             {
1062                 minDot = dot;
1063                 ptIndex = i;
1064             }
1065         }
1066 
1067         dotOut = minDot;
1068 
1069         return ptIndex;
1070     }
1071 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1072     return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1073 #endif//BT_USE_SIMD_VECTOR3
1074 }
1075 
1076 
1077 class btVector4 : public btVector3
1078 {
1079 public:
1080 
btVector4()1081 	SIMD_FORCE_INLINE btVector4() {}
1082 
1083 
btVector4(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)1084 	SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1085 		: btVector3(_x,_y,_z)
1086 	{
1087 		m_floats[3] = _w;
1088 	}
1089 
1090 #if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
btVector4(const btSimdFloat4 vec)1091 	SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
1092 	{
1093 		mVec128 = vec;
1094 	}
1095 
btVector4(const btVector3 & rhs)1096 	SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
1097 	{
1098 		mVec128 = rhs.mVec128;
1099 	}
1100 
1101 	SIMD_FORCE_INLINE btVector4&
1102 	operator=(const btVector4& v)
1103 	{
1104 		mVec128 = v.mVec128;
1105 		return *this;
1106 	}
1107 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1108 
absolute4()1109 	SIMD_FORCE_INLINE btVector4 absolute4() const
1110 	{
1111 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
1112 		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
1113 #elif defined(BT_USE_NEON)
1114 		return btVector4(vabsq_f32(mVec128));
1115 #else
1116 		return btVector4(
1117 			btFabs(m_floats[0]),
1118 			btFabs(m_floats[1]),
1119 			btFabs(m_floats[2]),
1120 			btFabs(m_floats[3]));
1121 #endif
1122 	}
1123 
1124 
getW()1125 	btScalar	getW() const { return m_floats[3];}
1126 
1127 
maxAxis4()1128 		SIMD_FORCE_INLINE int maxAxis4() const
1129 	{
1130 		int maxIndex = -1;
1131 		btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
1132 		if (m_floats[0] > maxVal)
1133 		{
1134 			maxIndex = 0;
1135 			maxVal = m_floats[0];
1136 		}
1137 		if (m_floats[1] > maxVal)
1138 		{
1139 			maxIndex = 1;
1140 			maxVal = m_floats[1];
1141 		}
1142 		if (m_floats[2] > maxVal)
1143 		{
1144 			maxIndex = 2;
1145 			maxVal =m_floats[2];
1146 		}
1147 		if (m_floats[3] > maxVal)
1148 		{
1149 			maxIndex = 3;
1150 			maxVal = m_floats[3];
1151 		}
1152 
1153 		return maxIndex;
1154 	}
1155 
1156 
minAxis4()1157 	SIMD_FORCE_INLINE int minAxis4() const
1158 	{
1159 		int minIndex = -1;
1160 		btScalar minVal = btScalar(BT_LARGE_FLOAT);
1161 		if (m_floats[0] < minVal)
1162 		{
1163 			minIndex = 0;
1164 			minVal = m_floats[0];
1165 		}
1166 		if (m_floats[1] < minVal)
1167 		{
1168 			minIndex = 1;
1169 			minVal = m_floats[1];
1170 		}
1171 		if (m_floats[2] < minVal)
1172 		{
1173 			minIndex = 2;
1174 			minVal =m_floats[2];
1175 		}
1176 		if (m_floats[3] < minVal)
1177 		{
1178 			minIndex = 3;
1179 			minVal = m_floats[3];
1180 		}
1181 
1182 		return minIndex;
1183 	}
1184 
1185 
closestAxis4()1186 	SIMD_FORCE_INLINE int closestAxis4() const
1187 	{
1188 		return absolute4().maxAxis4();
1189 	}
1190 
1191 
1192 
1193 
1194   /**@brief Set x,y,z and zero w
1195    * @param x Value of x
1196    * @param y Value of y
1197    * @param z Value of z
1198    */
1199 
1200 
1201 /*		void getValue(btScalar *m) const
1202 		{
1203 			m[0] = m_floats[0];
1204 			m[1] = m_floats[1];
1205 			m[2] =m_floats[2];
1206 		}
1207 */
1208 /**@brief Set the values
1209    * @param x Value of x
1210    * @param y Value of y
1211    * @param z Value of z
1212    * @param w Value of w
1213    */
setValue(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)1214 		SIMD_FORCE_INLINE void	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1215 		{
1216 			m_floats[0]=_x;
1217 			m_floats[1]=_y;
1218 			m_floats[2]=_z;
1219 			m_floats[3]=_w;
1220 		}
1221 
1222 
1223 };
1224 
1225 
1226 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapScalarEndian(const btScalar & sourceVal,btScalar & destVal)1227 SIMD_FORCE_INLINE void	btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1228 {
1229 	#ifdef BT_USE_DOUBLE_PRECISION
1230 	unsigned char* dest = (unsigned char*) &destVal;
1231 	unsigned char* src  = (unsigned char*) &sourceVal;
1232 	dest[0] = src[7];
1233     dest[1] = src[6];
1234     dest[2] = src[5];
1235     dest[3] = src[4];
1236     dest[4] = src[3];
1237     dest[5] = src[2];
1238     dest[6] = src[1];
1239     dest[7] = src[0];
1240 #else
1241 	unsigned char* dest = (unsigned char*) &destVal;
1242 	unsigned char* src  = (unsigned char*) &sourceVal;
1243 	dest[0] = src[3];
1244     dest[1] = src[2];
1245     dest[2] = src[1];
1246     dest[3] = src[0];
1247 #endif //BT_USE_DOUBLE_PRECISION
1248 }
1249 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapVector3Endian(const btVector3 & sourceVec,btVector3 & destVec)1250 SIMD_FORCE_INLINE void	btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
1251 {
1252 	for (int i=0;i<4;i++)
1253 	{
1254 		btSwapScalarEndian(sourceVec[i],destVec[i]);
1255 	}
1256 
1257 }
1258 
1259 ///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btUnSwapVector3Endian(btVector3 & vector)1260 SIMD_FORCE_INLINE void	btUnSwapVector3Endian(btVector3& vector)
1261 {
1262 
1263 	btVector3	swappedVec;
1264 	for (int i=0;i<4;i++)
1265 	{
1266 		btSwapScalarEndian(vector[i],swappedVec[i]);
1267 	}
1268 	vector = swappedVec;
1269 }
1270 
1271 template <class T>
btPlaneSpace1(const T & n,T & p,T & q)1272 SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
1273 {
1274   if (btFabs(n[2]) > SIMDSQRT12) {
1275     // choose p in y-z plane
1276     btScalar a = n[1]*n[1] + n[2]*n[2];
1277     btScalar k = btRecipSqrt (a);
1278     p[0] = 0;
1279 	p[1] = -n[2]*k;
1280 	p[2] = n[1]*k;
1281     // set q = n x p
1282     q[0] = a*k;
1283 	q[1] = -n[0]*p[2];
1284 	q[2] = n[0]*p[1];
1285   }
1286   else {
1287     // choose p in x-y plane
1288     btScalar a = n[0]*n[0] + n[1]*n[1];
1289     btScalar k = btRecipSqrt (a);
1290     p[0] = -n[1]*k;
1291 	p[1] = n[0]*k;
1292 	p[2] = 0;
1293     // set q = n x p
1294     q[0] = -n[2]*p[1];
1295 	q[1] = n[2]*p[0];
1296 	q[2] = a*k;
1297   }
1298 }
1299 
1300 
1301 struct	btVector3FloatData
1302 {
1303 	float	m_floats[4];
1304 };
1305 
1306 struct	btVector3DoubleData
1307 {
1308 	double	m_floats[4];
1309 
1310 };
1311 
serializeFloat(struct btVector3FloatData & dataOut)1312 SIMD_FORCE_INLINE	void	btVector3::serializeFloat(struct	btVector3FloatData& dataOut) const
1313 {
1314 	///could also do a memcpy, check if it is worth it
1315 	for (int i=0;i<4;i++)
1316 		dataOut.m_floats[i] = float(m_floats[i]);
1317 }
1318 
deSerializeFloat(const struct btVector3FloatData & dataIn)1319 SIMD_FORCE_INLINE void	btVector3::deSerializeFloat(const struct	btVector3FloatData& dataIn)
1320 {
1321 	for (int i=0;i<4;i++)
1322 		m_floats[i] = btScalar(dataIn.m_floats[i]);
1323 }
1324 
1325 
serializeDouble(struct btVector3DoubleData & dataOut)1326 SIMD_FORCE_INLINE	void	btVector3::serializeDouble(struct	btVector3DoubleData& dataOut) const
1327 {
1328 	///could also do a memcpy, check if it is worth it
1329 	for (int i=0;i<4;i++)
1330 		dataOut.m_floats[i] = double(m_floats[i]);
1331 }
1332 
deSerializeDouble(const struct btVector3DoubleData & dataIn)1333 SIMD_FORCE_INLINE void	btVector3::deSerializeDouble(const struct	btVector3DoubleData& dataIn)
1334 {
1335 	for (int i=0;i<4;i++)
1336 		m_floats[i] = btScalar(dataIn.m_floats[i]);
1337 }
1338 
1339 
serialize(struct btVector3Data & dataOut)1340 SIMD_FORCE_INLINE	void	btVector3::serialize(struct	btVector3Data& dataOut) const
1341 {
1342 	///could also do a memcpy, check if it is worth it
1343 	for (int i=0;i<4;i++)
1344 		dataOut.m_floats[i] = m_floats[i];
1345 }
1346 
deSerialize(const struct btVector3Data & dataIn)1347 SIMD_FORCE_INLINE void	btVector3::deSerialize(const struct	btVector3Data& dataIn)
1348 {
1349 	for (int i=0;i<4;i++)
1350 		m_floats[i] = dataIn.m_floats[i];
1351 }
1352 
1353 #endif //BT_VECTOR3_H
1354