1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14
15
16
17 #ifndef BT_VECTOR3_H
18 #define BT_VECTOR3_H
19
20 //#include <stdint.h>
21 #include "btScalar.h"
22 #include "btMinMax.h"
23 #include "btAlignedAllocator.h"
24
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btVector3Data btVector3DoubleData
27 #define btVector3DataName "btVector3DoubleData"
28 #else
29 #define btVector3Data btVector3FloatData
30 #define btVector3DataName "btVector3FloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32
33 #if defined BT_USE_SSE
34
35 //typedef uint32_t __m128i __attribute__ ((vector_size(16)));
36
37 #ifdef _MSC_VER
38 #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
39 #endif
40
41
42 #define BT_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))
43 //#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
44 #define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
45 #define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
46 #define bt_splat_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
47
48 #define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
49 #define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
50 #define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
51 #define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
52 #define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
53 #define btvxyzMaskf btvFFF0fMask
54 #define btvAbsfMask btCastiTo128f(btvAbsMask)
55
56 //there is an issue with XCode 3.2 (LCx errors)
57 #define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
58 #define v1110 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
59 #define vHalf (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
60 #define v1_5 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))
61
62 //const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
63 //const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
64 //const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
65 //const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
66
67 #endif
68
69 #ifdef BT_USE_NEON
70
ATTRIBUTE_ALIGNED16(btvMzeroMask)71 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
ATTRIBUTE_ALIGNED16(btvFFF0Mask)72 const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
73 static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
ATTRIBUTE_ALIGNED16(btvAbsMask)74 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
ATTRIBUTE_ALIGNED16(btv3AbsMask)75 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
76
77 #endif
78
79 /**@brief btVector3 can be used to represent 3D points and vectors.
80 * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
81 * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
82 */
ATTRIBUTE_ALIGNED16(class)83 ATTRIBUTE_ALIGNED16(class) btVector3
84 {
85 public:
86
87 BT_DECLARE_ALIGNED_ALLOCATOR();
88
89 #if defined (__SPU__) && defined (__CELLOS_LV2__)
90 btScalar m_floats[4];
91 public:
92 SIMD_FORCE_INLINE const vec_float4& get128() const
93 {
94 return *((const vec_float4*)&m_floats[0]);
95 }
96 public:
97 #else //__CELLOS_LV2__ __SPU__
98 #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
99 union {
100 btSimdFloat4 mVec128;
101 btScalar m_floats[4];
102 };
103 SIMD_FORCE_INLINE btSimdFloat4 get128() const
104 {
105 return mVec128;
106 }
107 SIMD_FORCE_INLINE void set128(btSimdFloat4 v128)
108 {
109 mVec128 = v128;
110 }
111 #else
112 btScalar m_floats[4];
113 #endif
114 #endif //__CELLOS_LV2__ __SPU__
115
116 public:
117
118 /**@brief No initialization constructor */
119 SIMD_FORCE_INLINE btVector3()
120 {
121
122 }
123
124
125
126 /**@brief Constructor from scalars
127 * @param x X value
128 * @param y Y value
129 * @param z Z value
130 */
131 SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
132 {
133 m_floats[0] = _x;
134 m_floats[1] = _y;
135 m_floats[2] = _z;
136 m_floats[3] = btScalar(0.f);
137 }
138
139 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
140 // Set Vector
141 SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
142 {
143 mVec128 = v;
144 }
145
146 // Copy constructor
147 SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
148 {
149 mVec128 = rhs.mVec128;
150 }
151
152 // Assignment Operator
153 SIMD_FORCE_INLINE btVector3&
154 operator=(const btVector3& v)
155 {
156 mVec128 = v.mVec128;
157
158 return *this;
159 }
160 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
161
162 /**@brief Add a vector to this one
163 * @param The vector to add to this one */
164 SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
165 {
166 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
167 mVec128 = _mm_add_ps(mVec128, v.mVec128);
168 #elif defined(BT_USE_NEON)
169 mVec128 = vaddq_f32(mVec128, v.mVec128);
170 #else
171 m_floats[0] += v.m_floats[0];
172 m_floats[1] += v.m_floats[1];
173 m_floats[2] += v.m_floats[2];
174 #endif
175 return *this;
176 }
177
178
179 /**@brief Subtract a vector from this one
180 * @param The vector to subtract */
181 SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v)
182 {
183 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
184 mVec128 = _mm_sub_ps(mVec128, v.mVec128);
185 #elif defined(BT_USE_NEON)
186 mVec128 = vsubq_f32(mVec128, v.mVec128);
187 #else
188 m_floats[0] -= v.m_floats[0];
189 m_floats[1] -= v.m_floats[1];
190 m_floats[2] -= v.m_floats[2];
191 #endif
192 return *this;
193 }
194
195 /**@brief Scale the vector
196 * @param s Scale factor */
197 SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
198 {
199 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
200 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
201 vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
202 mVec128 = _mm_mul_ps(mVec128, vs);
203 #elif defined(BT_USE_NEON)
204 mVec128 = vmulq_n_f32(mVec128, s);
205 #else
206 m_floats[0] *= s;
207 m_floats[1] *= s;
208 m_floats[2] *= s;
209 #endif
210 return *this;
211 }
212
213 /**@brief Inversely scale the vector
214 * @param s Scale factor to divide by */
215 SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s)
216 {
217 btFullAssert(s != btScalar(0.0));
218
219 #if 0 //defined(BT_USE_SSE_IN_API)
220 // this code is not faster !
221 __m128 vs = _mm_load_ss(&s);
222 vs = _mm_div_ss(v1110, vs);
223 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
224
225 mVec128 = _mm_mul_ps(mVec128, vs);
226
227 return *this;
228 #else
229 return *this *= btScalar(1.0) / s;
230 #endif
231 }
232
233 /**@brief Return the dot product
234 * @param v The other vector in the dot product */
235 SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
236 {
237 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
238 __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
239 __m128 z = _mm_movehl_ps(vd, vd);
240 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
241 vd = _mm_add_ss(vd, y);
242 vd = _mm_add_ss(vd, z);
243 return _mm_cvtss_f32(vd);
244 #elif defined(BT_USE_NEON)
245 float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
246 float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
247 x = vadd_f32(x, vget_high_f32(vd));
248 return vget_lane_f32(x, 0);
249 #else
250 return m_floats[0] * v.m_floats[0] +
251 m_floats[1] * v.m_floats[1] +
252 m_floats[2] * v.m_floats[2];
253 #endif
254 }
255
256 /**@brief Return the length of the vector squared */
257 SIMD_FORCE_INLINE btScalar length2() const
258 {
259 return dot(*this);
260 }
261
262 /**@brief Return the length of the vector */
263 SIMD_FORCE_INLINE btScalar length() const
264 {
265 return btSqrt(length2());
266 }
267
268 /**@brief Return the norm (length) of the vector */
269 SIMD_FORCE_INLINE btScalar norm() const
270 {
271 return length();
272 }
273
274 /**@brief Return the distance squared between the ends of this and another vector
275 * This is symantically treating the vector like a point */
276 SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
277
278 /**@brief Return the distance between the ends of this and another vector
279 * This is symantically treating the vector like a point */
280 SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
281
282 SIMD_FORCE_INLINE btVector3& safeNormalize()
283 {
284 btVector3 absVec = this->absolute();
285 int maxIndex = absVec.maxAxis();
286 if (absVec[maxIndex]>0)
287 {
288 *this /= absVec[maxIndex];
289 return *this /= length();
290 }
291 setValue(1,0,0);
292 return *this;
293 }
294
295 /**@brief Normalize this vector
296 * x^2 + y^2 + z^2 = 1 */
297 SIMD_FORCE_INLINE btVector3& normalize()
298 {
299
300 btAssert(!fuzzyZero());
301
302 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
303 // dot product first
304 __m128 vd = _mm_mul_ps(mVec128, mVec128);
305 __m128 z = _mm_movehl_ps(vd, vd);
306 __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
307 vd = _mm_add_ss(vd, y);
308 vd = _mm_add_ss(vd, z);
309
310 #if 0
311 vd = _mm_sqrt_ss(vd);
312 vd = _mm_div_ss(v1110, vd);
313 vd = bt_splat_ps(vd, 0x80);
314 mVec128 = _mm_mul_ps(mVec128, vd);
315 #else
316
317 // NR step 1/sqrt(x) - vd is x, y is output
318 y = _mm_rsqrt_ss(vd); // estimate
319
320 // one step NR
321 z = v1_5;
322 vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
323 //x2 = vd;
324 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
325 vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
326 z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
327
328 y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
329
330 y = bt_splat_ps(y, 0x80);
331 mVec128 = _mm_mul_ps(mVec128, y);
332
333 #endif
334
335
336 return *this;
337 #else
338 return *this /= length();
339 #endif
340 }
341
342 /**@brief Return a normalized version of this vector */
343 SIMD_FORCE_INLINE btVector3 normalized() const;
344
345 /**@brief Return a rotated version of this vector
346 * @param wAxis The axis to rotate about
347 * @param angle The angle to rotate by */
348 SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
349
350 /**@brief Return the angle between this and another vector
351 * @param v The other vector */
352 SIMD_FORCE_INLINE btScalar angle(const btVector3& v) const
353 {
354 btScalar s = btSqrt(length2() * v.length2());
355 btFullAssert(s != btScalar(0.0));
356 return btAcos(dot(v) / s);
357 }
358
359 /**@brief Return a vector will the absolute values of each element */
360 SIMD_FORCE_INLINE btVector3 absolute() const
361 {
362
363 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
364 return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
365 #elif defined(BT_USE_NEON)
366 return btVector3(vabsq_f32(mVec128));
367 #else
368 return btVector3(
369 btFabs(m_floats[0]),
370 btFabs(m_floats[1]),
371 btFabs(m_floats[2]));
372 #endif
373 }
374
375 /**@brief Return the cross product between this and another vector
376 * @param v The other vector */
377 SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
378 {
379 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
380 __m128 T, V;
381
382 T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
383 V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
384
385 V = _mm_mul_ps(V, mVec128);
386 T = _mm_mul_ps(T, v.mVec128);
387 V = _mm_sub_ps(V, T);
388
389 V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
390 return btVector3(V);
391 #elif defined(BT_USE_NEON)
392 float32x4_t T, V;
393 // form (Y, Z, X, _) of mVec128 and v.mVec128
394 float32x2_t Tlow = vget_low_f32(mVec128);
395 float32x2_t Vlow = vget_low_f32(v.mVec128);
396 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
397 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
398
399 V = vmulq_f32(V, mVec128);
400 T = vmulq_f32(T, v.mVec128);
401 V = vsubq_f32(V, T);
402 Vlow = vget_low_f32(V);
403 // form (Y, Z, X, _);
404 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
405 V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
406
407 return btVector3(V);
408 #else
409 return btVector3(
410 m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
411 m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
412 m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
413 #endif
414 }
415
416 SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
417 {
418 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
419 // cross:
420 __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
421 __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
422
423 V = _mm_mul_ps(V, v1.mVec128);
424 T = _mm_mul_ps(T, v2.mVec128);
425 V = _mm_sub_ps(V, T);
426
427 V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
428
429 // dot:
430 V = _mm_mul_ps(V, mVec128);
431 __m128 z = _mm_movehl_ps(V, V);
432 __m128 y = _mm_shuffle_ps(V, V, 0x55);
433 V = _mm_add_ss(V, y);
434 V = _mm_add_ss(V, z);
435 return _mm_cvtss_f32(V);
436
437 #elif defined(BT_USE_NEON)
438 // cross:
439 float32x4_t T, V;
440 // form (Y, Z, X, _) of mVec128 and v.mVec128
441 float32x2_t Tlow = vget_low_f32(v1.mVec128);
442 float32x2_t Vlow = vget_low_f32(v2.mVec128);
443 T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
444 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
445
446 V = vmulq_f32(V, v1.mVec128);
447 T = vmulq_f32(T, v2.mVec128);
448 V = vsubq_f32(V, T);
449 Vlow = vget_low_f32(V);
450 // form (Y, Z, X, _);
451 V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
452
453 // dot:
454 V = vmulq_f32(mVec128, V);
455 float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
456 x = vadd_f32(x, vget_high_f32(V));
457 return vget_lane_f32(x, 0);
458 #else
459 return
460 m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
461 m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
462 m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
463 #endif
464 }
465
466 /**@brief Return the axis with the smallest value
467 * Note return values are 0,1,2 for x, y, or z */
468 SIMD_FORCE_INLINE int minAxis() const
469 {
470 return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
471 }
472
473 /**@brief Return the axis with the largest value
474 * Note return values are 0,1,2 for x, y, or z */
475 SIMD_FORCE_INLINE int maxAxis() const
476 {
477 return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
478 }
479
480 SIMD_FORCE_INLINE int furthestAxis() const
481 {
482 return absolute().minAxis();
483 }
484
485 SIMD_FORCE_INLINE int closestAxis() const
486 {
487 return absolute().maxAxis();
488 }
489
490
491 SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
492 {
493 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
494 __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
495 btScalar s = btScalar(1.0) - rt;
496 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
497 vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
498 __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
499 vrt = bt_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
500 __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
501 __m128 tmp3 = _mm_add_ps(r0,r1);
502 mVec128 = tmp3;
503 #elif defined(BT_USE_NEON)
504 float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
505 vl = vmulq_n_f32(vl, rt);
506 mVec128 = vaddq_f32(vl, v0.mVec128);
507 #else
508 btScalar s = btScalar(1.0) - rt;
509 m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
510 m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
511 m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
512 //don't do the unused w component
513 // m_co[3] = s * v0[3] + rt * v1[3];
514 #endif
515 }
516
517 /**@brief Return the linear interpolation between this and another vector
518 * @param v The other vector
519 * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
520 SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
521 {
522 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
523 __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
524 vt = bt_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
525 __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
526 vl = _mm_mul_ps(vl, vt);
527 vl = _mm_add_ps(vl, mVec128);
528
529 return btVector3(vl);
530 #elif defined(BT_USE_NEON)
531 float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
532 vl = vmulq_n_f32(vl, t);
533 vl = vaddq_f32(vl, mVec128);
534
535 return btVector3(vl);
536 #else
537 return
538 btVector3( m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
539 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
540 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
541 #endif
542 }
543
544 /**@brief Elementwise multiply this vector by the other
545 * @param v The other vector */
546 SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
547 {
548 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
549 mVec128 = _mm_mul_ps(mVec128, v.mVec128);
550 #elif defined(BT_USE_NEON)
551 mVec128 = vmulq_f32(mVec128, v.mVec128);
552 #else
553 m_floats[0] *= v.m_floats[0];
554 m_floats[1] *= v.m_floats[1];
555 m_floats[2] *= v.m_floats[2];
556 #endif
557 return *this;
558 }
559
560 /**@brief Return the x value */
561 SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
562 /**@brief Return the y value */
563 SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
564 /**@brief Return the z value */
565 SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
566 /**@brief Set the x value */
567 SIMD_FORCE_INLINE void setX(btScalar _x) { m_floats[0] = _x;};
568 /**@brief Set the y value */
569 SIMD_FORCE_INLINE void setY(btScalar _y) { m_floats[1] = _y;};
570 /**@brief Set the z value */
571 SIMD_FORCE_INLINE void setZ(btScalar _z) { m_floats[2] = _z;};
572 /**@brief Set the w value */
573 SIMD_FORCE_INLINE void setW(btScalar _w) { m_floats[3] = _w;};
574 /**@brief Return the x value */
575 SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
576 /**@brief Return the y value */
577 SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
578 /**@brief Return the z value */
579 SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
580 /**@brief Return the w value */
581 SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
582
583 //SIMD_FORCE_INLINE btScalar& operator[](int i) { return (&m_floats[0])[i]; }
584 //SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
585 ///operator btScalar*() replaces operator[], using implicit conversion. We added operator != and operator == to avoid pointer comparisons.
586 SIMD_FORCE_INLINE operator btScalar *() { return &m_floats[0]; }
587 SIMD_FORCE_INLINE operator const btScalar *() const { return &m_floats[0]; }
588
589 SIMD_FORCE_INLINE bool operator==(const btVector3& other) const
590 {
591 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
592 return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
593 #else
594 return ((m_floats[3]==other.m_floats[3]) &&
595 (m_floats[2]==other.m_floats[2]) &&
596 (m_floats[1]==other.m_floats[1]) &&
597 (m_floats[0]==other.m_floats[0]));
598 #endif
599 }
600
601 SIMD_FORCE_INLINE bool operator!=(const btVector3& other) const
602 {
603 return !(*this == other);
604 }
605
606 /**@brief Set each element to the max of the current values and the values of another btVector3
607 * @param other The other btVector3 to compare with
608 */
609 SIMD_FORCE_INLINE void setMax(const btVector3& other)
610 {
611 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
612 mVec128 = _mm_max_ps(mVec128, other.mVec128);
613 #elif defined(BT_USE_NEON)
614 mVec128 = vmaxq_f32(mVec128, other.mVec128);
615 #else
616 btSetMax(m_floats[0], other.m_floats[0]);
617 btSetMax(m_floats[1], other.m_floats[1]);
618 btSetMax(m_floats[2], other.m_floats[2]);
619 btSetMax(m_floats[3], other.w());
620 #endif
621 }
622
623 /**@brief Set each element to the min of the current values and the values of another btVector3
624 * @param other The other btVector3 to compare with
625 */
626 SIMD_FORCE_INLINE void setMin(const btVector3& other)
627 {
628 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
629 mVec128 = _mm_min_ps(mVec128, other.mVec128);
630 #elif defined(BT_USE_NEON)
631 mVec128 = vminq_f32(mVec128, other.mVec128);
632 #else
633 btSetMin(m_floats[0], other.m_floats[0]);
634 btSetMin(m_floats[1], other.m_floats[1]);
635 btSetMin(m_floats[2], other.m_floats[2]);
636 btSetMin(m_floats[3], other.w());
637 #endif
638 }
639
640 SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
641 {
642 m_floats[0]=_x;
643 m_floats[1]=_y;
644 m_floats[2]=_z;
645 m_floats[3] = btScalar(0.f);
646 }
647
648 void getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
649 {
650 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
651
652 __m128 V = _mm_and_ps(mVec128, btvFFF0fMask);
653 __m128 V0 = _mm_xor_ps(btvMzeroMask, V);
654 __m128 V2 = _mm_movelh_ps(V0, V);
655
656 __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
657
658 V0 = _mm_shuffle_ps(V0, V, 0xDB);
659 V2 = _mm_shuffle_ps(V2, V, 0xF9);
660
661 v0->mVec128 = V0;
662 v1->mVec128 = V1;
663 v2->mVec128 = V2;
664 #else
665 v0->setValue(0. ,-z() ,y());
666 v1->setValue(z() ,0. ,-x());
667 v2->setValue(-y() ,x() ,0.);
668 #endif
669 }
670
671 void setZero()
672 {
673 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
674 mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
675 #elif defined(BT_USE_NEON)
676 int32x4_t vi = vdupq_n_s32(0);
677 mVec128 = vreinterpretq_f32_s32(vi);
678 #else
679 setValue(btScalar(0.),btScalar(0.),btScalar(0.));
680 #endif
681 }
682
683 SIMD_FORCE_INLINE bool isZero() const
684 {
685 return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
686 }
687
688
689 SIMD_FORCE_INLINE bool fuzzyZero() const
690 {
691 return length2() < SIMD_EPSILON*SIMD_EPSILON;
692 }
693
694 SIMD_FORCE_INLINE void serialize(struct btVector3Data& dataOut) const;
695
696 SIMD_FORCE_INLINE void deSerialize(const struct btVector3Data& dataIn);
697
698 SIMD_FORCE_INLINE void serializeFloat(struct btVector3FloatData& dataOut) const;
699
700 SIMD_FORCE_INLINE void deSerializeFloat(const struct btVector3FloatData& dataIn);
701
702 SIMD_FORCE_INLINE void serializeDouble(struct btVector3DoubleData& dataOut) const;
703
704 SIMD_FORCE_INLINE void deSerializeDouble(const struct btVector3DoubleData& dataIn);
705
706 /**@brief returns index of maximum dot product between this and vectors in array[]
707 * @param array The other vectors
708 * @param array_count The number of other vectors
709 * @param dotOut The maximum dot product */
710 SIMD_FORCE_INLINE long maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
711
712 /**@brief returns index of minimum dot product between this and vectors in array[]
713 * @param array The other vectors
714 * @param array_count The number of other vectors
715 * @param dotOut The minimum dot product */
716 SIMD_FORCE_INLINE long minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
717
718 /* create a vector as btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 )) */
719 SIMD_FORCE_INLINE btVector3 dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
720 {
721 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
722
723 __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
724 __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
725 __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
726 __m128 b0 = _mm_unpacklo_ps( a0, a1 );
727 __m128 b1 = _mm_unpackhi_ps( a0, a1 );
728 __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
729 __m128 r = _mm_movelh_ps( b0, b2 );
730 r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
731 a2 = _mm_and_ps( a2, btvxyzMaskf);
732 r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
733 return btVector3(r);
734
735 #elif defined(BT_USE_NEON)
736 static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
737 float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
738 float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
739 float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
740 float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
741 a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
742 float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
743 float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
744 return btVector3( vcombine_f32(b0, b1) );
745 #else
746 return btVector3( dot(v0), dot(v1), dot(v2));
747 #endif
748 }
749 };
750
751 /**@brief Return the sum of two vectors (Point symantics)*/
752 SIMD_FORCE_INLINE btVector3
753 operator+(const btVector3& v1, const btVector3& v2)
754 {
755 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
756 return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
757 #elif defined(BT_USE_NEON)
758 return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
759 #else
760 return btVector3(
761 v1.m_floats[0] + v2.m_floats[0],
762 v1.m_floats[1] + v2.m_floats[1],
763 v1.m_floats[2] + v2.m_floats[2]);
764 #endif
765 }
766
767 /**@brief Return the elementwise product of two vectors */
768 SIMD_FORCE_INLINE btVector3
769 operator*(const btVector3& v1, const btVector3& v2)
770 {
771 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
772 return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
773 #elif defined(BT_USE_NEON)
774 return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
775 #else
776 return btVector3(
777 v1.m_floats[0] * v2.m_floats[0],
778 v1.m_floats[1] * v2.m_floats[1],
779 v1.m_floats[2] * v2.m_floats[2]);
780 #endif
781 }
782
783 /**@brief Return the difference between two vectors */
784 SIMD_FORCE_INLINE btVector3
785 operator-(const btVector3& v1, const btVector3& v2)
786 {
787 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
788
789 // without _mm_and_ps this code causes slowdown in Concave moving
790 __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
791 return btVector3(_mm_and_ps(r, btvFFF0fMask));
792 #elif defined(BT_USE_NEON)
793 float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
794 return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
795 #else
796 return btVector3(
797 v1.m_floats[0] - v2.m_floats[0],
798 v1.m_floats[1] - v2.m_floats[1],
799 v1.m_floats[2] - v2.m_floats[2]);
800 #endif
801 }
802
803 /**@brief Return the negative of the vector */
804 SIMD_FORCE_INLINE btVector3
805 operator-(const btVector3& v)
806 {
807 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
808 __m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
809 return btVector3(_mm_and_ps(r, btvFFF0fMask));
810 #elif defined(BT_USE_NEON)
811 return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
812 #else
813 return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
814 #endif
815 }
816
817 /**@brief Return the vector scaled by s */
818 SIMD_FORCE_INLINE btVector3
819 operator*(const btVector3& v, const btScalar& s)
820 {
821 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
822 __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
823 vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
824 return btVector3(_mm_mul_ps(v.mVec128, vs));
825 #elif defined(BT_USE_NEON)
826 float32x4_t r = vmulq_n_f32(v.mVec128, s);
827 return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
828 #else
829 return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
830 #endif
831 }
832
833 /**@brief Return the vector scaled by s */
834 SIMD_FORCE_INLINE btVector3
835 operator*(const btScalar& s, const btVector3& v)
836 {
837 return v * s;
838 }
839
840 /**@brief Return the vector inversely scaled by s */
841 SIMD_FORCE_INLINE btVector3
842 operator/(const btVector3& v, const btScalar& s)
843 {
844 btFullAssert(s != btScalar(0.0));
845 #if 0 //defined(BT_USE_SSE_IN_API)
846 // this code is not faster !
847 __m128 vs = _mm_load_ss(&s);
848 vs = _mm_div_ss(v1110, vs);
849 vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
850
851 return btVector3(_mm_mul_ps(v.mVec128, vs));
852 #else
853 return v * (btScalar(1.0) / s);
854 #endif
855 }
856
857 /**@brief Return the vector inversely scaled by s */
858 SIMD_FORCE_INLINE btVector3
859 operator/(const btVector3& v1, const btVector3& v2)
860 {
861 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
862 __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
863 vec = _mm_and_ps(vec, btvFFF0fMask);
864 return btVector3(vec);
865 #elif defined(BT_USE_NEON)
866 float32x4_t x, y, v, m;
867
868 x = v1.mVec128;
869 y = v2.mVec128;
870
871 v = vrecpeq_f32(y); // v ~ 1/y
872 m = vrecpsq_f32(y, v); // m = (2-v*y)
873 v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
874 m = vrecpsq_f32(y, v); // mm = (2-vv*y)
875 v = vmulq_f32(v, x); // x*vv
876 v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
877
878 return btVector3(v);
879 #else
880 return btVector3(
881 v1.m_floats[0] / v2.m_floats[0],
882 v1.m_floats[1] / v2.m_floats[1],
883 v1.m_floats[2] / v2.m_floats[2]);
884 #endif
885 }
886
887 /**@brief Return the dot product between two vectors */
888 SIMD_FORCE_INLINE btScalar
btDot(const btVector3 & v1,const btVector3 & v2)889 btDot(const btVector3& v1, const btVector3& v2)
890 {
891 return v1.dot(v2);
892 }
893
894
895 /**@brief Return the distance squared between two vectors */
896 SIMD_FORCE_INLINE btScalar
btDistance2(const btVector3 & v1,const btVector3 & v2)897 btDistance2(const btVector3& v1, const btVector3& v2)
898 {
899 return v1.distance2(v2);
900 }
901
902
903 /**@brief Return the distance between two vectors */
904 SIMD_FORCE_INLINE btScalar
btDistance(const btVector3 & v1,const btVector3 & v2)905 btDistance(const btVector3& v1, const btVector3& v2)
906 {
907 return v1.distance(v2);
908 }
909
910 /**@brief Return the angle between two vectors */
911 SIMD_FORCE_INLINE btScalar
btAngle(const btVector3 & v1,const btVector3 & v2)912 btAngle(const btVector3& v1, const btVector3& v2)
913 {
914 return v1.angle(v2);
915 }
916
917 /**@brief Return the cross product of two vectors */
918 SIMD_FORCE_INLINE btVector3
btCross(const btVector3 & v1,const btVector3 & v2)919 btCross(const btVector3& v1, const btVector3& v2)
920 {
921 return v1.cross(v2);
922 }
923
924 SIMD_FORCE_INLINE btScalar
btTriple(const btVector3 & v1,const btVector3 & v2,const btVector3 & v3)925 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
926 {
927 return v1.triple(v2, v3);
928 }
929
930 /**@brief Return the linear interpolation between two vectors
931 * @param v1 One vector
932 * @param v2 The other vector
933 * @param t The ration of this to v (t = 0 => return v1, t=1 => return v2) */
934 SIMD_FORCE_INLINE btVector3
lerp(const btVector3 & v1,const btVector3 & v2,const btScalar & t)935 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
936 {
937 return v1.lerp(v2, t);
938 }
939
940
941
distance2(const btVector3 & v)942 SIMD_FORCE_INLINE btScalar btVector3::distance2(const btVector3& v) const
943 {
944 return (v - *this).length2();
945 }
946
distance(const btVector3 & v)947 SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
948 {
949 return (v - *this).length();
950 }
951
normalized()952 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
953 {
954 btVector3 nrm = *this;
955
956 return nrm.normalize();
957 }
958
rotate(const btVector3 & wAxis,const btScalar _angle)959 SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
960 {
961 // wAxis must be a unit lenght vector
962
963 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
964
965 __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
966 btScalar ssin = btSin( _angle );
967 __m128 C = wAxis.cross( mVec128 ).mVec128;
968 O = _mm_and_ps(O, btvFFF0fMask);
969 btScalar scos = btCos( _angle );
970
971 __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
972 __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
973
974 __m128 Y = bt_pshufd_ps(O, 0xC9); // (Y Z X 0)
975 __m128 Z = bt_pshufd_ps(O, 0xD2); // (Z X Y 0)
976 O = _mm_add_ps(O, Y);
977 vsin = bt_pshufd_ps(vsin, 0x80); // (S S S 0)
978 O = _mm_add_ps(O, Z);
979 vcos = bt_pshufd_ps(vcos, 0x80); // (S S S 0)
980
981 vsin = vsin * C;
982 O = O * wAxis.mVec128;
983 __m128 X = mVec128 - O;
984
985 O = O + vsin;
986 vcos = vcos * X;
987 O = O + vcos;
988
989 return btVector3(O);
990 #else
991 btVector3 o = wAxis * wAxis.dot( *this );
992 btVector3 _x = *this - o;
993 btVector3 _y;
994
995 _y = wAxis.cross( *this );
996
997 return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
998 #endif
999 }
1000
maxDot(const btVector3 * array,long array_count,btScalar & dotOut)1001 SIMD_FORCE_INLINE long btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1002 {
1003 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1004 #if defined _WIN32 || defined (BT_USE_SSE)
1005 const long scalar_cutoff = 10;
1006 long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1007 #elif defined BT_USE_NEON
1008 const long scalar_cutoff = 4;
1009 extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1010 #endif
1011 if( array_count < scalar_cutoff )
1012 #endif
1013 {
1014 btScalar maxDot1 = -SIMD_INFINITY;
1015 int i = 0;
1016 int ptIndex = -1;
1017 for( i = 0; i < array_count; i++ )
1018 {
1019 btScalar dot = array[i].dot(*this);
1020
1021 if( dot > maxDot1 )
1022 {
1023 maxDot1 = dot;
1024 ptIndex = i;
1025 }
1026 }
1027
1028 dotOut = maxDot1;
1029 return ptIndex;
1030 }
1031 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1032 return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1033 #endif
1034 }
1035
minDot(const btVector3 * array,long array_count,btScalar & dotOut)1036 SIMD_FORCE_INLINE long btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1037 {
1038 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1039 #if defined BT_USE_SSE
1040 const long scalar_cutoff = 10;
1041 long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1042 #elif defined BT_USE_NEON
1043 const long scalar_cutoff = 4;
1044 extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1045 #else
1046 #error unhandled arch!
1047 #endif
1048
1049 if( array_count < scalar_cutoff )
1050 #endif
1051 {
1052 btScalar minDot = SIMD_INFINITY;
1053 int i = 0;
1054 int ptIndex = -1;
1055
1056 for( i = 0; i < array_count; i++ )
1057 {
1058 btScalar dot = array[i].dot(*this);
1059
1060 if( dot < minDot )
1061 {
1062 minDot = dot;
1063 ptIndex = i;
1064 }
1065 }
1066
1067 dotOut = minDot;
1068
1069 return ptIndex;
1070 }
1071 #if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1072 return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1073 #endif//BT_USE_SIMD_VECTOR3
1074 }
1075
1076
1077 class btVector4 : public btVector3
1078 {
1079 public:
1080
btVector4()1081 SIMD_FORCE_INLINE btVector4() {}
1082
1083
btVector4(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)1084 SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1085 : btVector3(_x,_y,_z)
1086 {
1087 m_floats[3] = _w;
1088 }
1089
1090 #if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
btVector4(const btSimdFloat4 vec)1091 SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
1092 {
1093 mVec128 = vec;
1094 }
1095
btVector4(const btVector3 & rhs)1096 SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
1097 {
1098 mVec128 = rhs.mVec128;
1099 }
1100
1101 SIMD_FORCE_INLINE btVector4&
1102 operator=(const btVector4& v)
1103 {
1104 mVec128 = v.mVec128;
1105 return *this;
1106 }
1107 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1108
absolute4()1109 SIMD_FORCE_INLINE btVector4 absolute4() const
1110 {
1111 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
1112 return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
1113 #elif defined(BT_USE_NEON)
1114 return btVector4(vabsq_f32(mVec128));
1115 #else
1116 return btVector4(
1117 btFabs(m_floats[0]),
1118 btFabs(m_floats[1]),
1119 btFabs(m_floats[2]),
1120 btFabs(m_floats[3]));
1121 #endif
1122 }
1123
1124
getW()1125 btScalar getW() const { return m_floats[3];}
1126
1127
maxAxis4()1128 SIMD_FORCE_INLINE int maxAxis4() const
1129 {
1130 int maxIndex = -1;
1131 btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
1132 if (m_floats[0] > maxVal)
1133 {
1134 maxIndex = 0;
1135 maxVal = m_floats[0];
1136 }
1137 if (m_floats[1] > maxVal)
1138 {
1139 maxIndex = 1;
1140 maxVal = m_floats[1];
1141 }
1142 if (m_floats[2] > maxVal)
1143 {
1144 maxIndex = 2;
1145 maxVal =m_floats[2];
1146 }
1147 if (m_floats[3] > maxVal)
1148 {
1149 maxIndex = 3;
1150 maxVal = m_floats[3];
1151 }
1152
1153 return maxIndex;
1154 }
1155
1156
minAxis4()1157 SIMD_FORCE_INLINE int minAxis4() const
1158 {
1159 int minIndex = -1;
1160 btScalar minVal = btScalar(BT_LARGE_FLOAT);
1161 if (m_floats[0] < minVal)
1162 {
1163 minIndex = 0;
1164 minVal = m_floats[0];
1165 }
1166 if (m_floats[1] < minVal)
1167 {
1168 minIndex = 1;
1169 minVal = m_floats[1];
1170 }
1171 if (m_floats[2] < minVal)
1172 {
1173 minIndex = 2;
1174 minVal =m_floats[2];
1175 }
1176 if (m_floats[3] < minVal)
1177 {
1178 minIndex = 3;
1179 minVal = m_floats[3];
1180 }
1181
1182 return minIndex;
1183 }
1184
1185
closestAxis4()1186 SIMD_FORCE_INLINE int closestAxis4() const
1187 {
1188 return absolute4().maxAxis4();
1189 }
1190
1191
1192
1193
1194 /**@brief Set x,y,z and zero w
1195 * @param x Value of x
1196 * @param y Value of y
1197 * @param z Value of z
1198 */
1199
1200
1201 /* void getValue(btScalar *m) const
1202 {
1203 m[0] = m_floats[0];
1204 m[1] = m_floats[1];
1205 m[2] =m_floats[2];
1206 }
1207 */
1208 /**@brief Set the values
1209 * @param x Value of x
1210 * @param y Value of y
1211 * @param z Value of z
1212 * @param w Value of w
1213 */
setValue(const btScalar & _x,const btScalar & _y,const btScalar & _z,const btScalar & _w)1214 SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1215 {
1216 m_floats[0]=_x;
1217 m_floats[1]=_y;
1218 m_floats[2]=_z;
1219 m_floats[3]=_w;
1220 }
1221
1222
1223 };
1224
1225
1226 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapScalarEndian(const btScalar & sourceVal,btScalar & destVal)1227 SIMD_FORCE_INLINE void btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1228 {
1229 #ifdef BT_USE_DOUBLE_PRECISION
1230 unsigned char* dest = (unsigned char*) &destVal;
1231 unsigned char* src = (unsigned char*) &sourceVal;
1232 dest[0] = src[7];
1233 dest[1] = src[6];
1234 dest[2] = src[5];
1235 dest[3] = src[4];
1236 dest[4] = src[3];
1237 dest[5] = src[2];
1238 dest[6] = src[1];
1239 dest[7] = src[0];
1240 #else
1241 unsigned char* dest = (unsigned char*) &destVal;
1242 unsigned char* src = (unsigned char*) &sourceVal;
1243 dest[0] = src[3];
1244 dest[1] = src[2];
1245 dest[2] = src[1];
1246 dest[3] = src[0];
1247 #endif //BT_USE_DOUBLE_PRECISION
1248 }
1249 ///btSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btSwapVector3Endian(const btVector3 & sourceVec,btVector3 & destVec)1250 SIMD_FORCE_INLINE void btSwapVector3Endian(const btVector3& sourceVec, btVector3& destVec)
1251 {
1252 for (int i=0;i<4;i++)
1253 {
1254 btSwapScalarEndian(sourceVec[i],destVec[i]);
1255 }
1256
1257 }
1258
1259 ///btUnSwapVector3Endian swaps vector endianness, useful for network and cross-platform serialization
btUnSwapVector3Endian(btVector3 & vector)1260 SIMD_FORCE_INLINE void btUnSwapVector3Endian(btVector3& vector)
1261 {
1262
1263 btVector3 swappedVec;
1264 for (int i=0;i<4;i++)
1265 {
1266 btSwapScalarEndian(vector[i],swappedVec[i]);
1267 }
1268 vector = swappedVec;
1269 }
1270
1271 template <class T>
btPlaneSpace1(const T & n,T & p,T & q)1272 SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
1273 {
1274 if (btFabs(n[2]) > SIMDSQRT12) {
1275 // choose p in y-z plane
1276 btScalar a = n[1]*n[1] + n[2]*n[2];
1277 btScalar k = btRecipSqrt (a);
1278 p[0] = 0;
1279 p[1] = -n[2]*k;
1280 p[2] = n[1]*k;
1281 // set q = n x p
1282 q[0] = a*k;
1283 q[1] = -n[0]*p[2];
1284 q[2] = n[0]*p[1];
1285 }
1286 else {
1287 // choose p in x-y plane
1288 btScalar a = n[0]*n[0] + n[1]*n[1];
1289 btScalar k = btRecipSqrt (a);
1290 p[0] = -n[1]*k;
1291 p[1] = n[0]*k;
1292 p[2] = 0;
1293 // set q = n x p
1294 q[0] = -n[2]*p[1];
1295 q[1] = n[2]*p[0];
1296 q[2] = a*k;
1297 }
1298 }
1299
1300
1301 struct btVector3FloatData
1302 {
1303 float m_floats[4];
1304 };
1305
1306 struct btVector3DoubleData
1307 {
1308 double m_floats[4];
1309
1310 };
1311
serializeFloat(struct btVector3FloatData & dataOut)1312 SIMD_FORCE_INLINE void btVector3::serializeFloat(struct btVector3FloatData& dataOut) const
1313 {
1314 ///could also do a memcpy, check if it is worth it
1315 for (int i=0;i<4;i++)
1316 dataOut.m_floats[i] = float(m_floats[i]);
1317 }
1318
deSerializeFloat(const struct btVector3FloatData & dataIn)1319 SIMD_FORCE_INLINE void btVector3::deSerializeFloat(const struct btVector3FloatData& dataIn)
1320 {
1321 for (int i=0;i<4;i++)
1322 m_floats[i] = btScalar(dataIn.m_floats[i]);
1323 }
1324
1325
serializeDouble(struct btVector3DoubleData & dataOut)1326 SIMD_FORCE_INLINE void btVector3::serializeDouble(struct btVector3DoubleData& dataOut) const
1327 {
1328 ///could also do a memcpy, check if it is worth it
1329 for (int i=0;i<4;i++)
1330 dataOut.m_floats[i] = double(m_floats[i]);
1331 }
1332
deSerializeDouble(const struct btVector3DoubleData & dataIn)1333 SIMD_FORCE_INLINE void btVector3::deSerializeDouble(const struct btVector3DoubleData& dataIn)
1334 {
1335 for (int i=0;i<4;i++)
1336 m_floats[i] = btScalar(dataIn.m_floats[i]);
1337 }
1338
1339
serialize(struct btVector3Data & dataOut)1340 SIMD_FORCE_INLINE void btVector3::serialize(struct btVector3Data& dataOut) const
1341 {
1342 ///could also do a memcpy, check if it is worth it
1343 for (int i=0;i<4;i++)
1344 dataOut.m_floats[i] = m_floats[i];
1345 }
1346
deSerialize(const struct btVector3Data & dataIn)1347 SIMD_FORCE_INLINE void btVector3::deSerialize(const struct btVector3Data& dataIn)
1348 {
1349 for (int i=0;i<4;i++)
1350 m_floats[i] = dataIn.m_floats[i];
1351 }
1352
1353 #endif //BT_VECTOR3_H
1354