1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #ifndef API_BASE_MATH_MATRIX_H
17 #define API_BASE_MATH_MATRIX_H
18
19 #include <cstddef>
20 #if defined(_M_X64)
21 #include <immintrin.h>
22 #elif defined(_M_ARM64) || defined(__ARM_ARCH_ISA_A64)
23 #include <arm_neon.h>
24 #endif
25 #include <base/containers/type_traits.h>
26 #include <base/math/vector.h>
27 #include <base/math/vector_util.h>
28 #include <base/namespace.h>
29
30 // m00[0] m01[3] m02[6]
31 // m10[1] m11[4] m12[7]
32 // m20[2] m21[5] m22[8]
33
34 // m00[0] m01[4] m02[8] m03[12]
35 // m10[1] m11[5] m12[9] m13[13]
36 // m20[2] m21[6] m22[10] m23[14]
37 // m30[3] m31[7] m32[11] m33[15]
38
39 // m00[0] m01[4] m02[8]
40 // m10[1] m11[5] m12[9]
41 // m20[2] m21[6] m22[10]
42 // m30[3] m31[7] m32[11]
BASE_BEGIN_NAMESPACE()43 BASE_BEGIN_NAMESPACE()
44 namespace Math {
45 #include <base/math/disable_warning_4201_heading.h>
46
47 /** @ingroup group_math_matrix */
48 /** Matrix 3X3 presentation in column major format */
49 class Mat3X3 final {
50 public:
51 union {
52 struct {
53 Vec3 x, y, z;
54 };
55 Vec3 base[3];
56 float data[9];
57 };
58
59 /** Subscript operator */
60 constexpr Vec3& operator[](size_t aIndex)
61 {
62 return base[aIndex];
63 }
64
65 /** Subscript operator */
66 constexpr const Vec3& operator[](size_t aIndex) const
67 {
68 return base[aIndex];
69 }
70
71 // Constructors
72 /** Default constructor */
73 inline constexpr Mat3X3() noexcept : data { 0 } {}
74
75 /** Identity constructor */
76 inline explicit constexpr Mat3X3(float id) noexcept : data { id, 0.0f, 0.0f, 0.0f, id, 0.0f, 0.0f, 0.0f, id } {}
77
78 /** Constructor for using Vector3's */
79 inline constexpr Mat3X3(Vec3 const& v0, Vec3 const& v1, Vec3 const& v2) noexcept : x(v0), y(v1), z(v2) {}
80
81 /** Constructor for array of floats */
82 inline constexpr Mat3X3(const float d[9]) noexcept : data { d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8] }
83 {}
84
85 inline ~Mat3X3() = default;
86
87 /** Multiply two matrices */
88 inline constexpr Mat3X3 operator*(const Mat3X3& rhs) const
89 {
90 const Vec3& rha { rhs.x.x, rhs.y.x, rhs.z.x };
91 const Vec3& rhb { rhs.x.y, rhs.y.y, rhs.z.y };
92 const Vec3& rhc { rhs.x.z, rhs.y.z, rhs.z.z };
93
94 return { { Dot(x, rha), Dot(x, rhb), Dot(x, rhc) }, { Dot(y, rha), Dot(y, rhb), Dot(y, rhc) },
95 { Dot(z, rha), Dot(z, rhb), Dot(z, rhc) } };
96 }
97
98 /** Multiply columns by float scalar value */
99 inline constexpr Mat3X3 operator*(const float& scalar) const
100 {
101 return Mat3X3(x * scalar, y * scalar, z * scalar);
102 }
103
104 /** Equality operator, returns true if matrices are equal */
105 inline constexpr bool operator==(const Mat3X3& mat) const
106 {
107 for (size_t i = 0; i < countof(data); ++i) {
108 if (data[i] != mat.data[i]) {
109 return false;
110 }
111 }
112 return true;
113 }
114
115 /** Inequality operator, returns true if matrices are inequal */
116 inline constexpr bool operator!=(const Mat3X3& mat) const
117 {
118 for (size_t i = 0; i < countof(data); ++i) {
119 if (data[i] != mat.data[i]) {
120 return true;
121 }
122 }
123 return false;
124 }
125 };
126
127 // Assert that Mat3X3 is the same as 9 floats
128 static_assert(sizeof(Mat3X3) == 9 * sizeof(float));
129
130 static constexpr Mat3X3 IDENTITY_3X3(1.f);
131
132 /** @ingroup group_math_matrix */
133 /** Matrix 4X4 presentation in column major format */
134 class Mat4X4 final {
135 public:
136 union {
137 struct {
138 Vec4 x, y, z, w;
139 };
140 Vec4 base[4]; // base[0] is X ,base [1] is Y, etc..
141 float data[16];
142 };
143
144 // "For programming purposes, OpenGL matrices are 16-value arrays with base vectors laid out contiguously in memory.
145 // The translation components occupy the 13th, 14th, and 15th elements of the 16-element matrix."
146 // https://www.khronos.org/opengl/wiki/General_OpenGL:_Transformations#Are_OpenGL_matrices_column-major_or_row-major.3F
147 // this is also the same as with glm.
148 /** Subscript operator */
149 constexpr Vec4& operator[](size_t aIndex)
150 {
151 return base[aIndex];
152 }
153
154 /** Subscript operator */
155 constexpr const Vec4& operator[](size_t aIndex) const
156 {
157 return base[aIndex];
158 }
159
160 // Constructors
161 /** Zero initializer constructor */
162 inline constexpr Mat4X4() : data { 0 } {}
163
164 /** Constructor for Vector4's */
165 inline constexpr Mat4X4(Vec4 const& v0, Vec4 const& v1, Vec4 const& v2, Vec4 const& v3) : x(v0), y(v1), z(v2), w(v3)
166 {}
167
168 /** Constructor for array of floats */
169 inline constexpr Mat4X4(const float d[16])
170 : data { d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15] }
171 {}
172
173 /** Constructor for floats */
174 inline constexpr Mat4X4(float d0, float d1, float d2, float d3, float d4, float d5, float d6, float d7, float d8,
175 float d9, float d10, float d11, float d12, float d13, float d14, float d15)
176 : data { d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, d15 }
177 {}
178
179 /** Identity constructor */
180 inline explicit constexpr Mat4X4(float id)
181 : data { id, 0.0f, 0.0f, 0.0f, 0.0f, id, 0.0f, 0.0f, 0.0f, 0.0f, id, 0.0f, 0.0f, 0.0f, 0.0f, id }
182 {}
183
184 /** Conversion constructor from Mat3X3 to Mat4X4 */
185 explicit inline constexpr Mat4X4(const Mat3X3& mat3X3)
186 : data { mat3X3.data[0], mat3X3.data[1], mat3X3.data[2], 0.0f, mat3X3.data[3], mat3X3.data[4], mat3X3.data[5],
187 0.0f, mat3X3.data[6], mat3X3.data[7], mat3X3.data[8], 0.0f, 0.0f, 0.0f, 0.0f, 1.0f }
188 {}
189
190 inline ~Mat4X4() = default;
191
192 /** Multiply two matrices */
193 inline Mat4X4 operator*(const Mat4X4& rhs) const
194 {
195 Mat4X4 res;
196 #if defined(BASE_SIMD) && defined(_M_X64)
197 #if USE_FMA_AVX
198 // duplicate components of the first column of rhs.
199 __m128 rhsX = _mm_broadcast_ss(rhs.x.data + 0);
200 __m128 rhsY = _mm_broadcast_ss(rhs.x.data + 1);
201 __m128 rhsZ = _mm_broadcast_ss(rhs.x.data + 2);
202 __m128 rhsW = _mm_broadcast_ss(rhs.x.data + 3);
203 // multiply each column of lhs with rhs's first columns components while adding the results
204 __m128 rhsC = _mm_mul_ps(x.vec4, rhsX); // res.x = lhs.x * rhs.x.x
205 rhsC = _mm_fmadd_ps(y.vec4, rhsY, rhsC); // res.x += lhs.y * rhs.x.y
206 rhsC = _mm_fmadd_ps(z.vec4, rhsZ, rhsC); // res.x += lhs.z * rhs.x.z
207 res.x.vec4 = _mm_fmadd_ps(w.vec4, rhsW, rhsC); // res.x += lhs.w * rhs.x.w
208
209 // repeat for second column of rhs
210 rhsX = _mm_broadcast_ss(rhs.y.data + 0);
211 rhsY = _mm_broadcast_ss(rhs.y.data + 1);
212 rhsZ = _mm_broadcast_ss(rhs.y.data + 2);
213 rhsW = _mm_broadcast_ss(rhs.y.data + 3);
214 rhsC = _mm_mul_ps(x.vec4, rhsX);
215 rhsC = _mm_fmadd_ps(y.vec4, rhsY, rhsC);
216 rhsC = _mm_fmadd_ps(z.vec4, rhsZ, rhsC);
217 res.y.vec4 = _mm_fmadd_ps(w.vec4, rhsW, rhsC);
218
219 // repeat for third column of rhs
220 rhsX = _mm_broadcast_ss(rhs.z.data + 0);
221 rhsY = _mm_broadcast_ss(rhs.z.data + 1);
222 rhsZ = _mm_broadcast_ss(rhs.z.data + 2);
223 rhsW = _mm_broadcast_ss(rhs.z.data + 3);
224 rhsC = _mm_mul_ps(x.vec4, rhsX);
225 rhsC = _mm_fmadd_ps(y.vec4, rhsY, rhsC);
226 rhsC = _mm_fmadd_ps(z.vec4, rhsZ, rhsC);
227 res.z.vec4 = _mm_fmadd_ps(w.vec4, rhsW, rhsC);
228
229 // repeat for fourth column of rhs
230 rhsX = _mm_broadcast_ss(rhs.w.data + 0);
231 rhsY = _mm_broadcast_ss(rhs.w.data + 1);
232 rhsZ = _mm_broadcast_ss(rhs.w.data + 2);
233 rhsW = _mm_broadcast_ss(rhs.w.data + 3);
234 rhsC = _mm_mul_ps(x.vec4, rhsX);
235 rhsC = _mm_fmadd_ps(y.vec4, rhsY, rhsC);
236 rhsC = _mm_fmadd_ps(z.vec4, rhsZ, rhsC);
237 res.w.vec4 = _mm_fmadd_ps(w.vec4, rhsW, rhsC);
238 #else
239 // only SSE intrisics which should be fine with any x64
240 // duplicate components of the first column of rhs.
241 __m128 rhsX = _mm_set_ps1(rhs.x.x);
242 __m128 rhsY = _mm_set_ps1(rhs.x.y);
243 __m128 rhsZ = _mm_set_ps1(rhs.x.z);
244 __m128 rhsW = _mm_set_ps1(rhs.x.w);
245 // multiply each column of lhs with rhs's first columns components
246 rhsX = _mm_mul_ps(x.vec4, rhsX); // lhs.x * rhs.x.x
247 rhsY = _mm_mul_ps(y.vec4, rhsY); // lhs.y * rhs.x.y
248 rhsZ = _mm_mul_ps(z.vec4, rhsZ); // lhs.z * rhs.x.z
249 rhsW = _mm_mul_ps(w.vec4, rhsW); // lhs.w * rhs.x.w
250 // sum the products
251 rhsX = _mm_add_ps(rhsX, rhsY); // (lhs.x * rhs.x.x) + (lhs.y * rhs.x.y)
252 rhsZ = _mm_add_ps(rhsZ, rhsW); // (lhs.z * rhs.x.z) + (lhs.w * rhs.x.w)
253 res.x.vec4 = _mm_add_ps(rhsX, rhsZ);
254
255 // repeat for second column of rhs
256 rhsX = _mm_set_ps1(rhs.y.x);
257 rhsY = _mm_set_ps1(rhs.y.y);
258 rhsZ = _mm_set_ps1(rhs.y.z);
259 rhsW = _mm_set_ps1(rhs.y.w);
260 rhsX = _mm_mul_ps(x.vec4, rhsX);
261 rhsY = _mm_mul_ps(y.vec4, rhsY);
262 rhsZ = _mm_mul_ps(z.vec4, rhsZ);
263 rhsW = _mm_mul_ps(w.vec4, rhsW);
264 rhsX = _mm_add_ps(rhsX, rhsY);
265 rhsZ = _mm_add_ps(rhsZ, rhsW);
266 res.y.vec4 = _mm_add_ps(rhsX, rhsZ);
267
268 // repeat for third column of rhs
269 rhsX = _mm_set_ps1(rhs.z.x);
270 rhsY = _mm_set_ps1(rhs.z.y);
271 rhsZ = _mm_set_ps1(rhs.z.z);
272 rhsW = _mm_set_ps1(rhs.z.w);
273 rhsX = _mm_mul_ps(x.vec4, rhsX);
274 rhsY = _mm_mul_ps(y.vec4, rhsY);
275 rhsZ = _mm_mul_ps(z.vec4, rhsZ);
276 rhsW = _mm_mul_ps(w.vec4, rhsW);
277 rhsX = _mm_add_ps(rhsX, rhsY);
278 rhsZ = _mm_add_ps(rhsZ, rhsW);
279 res.z.vec4 = _mm_add_ps(rhsX, rhsZ);
280
281 // repeat for fourth column of rhs
282 rhsX = _mm_set_ps1(rhs.w.x);
283 rhsY = _mm_set_ps1(rhs.w.y);
284 rhsZ = _mm_set_ps1(rhs.w.z);
285 rhsW = _mm_set_ps1(rhs.w.w);
286 rhsX = _mm_mul_ps(x.vec4, rhsX);
287 rhsY = _mm_mul_ps(y.vec4, rhsY);
288 rhsZ = _mm_mul_ps(z.vec4, rhsZ);
289 rhsW = _mm_mul_ps(w.vec4, rhsW);
290 rhsX = _mm_add_ps(rhsX, rhsY);
291 rhsZ = _mm_add_ps(rhsZ, rhsW);
292 res.w.vec4 = _mm_add_ps(rhsX, rhsZ);
293 #endif
294 #elif defined(_M_ARM64) || defined(__ARM_ARCH_ISA_A64)
295 // multiply each column of lhs with rhs's first columns components
296 float32x4_t rhsC = vld1q_f32(rhs.x.data);
297 res.x.vec4 = vmovq_n_f32(0);
298 res.x.vec4 = vfmaq_laneq_f32(res.x.vec4, x.vec4, rhsC, 0); // res.x += lhs.x * rhs.x.x
299 res.x.vec4 = vfmaq_laneq_f32(res.x.vec4, y.vec4, rhsC, 1); // res.x += lhs.y * rhs.x.y
300 res.x.vec4 = vfmaq_laneq_f32(res.x.vec4, z.vec4, rhsC, 2); // res.x += lhs.z * rhs.x.z
301 res.x.vec4 = vfmaq_laneq_f32(res.x.vec4, w.vec4, rhsC, 3); // res.x += lhs.w * rhs.x.w
302
303 // repeat for second column of rhs
304 rhsC = vld1q_f32(rhs.y.data);
305 res.y.vec4 = vmovq_n_f32(0);
306 res.y.vec4 = vfmaq_laneq_f32(res.y.vec4, x.vec4, rhsC, 0);
307 res.y.vec4 = vfmaq_laneq_f32(res.y.vec4, y.vec4, rhsC, 1);
308 res.y.vec4 = vfmaq_laneq_f32(res.y.vec4, z.vec4, rhsC, 2);
309 res.y.vec4 = vfmaq_laneq_f32(res.y.vec4, w.vec4, rhsC, 3);
310
311 // repeat for third column of rhs
312 rhsC = vld1q_f32(rhs.z.data);
313 res.z.vec4 = vmovq_n_f32(0);
314 res.z.vec4 = vfmaq_laneq_f32(res.z.vec4, x.vec4, rhsC, 0);
315 res.z.vec4 = vfmaq_laneq_f32(res.z.vec4, y.vec4, rhsC, 1);
316 res.z.vec4 = vfmaq_laneq_f32(res.z.vec4, z.vec4, rhsC, 2);
317 res.z.vec4 = vfmaq_laneq_f32(res.z.vec4, w.vec4, rhsC, 3);
318
319 // repeat for fourth column of rhs
320 rhsC = vld1q_f32(rhs.w.data);
321 res.w.vec4 = vmovq_n_f32(0);
322 res.w.vec4 = vfmaq_laneq_f32(res.w.vec4, x.vec4, rhsC, 0);
323 res.w.vec4 = vfmaq_laneq_f32(res.w.vec4, y.vec4, rhsC, 1);
324 res.w.vec4 = vfmaq_laneq_f32(res.w.vec4, z.vec4, rhsC, 2);
325 res.w.vec4 = vfmaq_laneq_f32(res.w.vec4, w.vec4, rhsC, 3);
326 #else
327 #define d data
328 res.d[0] = d[0] * rhs.d[0] + d[4] * rhs.d[1] + d[8] * rhs.d[2] + d[12] * rhs.d[3];
329 res.d[4] = d[0] * rhs.d[4] + d[4] * rhs.d[5] + d[8] * rhs.d[6] + d[12] * rhs.d[7];
330 res.d[8] = d[0] * rhs.d[8] + d[4] * rhs.d[9] + d[8] * rhs.d[10] + d[12] * rhs.d[11];
331 res.d[12] = d[0] * rhs.d[12] + d[4] * rhs.d[13] + d[8] * rhs.d[14] + d[12] * rhs.d[15];
332
333 res.d[1] = d[1] * rhs.d[0] + d[5] * rhs.d[1] + d[9] * rhs.d[2] + d[13] * rhs.d[3];
334 res.d[5] = d[1] * rhs.d[4] + d[5] * rhs.d[5] + d[9] * rhs.d[6] + d[13] * rhs.d[7];
335 res.d[9] = d[1] * rhs.d[8] + d[5] * rhs.d[9] + d[9] * rhs.d[10] + d[13] * rhs.d[11];
336 res.d[13] = d[1] * rhs.d[12] + d[5] * rhs.d[13] + d[9] * rhs.d[14] + d[13] * rhs.d[15];
337
338 res.d[2] = d[2] * rhs.d[0] + d[6] * rhs.d[1] + d[10] * rhs.d[2] + d[14] * rhs.d[3];
339 res.d[6] = d[2] * rhs.d[4] + d[6] * rhs.d[5] + d[10] * rhs.d[6] + d[14] * rhs.d[7];
340 res.d[10] = d[2] * rhs.d[8] + d[6] * rhs.d[9] + d[10] * rhs.d[10] + d[14] * rhs.d[11];
341 res.d[14] = d[2] * rhs.d[12] + d[6] * rhs.d[13] + d[10] * rhs.d[14] + d[14] * rhs.d[15];
342
343 res.d[3] = d[3] * rhs.d[0] + d[7] * rhs.d[1] + d[11] * rhs.d[2] + d[15] * rhs.d[3];
344 res.d[7] = d[3] * rhs.d[4] + d[7] * rhs.d[5] + d[11] * rhs.d[6] + d[15] * rhs.d[7];
345 res.d[11] = d[3] * rhs.d[8] + d[7] * rhs.d[9] + d[11] * rhs.d[10] + d[15] * rhs.d[11];
346 res.d[15] = d[3] * rhs.d[12] + d[7] * rhs.d[13] + d[11] * rhs.d[14] + d[15] * rhs.d[15];
347 #undef d
348 #endif
349 return res;
350 }
351
352 /** Multiply columns by float scalar value */
353 inline constexpr Mat4X4 operator*(const float& scalar) const
354 {
355 return Mat4X4(x * scalar, y * scalar, z * scalar, w * scalar);
356 }
357
358 /** Equality operator, returns true if matrices are equal */
359 inline constexpr bool operator==(const Mat4X4& mat) const
360 {
361 for (size_t i = 0; i < countof(data); ++i) {
362 if (data[i] != mat.data[i]) {
363 return false;
364 }
365 }
366 return true;
367 }
368
369 /** Inequality operator, returns true if matrices are inequal */
370 inline constexpr bool operator!=(const Mat4X4& mat) const
371 {
372 for (size_t i = 0; i < countof(data); ++i) {
373 if (data[i] != mat.data[i]) {
374 return true;
375 }
376 }
377 return false;
378 }
379 };
380
381 // Assert that Mat4X4 is the same as 16 floats
382 static_assert(sizeof(Mat4X4) == 16 * sizeof(float));
383
384 static constexpr Mat4X4 IDENTITY_4X4(1.f);
385
386 /** @ingroup group_math_matrix */
387 /** Matrix 4X3 presentation in column major format */
388 class Mat4X3 final {
389 public:
390 union {
391 struct {
392 Vec3 x, y, z, w;
393 };
394 Vec3 base[4]; // base[0] is X ,base [1] is Y, etc..
395 float data[12];
396 };
397
398 // "For programming purposes, OpenGL matrices are 16-value arrays with base vectors laid out contiguously in memory.
399 // The translation components occupy the 13th, 14th, and 15th elements of the 16-element matrix."
400 // https://www.khronos.org/opengl/wiki/General_OpenGL:_Transformations#Are_OpenGL_matrices_column-major_or_row-major.3F
401 // this is also the same as with glm.
402 /** Subscript operator */
403 constexpr Vec3& operator[](size_t aIndex)
404 {
405 return base[aIndex];
406 }
407
408 /** Subscript operator */
409 constexpr const Vec3& operator[](size_t aIndex) const
410 {
411 return base[aIndex];
412 }
413
414 // Constructors
415 /** Zero initializer constructor */
416 inline constexpr Mat4X3() : data { 0 } {}
417
418 /** Constructor for Vector4's */
419 inline constexpr Mat4X3(Vec3 const& v0, Vec3 const& v1, Vec3 const& v2, Vec3 const& v3) : x(v0), y(v1), z(v2), w(v3)
420 {}
421
422 /** Constructor for array of floats */
423 inline constexpr Mat4X3(const float d[12])
424 : data { d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9], d[10], d[11] }
425 {}
426
427 /** Constructor for floats */
428 inline constexpr Mat4X3(float d0, float d1, float d2, float d3, float d4, float d5, float d6, float d7, float d8,
429 float d9, float d10, float d11)
430 : data { d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11 }
431 {}
432
433 /** Identity constructor */
434 inline explicit constexpr Mat4X3(float id)
435 : data { id, 0.0f, 0.0f, 0.0f, id, 0.0f, 0.0f, 0.0f, id, 0.0f, 0.0f, 0.0f }
436 {}
437
438 inline ~Mat4X3() = default;
439
440 /** Multiply columns by float scalar value */
441 inline constexpr Mat4X3 operator*(const float& scalar) const
442 {
443 return Mat4X3(x * scalar, y * scalar, z * scalar, w * scalar);
444 }
445
446 /** Equality operator, returns true if matrices are equal */
447 inline constexpr bool operator==(const Mat4X3& mat) const
448 {
449 for (size_t i = 0; i < countof(data); ++i) {
450 if (data[i] != mat.data[i]) {
451 return false;
452 }
453 }
454 return true;
455 }
456
457 /** Inequality operator, returns true if matrices are inequal */
458 inline constexpr bool operator!=(const Mat4X3& mat) const
459 {
460 for (size_t i = 0; i < countof(data); ++i) {
461 if (data[i] != mat.data[i]) {
462 return true;
463 }
464 }
465 return false;
466 }
467 };
468
469 // Assert that Mat4X4 is the same as 12 floats
470 static_assert(sizeof(Mat4X3) == 12 * sizeof(float));
471
472 static constexpr Mat4X3 IDENTITY_4X3(1.f);
473
474 #include <base/math/disable_warning_4201_footer.h>
475 } // namespace Math
476 BASE_END_NAMESPACE()
477
478 #endif // API_BASE_MATH_MATRIX_H
479