1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "common/math/mat.h"
18
19 #include <assert.h>
20 #include <float.h>
21
22 #ifdef _OS_BUILD_
23 #include <nanohub_math.h>
24 #include <seos.h>
25 #else
26 #include <math.h>
27 #ifndef UNROLLED
28 #define UNROLLED
29 #endif
30 #endif // _OS_BUILD_
31
32 #include <stddef.h>
33 #include <string.h>
34
35 #define EPSILON 1E-5f
36 #define CHOLESKY_TOLERANCE 1E-6f
37
38 // Forward declarations.
39 static void mat33SwapRows(struct Mat33 *A, uint32_t i, uint32_t j);
40 static uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k);
41 static void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k,
42 uint32_t l, uint32_t i, uint32_t j);
43
44 static void mat44SwapRows(struct Mat44 *A, uint32_t i, uint32_t j);
45
initZeroMatrix(struct Mat33 * A)46 void initZeroMatrix(struct Mat33 *A) {
47 ASSERT_NOT_NULL(A);
48 memset(A->elem, 0.0f, sizeof(A->elem));
49 }
50
51 UNROLLED
initDiagonalMatrix(struct Mat33 * A,float x)52 void initDiagonalMatrix(struct Mat33 *A, float x) {
53 ASSERT_NOT_NULL(A);
54 initZeroMatrix(A);
55
56 uint32_t i;
57 for (i = 0; i < 3; ++i) {
58 A->elem[i][i] = x;
59 }
60 }
61
initMatrixColumns(struct Mat33 * A,const struct Vec3 * v1,const struct Vec3 * v2,const struct Vec3 * v3)62 void initMatrixColumns(struct Mat33 *A, const struct Vec3 *v1,
63 const struct Vec3 *v2, const struct Vec3 *v3) {
64 ASSERT_NOT_NULL(A);
65 ASSERT_NOT_NULL(v1);
66 ASSERT_NOT_NULL(v2);
67 ASSERT_NOT_NULL(v3);
68 A->elem[0][0] = v1->x;
69 A->elem[0][1] = v2->x;
70 A->elem[0][2] = v3->x;
71
72 A->elem[1][0] = v1->y;
73 A->elem[1][1] = v2->y;
74 A->elem[1][2] = v3->y;
75
76 A->elem[2][0] = v1->z;
77 A->elem[2][1] = v2->z;
78 A->elem[2][2] = v3->z;
79 }
80
mat33Apply(struct Vec3 * out,const struct Mat33 * A,const struct Vec3 * v)81 void mat33Apply(struct Vec3 *out, const struct Mat33 *A, const struct Vec3 *v) {
82 ASSERT_NOT_NULL(out);
83 ASSERT_NOT_NULL(A);
84 ASSERT_NOT_NULL(v);
85 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z;
86 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z;
87 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z;
88 }
89
90 UNROLLED
mat33Multiply(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)91 void mat33Multiply(struct Mat33 *out, const struct Mat33 *A,
92 const struct Mat33 *B) {
93 ASSERT_NOT_NULL(out);
94 ASSERT_NOT_NULL(A);
95 ASSERT_NOT_NULL(B);
96 ASSERT(out != A);
97 ASSERT(out != B);
98
99 uint32_t i;
100 for (i = 0; i < 3; ++i) {
101 uint32_t j;
102 for (j = 0; j < 3; ++j) {
103 uint32_t k;
104 float sum = 0.0f;
105 for (k = 0; k < 3; ++k) {
106 sum += A->elem[i][k] * B->elem[k][j];
107 }
108
109 out->elem[i][j] = sum;
110 }
111 }
112 }
113
114 UNROLLED
mat33ScalarMul(struct Mat33 * A,float c)115 void mat33ScalarMul(struct Mat33 *A, float c) {
116 ASSERT_NOT_NULL(A);
117 uint32_t i;
118 for (i = 0; i < 3; ++i) {
119 uint32_t j;
120 for (j = 0; j < 3; ++j) {
121 A->elem[i][j] *= c;
122 }
123 }
124 }
125
126 UNROLLED
mat33Add(struct Mat33 * out,const struct Mat33 * A)127 void mat33Add(struct Mat33 *out, const struct Mat33 *A) {
128 ASSERT_NOT_NULL(out);
129 ASSERT_NOT_NULL(A);
130 uint32_t i;
131 for (i = 0; i < 3; ++i) {
132 uint32_t j;
133 for (j = 0; j < 3; ++j) {
134 out->elem[i][j] += A->elem[i][j];
135 }
136 }
137 }
138
139 UNROLLED
mat33Sub(struct Mat33 * out,const struct Mat33 * A)140 void mat33Sub(struct Mat33 *out, const struct Mat33 *A) {
141 ASSERT_NOT_NULL(out);
142 ASSERT_NOT_NULL(A);
143 uint32_t i;
144 for (i = 0; i < 3; ++i) {
145 uint32_t j;
146 for (j = 0; j < 3; ++j) {
147 out->elem[i][j] -= A->elem[i][j];
148 }
149 }
150 }
151
152 UNROLLED
mat33IsPositiveSemidefinite(const struct Mat33 * A,float tolerance)153 int mat33IsPositiveSemidefinite(const struct Mat33 *A, float tolerance) {
154 ASSERT_NOT_NULL(A);
155 uint32_t i;
156 for (i = 0; i < 3; ++i) {
157 if (A->elem[i][i] < 0.0f) {
158 return 0;
159 }
160 }
161
162 for (i = 0; i < 3; ++i) {
163 uint32_t j;
164 for (j = i + 1; j < 3; ++j) {
165 if (fabsf(A->elem[i][j] - A->elem[j][i]) > tolerance) {
166 return 0;
167 }
168 }
169 }
170
171 return 1;
172 }
173
174 UNROLLED
mat33MultiplyTransposed(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)175 void mat33MultiplyTransposed(struct Mat33 *out, const struct Mat33 *A,
176 const struct Mat33 *B) {
177 ASSERT(out != A);
178 ASSERT(out != B);
179 ASSERT_NOT_NULL(out);
180 ASSERT_NOT_NULL(A);
181 ASSERT_NOT_NULL(B);
182 uint32_t i;
183 for (i = 0; i < 3; ++i) {
184 uint32_t j;
185 for (j = 0; j < 3; ++j) {
186 uint32_t k;
187 float sum = 0.0f;
188 for (k = 0; k < 3; ++k) {
189 sum += A->elem[k][i] * B->elem[k][j];
190 }
191
192 out->elem[i][j] = sum;
193 }
194 }
195 }
196
197 UNROLLED
mat33MultiplyTransposed2(struct Mat33 * out,const struct Mat33 * A,const struct Mat33 * B)198 void mat33MultiplyTransposed2(struct Mat33 *out, const struct Mat33 *A,
199 const struct Mat33 *B) {
200 ASSERT(out != A);
201 ASSERT(out != B);
202 ASSERT_NOT_NULL(out);
203 ASSERT_NOT_NULL(A);
204 ASSERT_NOT_NULL(B);
205 uint32_t i;
206 for (i = 0; i < 3; ++i) {
207 uint32_t j;
208 for (j = 0; j < 3; ++j) {
209 uint32_t k;
210 float sum = 0.0f;
211 for (k = 0; k < 3; ++k) {
212 sum += A->elem[i][k] * B->elem[j][k];
213 }
214
215 out->elem[i][j] = sum;
216 }
217 }
218 }
219
220 UNROLLED
mat33Invert(struct Mat33 * out,const struct Mat33 * A)221 void mat33Invert(struct Mat33 *out, const struct Mat33 *A) {
222 ASSERT_NOT_NULL(out);
223 ASSERT_NOT_NULL(A);
224 float t;
225 initDiagonalMatrix(out, 1.0f);
226
227 struct Mat33 tmp = *A;
228
229 uint32_t i, k;
230 for (i = 0; i < 3; ++i) {
231 uint32_t swap = i;
232 uint32_t j;
233 for (j = i + 1; j < 3; ++j) {
234 if (fabsf(tmp.elem[j][i]) > fabsf(tmp.elem[i][i])) {
235 swap = j;
236 }
237 }
238
239 if (swap != i) {
240 for (k = 0; k < 3; ++k) {
241 t = tmp.elem[i][k];
242 tmp.elem[i][k] = tmp.elem[swap][k];
243 tmp.elem[swap][k] = t;
244
245 t = out->elem[i][k];
246 out->elem[i][k] = out->elem[swap][k];
247 out->elem[swap][k] = t;
248 }
249 }
250 // divide by zero guard.
251 ASSERT(fabsf(tmp.elem[i][i]) > 0);
252 if(!(fabsf(tmp.elem[i][i]) > 0)) {
253 return;
254 }
255 t = 1.0f / tmp.elem[i][i];
256 for (k = 0; k < 3; ++k) {
257 tmp.elem[i][k] *= t;
258 out->elem[i][k] *= t;
259 }
260
261 for (j = 0; j < 3; ++j) {
262 if (j != i) {
263 t = tmp.elem[j][i];
264 for (k = 0; k < 3; ++k) {
265 tmp.elem[j][k] -= tmp.elem[i][k] * t;
266 out->elem[j][k] -= out->elem[i][k] * t;
267 }
268 }
269 }
270 }
271 }
272
273 UNROLLED
mat33Transpose(struct Mat33 * out,const struct Mat33 * A)274 void mat33Transpose(struct Mat33 *out, const struct Mat33 *A) {
275 ASSERT_NOT_NULL(out);
276 ASSERT_NOT_NULL(A);
277 uint32_t i;
278 for (i = 0; i < 3; ++i) {
279 uint32_t j;
280 for (j = 0; j < 3; ++j) {
281 out->elem[i][j] = A->elem[j][i];
282 }
283 }
284 }
285
286 UNROLLED
mat33SwapRows(struct Mat33 * A,const uint32_t i,const uint32_t j)287 void mat33SwapRows(struct Mat33 *A, const uint32_t i, const uint32_t j) {
288 ASSERT_NOT_NULL(A);
289 const uint32_t N = 3;
290 uint32_t k;
291
292 if (i == j) {
293 return;
294 }
295
296 for (k = 0; k < N; ++k) {
297 float tmp = A->elem[i][k];
298 A->elem[i][k] = A->elem[j][k];
299 A->elem[j][k] = tmp;
300 }
301 }
302
303 UNROLLED
mat33GetEigenbasis(struct Mat33 * S,struct Vec3 * eigenvals,struct Mat33 * eigenvecs)304 void mat33GetEigenbasis(struct Mat33 *S, struct Vec3 *eigenvals,
305 struct Mat33 *eigenvecs) {
306 ASSERT_NOT_NULL(S);
307 ASSERT_NOT_NULL(eigenvals);
308 ASSERT_NOT_NULL(eigenvecs);
309 const uint32_t N = 3;
310 uint32_t i, j, k, l, m;
311
312 float _eigenvals[N];
313
314 uint32_t ind[N];
315 for (k = 0; k < N; ++k) {
316 ind[k] = mat33Maxind(S, k);
317 _eigenvals[k] = S->elem[k][k];
318 }
319
320 initDiagonalMatrix(eigenvecs, 1.0f);
321
322 for (;;) {
323 m = 0;
324 for (k = 1; k + 1 < N; ++k) {
325 if (fabsf(S->elem[k][ind[k]]) > fabsf(S->elem[m][ind[m]])) {
326 m = k;
327 }
328 }
329
330 k = m;
331 l = ind[m];
332 float p = S->elem[k][l];
333
334 if (fabsf(p) < EPSILON) {
335 break;
336 }
337
338 float y = (_eigenvals[l] - _eigenvals[k]) * 0.5f;
339
340 float t = fabsf(y) + sqrtf(p * p + y * y);
341 float s = sqrtf(p * p + t * t);
342 float c = t / s;
343 s = p / s;
344 t = p * p / t;
345
346 if (y < 0.0f) {
347 s = -s;
348 t = -t;
349 }
350
351 S->elem[k][l] = 0.0f;
352
353 _eigenvals[k] -= t;
354 _eigenvals[l] += t;
355
356 for (i = 0; i < k; ++i) {
357 mat33Rotate(S, c, s, i, k, i, l);
358 }
359
360 for (i = k + 1; i < l; ++i) {
361 mat33Rotate(S, c, s, k, i, i, l);
362 }
363
364 for (i = l + 1; i < N; ++i) {
365 mat33Rotate(S, c, s, k, i, l, i);
366 }
367
368 for (i = 0; i < N; ++i) {
369 float tmp = c * eigenvecs->elem[k][i] - s * eigenvecs->elem[l][i];
370 eigenvecs->elem[l][i] =
371 s * eigenvecs->elem[k][i] + c * eigenvecs->elem[l][i];
372 eigenvecs->elem[k][i] = tmp;
373 }
374
375 ind[k] = mat33Maxind(S, k);
376 ind[l] = mat33Maxind(S, l);
377
378 float sum = 0.0f;
379 for (i = 0; i < N; ++i) {
380 for (j = i + 1; j < N; ++j) {
381 sum += fabsf(S->elem[i][j]);
382 }
383 }
384
385 if (sum < EPSILON) {
386 break;
387 }
388 }
389
390 for (k = 0; k < N; ++k) {
391 m = k;
392 for (l = k + 1; l < N; ++l) {
393 if (_eigenvals[l] > _eigenvals[m]) {
394 m = l;
395 }
396 }
397
398 if (k != m) {
399 float tmp = _eigenvals[k];
400 _eigenvals[k] = _eigenvals[m];
401 _eigenvals[m] = tmp;
402
403 mat33SwapRows(eigenvecs, k, m);
404 }
405 }
406
407 initVec3(eigenvals, _eigenvals[0], _eigenvals[1], _eigenvals[2]);
408 }
409
mat33Determinant(const struct Mat33 * A)410 float mat33Determinant(const struct Mat33 *A) {
411 ASSERT_NOT_NULL(A);
412 return A->elem[0][0] *
413 (A->elem[1][1] * A->elem[2][2] - A->elem[1][2] * A->elem[2][1])
414 - A->elem[0][1] *
415 (A->elem[1][0] * A->elem[2][2] - A->elem[1][2] * A->elem[2][0])
416 + A->elem[0][2] *
417 (A->elem[1][0] * A->elem[2][1] - A->elem[1][1] * A->elem[2][0]);
418 }
419
420 // index of largest off-diagonal element in row k
421 UNROLLED
mat33Maxind(const struct Mat33 * A,uint32_t k)422 uint32_t mat33Maxind(const struct Mat33 *A, uint32_t k) {
423 ASSERT_NOT_NULL(A);
424 const uint32_t N = 3;
425
426 uint32_t m = k + 1;
427 uint32_t i;
428
429 for (i = k + 2; i < N; ++i) {
430 if (fabsf(A->elem[k][i]) > fabsf(A->elem[k][m])) {
431 m = i;
432 }
433 }
434
435 return m;
436 }
437
mat33Rotate(struct Mat33 * A,float c,float s,uint32_t k,uint32_t l,uint32_t i,uint32_t j)438 void mat33Rotate(struct Mat33 *A, float c, float s, uint32_t k, uint32_t l,
439 uint32_t i, uint32_t j) {
440 ASSERT_NOT_NULL(A);
441 float tmp = c * A->elem[k][l] - s * A->elem[i][j];
442 A->elem[i][j] = s * A->elem[k][l] + c * A->elem[i][j];
443 A->elem[k][l] = tmp;
444 }
445
mat44Apply(struct Vec4 * out,const struct Mat44 * A,const struct Vec4 * v)446 void mat44Apply(struct Vec4 *out, const struct Mat44 *A, const struct Vec4 *v) {
447 ASSERT_NOT_NULL(out);
448 ASSERT_NOT_NULL(A);
449 ASSERT_NOT_NULL(v);
450
451 out->x = A->elem[0][0] * v->x + A->elem[0][1] * v->y + A->elem[0][2] * v->z +
452 A->elem[0][3] * v->w;
453
454 out->y = A->elem[1][0] * v->x + A->elem[1][1] * v->y + A->elem[1][2] * v->z +
455 A->elem[1][3] * v->w;
456
457 out->z = A->elem[2][0] * v->x + A->elem[2][1] * v->y + A->elem[2][2] * v->z +
458 A->elem[2][3] * v->w;
459
460 out->w = A->elem[3][0] * v->x + A->elem[3][1] * v->y + A->elem[3][2] * v->z +
461 A->elem[3][3] * v->w;
462 }
463
464 UNROLLED
mat44DecomposeLup(struct Mat44 * LU,struct Size4 * pivot)465 void mat44DecomposeLup(struct Mat44 *LU, struct Size4 *pivot) {
466 ASSERT_NOT_NULL(LU);
467 ASSERT_NOT_NULL(pivot);
468 const uint32_t N = 4;
469 uint32_t i, j, k;
470
471 for (k = 0; k < N; ++k) {
472 pivot->elem[k] = k;
473 float max = fabsf(LU->elem[k][k]);
474 for (j = k + 1; j < N; ++j) {
475 if (max < fabsf(LU->elem[j][k])) {
476 max = fabsf(LU->elem[j][k]);
477 pivot->elem[k] = j;
478 }
479 }
480
481 if (pivot->elem[k] != k) {
482 mat44SwapRows(LU, k, pivot->elem[k]);
483 }
484
485 if (fabsf(LU->elem[k][k]) < EPSILON) {
486 continue;
487 }
488
489 for (j = k + 1; j < N; ++j) {
490 LU->elem[k][j] /= LU->elem[k][k];
491 }
492
493 for (i = k + 1; i < N; ++i) {
494 for (j = k + 1; j < N; ++j) {
495 LU->elem[i][j] -= LU->elem[i][k] * LU->elem[k][j];
496 }
497 }
498 }
499 }
500
501 UNROLLED
mat44SwapRows(struct Mat44 * A,const uint32_t i,const uint32_t j)502 void mat44SwapRows(struct Mat44 *A, const uint32_t i, const uint32_t j) {
503 ASSERT_NOT_NULL(A);
504 const uint32_t N = 4;
505 uint32_t k;
506
507 if (i == j) {
508 return;
509 }
510
511 for (k = 0; k < N; ++k) {
512 float tmp = A->elem[i][k];
513 A->elem[i][k] = A->elem[j][k];
514 A->elem[j][k] = tmp;
515 }
516 }
517
518 UNROLLED
mat44Solve(const struct Mat44 * A,struct Vec4 * x,const struct Vec4 * b,const struct Size4 * pivot)519 void mat44Solve(const struct Mat44 *A, struct Vec4 *x, const struct Vec4 *b,
520 const struct Size4 *pivot) {
521 ASSERT_NOT_NULL(A);
522 ASSERT_NOT_NULL(x);
523 ASSERT_NOT_NULL(b);
524 ASSERT_NOT_NULL(pivot);
525 const uint32_t N = 4;
526 uint32_t i, k;
527
528 float bCopy[N];
529 bCopy[0] = b->x;
530 bCopy[1] = b->y;
531 bCopy[2] = b->z;
532 bCopy[3] = b->w;
533
534 float _x[N];
535 for (k = 0; k < N; ++k) {
536 if (pivot->elem[k] != k) {
537 float tmp = bCopy[k];
538 bCopy[k] = bCopy[pivot->elem[k]];
539 bCopy[pivot->elem[k]] = tmp;
540 }
541
542 _x[k] = bCopy[k];
543 for (i = 0; i < k; ++i) {
544 _x[k] -= _x[i] * A->elem[k][i];
545 }
546 _x[k] /= A->elem[k][k];
547 }
548
549 for (k = N; k-- > 0;) {
550 for (i = k + 1; i < N; ++i) {
551 _x[k] -= _x[i] * A->elem[k][i];
552 }
553 }
554
555 initVec4(x, _x[0], _x[1], _x[2], _x[3]);
556 }
557
matMaxDiagonalElement(const float * square_mat,size_t n)558 float matMaxDiagonalElement(const float *square_mat, size_t n) {
559 ASSERT_NOT_NULL(square_mat);
560 ASSERT(n > 0);
561 size_t i;
562 float max = square_mat[0];
563 const size_t n_square = n * n;
564 const size_t offset = n + 1;
565 for (i = offset; i < n_square; i += offset) {
566 if (square_mat[i] > max) {
567 max = square_mat[i];
568 }
569 }
570 return max;
571 }
572
matAddConstantDiagonal(float * square_mat,float u,size_t n)573 void matAddConstantDiagonal(float *square_mat, float u, size_t n) {
574 ASSERT_NOT_NULL(square_mat);
575 size_t i;
576 const size_t n_square = n * n;
577 const size_t offset = n + 1;
578 for (i = 0; i < n_square; i += offset) {
579 square_mat[i] += u;
580 }
581 }
582
matTransposeMultiplyMat(float * out,const float * A,size_t nrows,size_t ncols)583 void matTransposeMultiplyMat(float *out, const float *A,
584 size_t nrows, size_t ncols) {
585 ASSERT_NOT_NULL(out);
586 ASSERT_NOT_NULL(A);
587 size_t i;
588 size_t j;
589 size_t k;
590 memset(out, 0, sizeof(float) * ncols * ncols);
591 for (i = 0; i < ncols; ++i) {
592 for (j = 0; j < ncols; ++j) {
593 // Since A' * A is symmetric, can use upper diagonal elements
594 // to fill in the lower diagonal without recomputing.
595 if (j < i) {
596 out[i * ncols + j] = out[j * ncols + i];
597 } else {
598 // mat_out[i, j] = ai ' * aj
599 out[i * ncols + j] = 0;
600 for (k = 0; k < nrows; ++k) {
601 out[i * ncols + j] += A[k * ncols + i] *
602 A[k * ncols + j];
603 }
604 }
605 }
606 }
607 }
608
matMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)609 void matMultiplyVec(float *out, const float *A, const float *v,
610 size_t nrows, size_t ncols) {
611 ASSERT_NOT_NULL(out);
612 ASSERT_NOT_NULL(A);
613 ASSERT_NOT_NULL(v);
614 size_t i;
615 for (i = 0; i < nrows; ++i) {
616 const float *row = &A[i * ncols];
617 out[i] = vecDot(row, v, ncols);
618 }
619 }
620
matTransposeMultiplyVec(float * out,const float * A,const float * v,size_t nrows,size_t ncols)621 void matTransposeMultiplyVec(float *out, const float *A, const float *v,
622 size_t nrows, size_t ncols) {
623 ASSERT_NOT_NULL(out);
624 ASSERT_NOT_NULL(A);
625 ASSERT_NOT_NULL(v);
626 size_t i, j;
627 for (i = 0; i < ncols; ++i) {
628 out[i] = 0;
629 for (j = 0; j < nrows; ++j) {
630 out[i] += A[j * ncols + i] * v[j];
631 }
632 }
633 }
634
matLinearSolveCholesky(float * x,const float * L,const float * b,size_t n)635 bool matLinearSolveCholesky(float *x, const float *L, const float *b, size_t n) {
636 ASSERT_NOT_NULL(x);
637 ASSERT_NOT_NULL(L);
638 ASSERT_NOT_NULL(b);
639 ASSERT(n <= INT32_MAX);
640 int32_t i, j; // Loops below require signed integers.
641 int32_t s_n = (int32_t)n; // Signed n.
642 float sum = 0.0f;
643 // 1. Solve Ly = b through forward substitution. Use x[] to store y.
644 for (i = 0; i < s_n; ++i) {
645 sum = 0.0f;
646 for (j = 0; j < i; ++j) {
647 sum += L[i * s_n + j] * x[j];
648 }
649 // Check for non-zero diagonals (don't divide by zero).
650 if (L[i * s_n + i] < EPSILON) {
651 return false;
652 }
653 x[i] = (b[i] - sum) / L[i * s_n + i];
654 }
655
656 // 2. Solve L'x = y through backwards substitution. Use x[] to store both
657 // y and x.
658 for (i = s_n - 1; i >= 0; --i) {
659 sum = 0.0f;
660 for (j = i + 1; j < s_n; ++j) {
661 sum += L[j * s_n + i] * x[j];
662 }
663 x[i] = (x[i] - sum) / L[i * s_n + i];
664 }
665
666 return true;
667 }
668
matCholeskyDecomposition(float * L,const float * A,size_t n)669 bool matCholeskyDecomposition(float *L, const float *A, size_t n) {
670 ASSERT_NOT_NULL(L);
671 ASSERT_NOT_NULL(A);
672 size_t i, j, k;
673 float sum = 0.0f;
674 // initialize L to zero.
675 memset(L, 0, sizeof(float) * n * n);
676
677 for (i = 0; i < n; ++i) {
678 // compute L[i][i] = sqrt(A[i][i] - sum_k = 1:i-1 L^2[i][k])
679 sum = 0.0f;
680 for (k = 0; k < i; ++k) {
681 sum += L[i * n + k] * L[i * n + k];
682 }
683 sum = A[i * n + i] - sum;
684 // If diagonal element of L is too small, cholesky fails.
685 if (sum < CHOLESKY_TOLERANCE) {
686 return false;
687 }
688 L[i * n + i] = sqrtf(sum);
689
690 // for j = i+1:N, compute L[j][i] =
691 // (1/L[i][i]) * (A[i][j] - sum_k = 1:i-1 L[i][k] * L[j][k])
692 for (j = i + 1; j < n; ++j) {
693 sum = 0.0f;
694 for (k = 0; k < i; ++k) {
695 sum += L[i * n + k] * L[j * n + k];
696 }
697 // division okay because magnitude of L[i][i] already checked above.
698 L[j * n + i] = (A[i * n + j] - sum) / L[i * n + i];
699 }
700 }
701
702 return true;
703 }
704