1 /*
2 * Copyright (c) 2021, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11 #include "av1/common/av1_common_int.h"
12 #include "av1/encoder/sparse_linear_solver.h"
13 #include "config/aom_config.h"
14 #include "aom_mem/aom_mem.h"
15 #include "av1/common/alloccommon.h"
16
17 #if CONFIG_OPTICAL_FLOW_API
18 /*
19 * Input:
20 * rows: array of row positions
21 * cols: array of column positions
22 * values: array of element values
23 * num_elem: total number of elements in the matrix
24 * num_rows: number of rows in the matrix
25 * num_cols: number of columns in the matrix
26 *
27 * Output:
28 * sm: pointer to the sparse matrix to be initialized
29 *
30 * Return: 0 - success
31 * -1 - failed
32 */
av1_init_sparse_mtx(const int * rows,const int * cols,const double * values,int num_elem,int num_rows,int num_cols,SPARSE_MTX * sm)33 int av1_init_sparse_mtx(const int *rows, const int *cols, const double *values,
34 int num_elem, int num_rows, int num_cols,
35 SPARSE_MTX *sm) {
36 sm->n_elem = num_elem;
37 sm->n_rows = num_rows;
38 sm->n_cols = num_cols;
39 if (num_elem == 0) {
40 sm->row_pos = NULL;
41 sm->col_pos = NULL;
42 sm->value = NULL;
43 return 0;
44 }
45 sm->row_pos = aom_calloc(num_elem, sizeof(*sm->row_pos));
46 sm->col_pos = aom_calloc(num_elem, sizeof(*sm->col_pos));
47 sm->value = aom_calloc(num_elem, sizeof(*sm->value));
48
49 if (!sm->row_pos || !sm->col_pos || !sm->value) {
50 av1_free_sparse_mtx_elems(sm);
51 return -1;
52 }
53
54 memcpy(sm->row_pos, rows, num_elem * sizeof(*sm->row_pos));
55 memcpy(sm->col_pos, cols, num_elem * sizeof(*sm->col_pos));
56 memcpy(sm->value, values, num_elem * sizeof(*sm->value));
57
58 return 0;
59 }
60
61 /*
62 * Combines two sparse matrices (allocating new space).
63 *
64 * Input:
65 * sm1, sm2: matrices to be combined
66 * row_offset1, row_offset2: row offset of each matrix in the new matrix
67 * col_offset1, col_offset2: column offset of each matrix in the new matrix
68 * new_n_rows, new_n_cols: number of rows and columns in the new matrix
69 *
70 * Output:
71 * sm: the combined matrix
72 *
73 * Return: 0 - success
74 * -1 - failed
75 */
av1_init_combine_sparse_mtx(const SPARSE_MTX * sm1,const SPARSE_MTX * sm2,SPARSE_MTX * sm,int row_offset1,int col_offset1,int row_offset2,int col_offset2,int new_n_rows,int new_n_cols)76 int av1_init_combine_sparse_mtx(const SPARSE_MTX *sm1, const SPARSE_MTX *sm2,
77 SPARSE_MTX *sm, int row_offset1,
78 int col_offset1, int row_offset2,
79 int col_offset2, int new_n_rows,
80 int new_n_cols) {
81 sm->n_elem = sm1->n_elem + sm2->n_elem;
82 sm->n_cols = new_n_cols;
83 sm->n_rows = new_n_rows;
84
85 if (sm->n_elem == 0) {
86 sm->row_pos = NULL;
87 sm->col_pos = NULL;
88 sm->value = NULL;
89 return 0;
90 }
91
92 sm->row_pos = aom_calloc(sm->n_elem, sizeof(*sm->row_pos));
93 sm->col_pos = aom_calloc(sm->n_elem, sizeof(*sm->col_pos));
94 sm->value = aom_calloc(sm->n_elem, sizeof(*sm->value));
95
96 if (!sm->row_pos || !sm->col_pos || !sm->value) {
97 av1_free_sparse_mtx_elems(sm);
98 return -1;
99 }
100
101 for (int i = 0; i < sm1->n_elem; i++) {
102 sm->row_pos[i] = sm1->row_pos[i] + row_offset1;
103 sm->col_pos[i] = sm1->col_pos[i] + col_offset1;
104 }
105 memcpy(sm->value, sm1->value, sm1->n_elem * sizeof(*sm1->value));
106 int n_elem1 = sm1->n_elem;
107 for (int i = 0; i < sm2->n_elem; i++) {
108 sm->row_pos[n_elem1 + i] = sm2->row_pos[i] + row_offset2;
109 sm->col_pos[n_elem1 + i] = sm2->col_pos[i] + col_offset2;
110 }
111 memcpy(sm->value + n_elem1, sm2->value, sm2->n_elem * sizeof(*sm2->value));
112 return 0;
113 }
114
av1_free_sparse_mtx_elems(SPARSE_MTX * sm)115 void av1_free_sparse_mtx_elems(SPARSE_MTX *sm) {
116 sm->n_cols = 0;
117 sm->n_rows = 0;
118 if (sm->n_elem != 0) {
119 aom_free(sm->row_pos);
120 aom_free(sm->col_pos);
121 aom_free(sm->value);
122 }
123 sm->n_elem = 0;
124 }
125
126 /*
127 * Calculate matrix and vector multiplication: A*b
128 *
129 * Input:
130 * sm: matrix A
131 * srcv: the vector b to be multiplied to
132 * dstl: the length of vectors
133 *
134 * Output:
135 * dstv: pointer to the resulting vector
136 */
av1_mtx_vect_multi_right(const SPARSE_MTX * sm,const double * srcv,double * dstv,int dstl)137 void av1_mtx_vect_multi_right(const SPARSE_MTX *sm, const double *srcv,
138 double *dstv, int dstl) {
139 memset(dstv, 0, sizeof(*dstv) * dstl);
140 for (int i = 0; i < sm->n_elem; i++) {
141 dstv[sm->row_pos[i]] += srcv[sm->col_pos[i]] * sm->value[i];
142 }
143 }
144 /*
145 * Calculate matrix and vector multiplication: b*A
146 *
147 * Input:
148 * sm: matrix A
149 * srcv: the vector b to be multiplied to
150 * dstl: the length of vectors
151 *
152 * Output:
153 * dstv: pointer to the resulting vector
154 */
av1_mtx_vect_multi_left(const SPARSE_MTX * sm,const double * srcv,double * dstv,int dstl)155 void av1_mtx_vect_multi_left(const SPARSE_MTX *sm, const double *srcv,
156 double *dstv, int dstl) {
157 memset(dstv, 0, sizeof(*dstv) * dstl);
158 for (int i = 0; i < sm->n_elem; i++) {
159 dstv[sm->col_pos[i]] += srcv[sm->row_pos[i]] * sm->value[i];
160 }
161 }
162
163 /*
164 * Calculate inner product of two vectors
165 *
166 * Input:
167 * src1, scr2: the vectors to be multiplied
168 * src1l: length of the vectors
169 *
170 * Output:
171 * the inner product
172 */
av1_vect_vect_multi(const double * src1,int src1l,const double * src2)173 double av1_vect_vect_multi(const double *src1, int src1l, const double *src2) {
174 double result = 0;
175 for (int i = 0; i < src1l; i++) {
176 result += src1[i] * src2[i];
177 }
178 return result;
179 }
180
181 /*
182 * Multiply each element in the matrix sm with a constant c
183 */
av1_constant_multiply_sparse_matrix(SPARSE_MTX * sm,double c)184 void av1_constant_multiply_sparse_matrix(SPARSE_MTX *sm, double c) {
185 for (int i = 0; i < sm->n_elem; i++) {
186 sm->value[i] *= c;
187 }
188 }
189
free_solver_local_buf(double * buf1,double * buf2,double * buf3,double * buf4,double * buf5,double * buf6,double * buf7)190 static INLINE void free_solver_local_buf(double *buf1, double *buf2,
191 double *buf3, double *buf4,
192 double *buf5, double *buf6,
193 double *buf7) {
194 aom_free(buf1);
195 aom_free(buf2);
196 aom_free(buf3);
197 aom_free(buf4);
198 aom_free(buf5);
199 aom_free(buf6);
200 aom_free(buf7);
201 }
202
203 /*
204 * Solve for Ax = b
205 * no requirement on A
206 *
207 * Input:
208 * A: the sparse matrix
209 * b: the vector b
210 * bl: length of b
211 * x: the vector x
212 *
213 * Output:
214 * x: pointer to the solution vector
215 *
216 * Return: 0 - success
217 * -1 - failed
218 */
av1_bi_conjugate_gradient_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)219 int av1_bi_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b,
220 int bl, double *x) {
221 double *r = NULL, *r_hat = NULL, *p = NULL, *p_hat = NULL, *Ap = NULL,
222 *p_hatA = NULL, *x_hat = NULL;
223 double alpha, beta, rtr, r_norm_2;
224 double denormtemp;
225
226 // initialize
227 r = aom_calloc(bl, sizeof(*r));
228 r_hat = aom_calloc(bl, sizeof(*r_hat));
229 p = aom_calloc(bl, sizeof(*p));
230 p_hat = aom_calloc(bl, sizeof(*p_hat));
231 Ap = aom_calloc(bl, sizeof(*Ap));
232 p_hatA = aom_calloc(bl, sizeof(*p_hatA));
233 x_hat = aom_calloc(bl, sizeof(*x_hat));
234 if (!r || !r_hat || !p || !p_hat || !Ap || !p_hatA || !x_hat) {
235 free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
236 return -1;
237 }
238
239 int i;
240 for (i = 0; i < bl; i++) {
241 r[i] = b[i];
242 r_hat[i] = b[i];
243 p[i] = r[i];
244 p_hat[i] = r_hat[i];
245 x[i] = 0;
246 x_hat[i] = 0;
247 }
248 r_norm_2 = av1_vect_vect_multi(r_hat, bl, r);
249 for (int k = 0; k < MAX_CG_SP_ITER; k++) {
250 rtr = r_norm_2;
251 av1_mtx_vect_multi_right(A, p, Ap, bl);
252 av1_mtx_vect_multi_left(A, p_hat, p_hatA, bl);
253
254 denormtemp = av1_vect_vect_multi(p_hat, bl, Ap);
255 if (denormtemp < 1e-10) break;
256 alpha = rtr / denormtemp;
257 r_norm_2 = 0;
258 for (i = 0; i < bl; i++) {
259 x[i] += alpha * p[i];
260 x_hat[i] += alpha * p_hat[i];
261 r[i] -= alpha * Ap[i];
262 r_hat[i] -= alpha * p_hatA[i];
263 r_norm_2 += r_hat[i] * r[i];
264 }
265 if (sqrt(r_norm_2) < 1e-2) {
266 break;
267 }
268 if (rtr < 1e-10) break;
269 beta = r_norm_2 / rtr;
270 for (i = 0; i < bl; i++) {
271 p[i] = r[i] + beta * p[i];
272 p_hat[i] = r_hat[i] + beta * p_hat[i];
273 }
274 }
275 // free
276 free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
277 return 0;
278 }
279
280 /*
281 * Solve for Ax = b when A is symmetric and positive definite
282 *
283 * Input:
284 * A: the sparse matrix
285 * b: the vector b
286 * bl: length of b
287 * x: the vector x
288 *
289 * Output:
290 * x: pointer to the solution vector
291 *
292 * Return: 0 - success
293 * -1 - failed
294 */
av1_conjugate_gradient_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)295 int av1_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b, int bl,
296 double *x) {
297 double *r = NULL, *p = NULL, *Ap = NULL;
298 double alpha, beta, rtr, r_norm_2;
299 double denormtemp;
300
301 // initialize
302 r = aom_calloc(bl, sizeof(*r));
303 p = aom_calloc(bl, sizeof(*p));
304 Ap = aom_calloc(bl, sizeof(*Ap));
305 if (!r || !p || !Ap) {
306 free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
307 return -1;
308 }
309
310 int i;
311 for (i = 0; i < bl; i++) {
312 r[i] = b[i];
313 p[i] = r[i];
314 x[i] = 0;
315 }
316 r_norm_2 = av1_vect_vect_multi(r, bl, r);
317 int k;
318 for (k = 0; k < MAX_CG_SP_ITER; k++) {
319 rtr = r_norm_2;
320 av1_mtx_vect_multi_right(A, p, Ap, bl);
321 denormtemp = av1_vect_vect_multi(p, bl, Ap);
322 if (denormtemp < 1e-10) break;
323 alpha = rtr / denormtemp;
324 r_norm_2 = 0;
325 for (i = 0; i < bl; i++) {
326 x[i] += alpha * p[i];
327 r[i] -= alpha * Ap[i];
328 r_norm_2 += r[i] * r[i];
329 }
330 if (r_norm_2 < 1e-8 * bl) break;
331 if (rtr < 1e-10) break;
332 beta = r_norm_2 / rtr;
333 for (i = 0; i < bl; i++) {
334 p[i] = r[i] + beta * p[i];
335 }
336 }
337 // free
338 free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
339
340 return 0;
341 }
342
343 /*
344 * Solve for Ax = b using Jacobi method
345 *
346 * Input:
347 * A: the sparse matrix
348 * b: the vector b
349 * bl: length of b
350 * x: the vector x
351 *
352 * Output:
353 * x: pointer to the solution vector
354 *
355 * Return: 0 - success
356 * -1 - failed
357 */
av1_jacobi_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)358 int av1_jacobi_sparse(const SPARSE_MTX *A, const double *b, int bl, double *x) {
359 double *diags = NULL, *Rx = NULL, *x_last = NULL, *x_cur = NULL,
360 *tempx = NULL;
361 double resi2;
362
363 diags = aom_calloc(bl, sizeof(*diags));
364 Rx = aom_calloc(bl, sizeof(*Rx));
365 x_last = aom_calloc(bl, sizeof(*x_last));
366 x_cur = aom_calloc(bl, sizeof(*x_cur));
367
368 if (!diags || !Rx || !x_last || !x_cur) {
369 free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
370 return -1;
371 }
372
373 int i;
374 memset(x_last, 0, sizeof(*x_last) * bl);
375 // get the diagonals of A
376 memset(diags, 0, sizeof(*diags) * bl);
377 for (int c = 0; c < A->n_elem; c++) {
378 if (A->row_pos[c] != A->col_pos[c]) continue;
379 diags[A->row_pos[c]] = A->value[c];
380 }
381 int k;
382 for (k = 0; k < MAX_CG_SP_ITER; k++) {
383 // R = A - diag(diags)
384 // get R*x_last
385 memset(Rx, 0, sizeof(*Rx) * bl);
386 for (int c = 0; c < A->n_elem; c++) {
387 if (A->row_pos[c] == A->col_pos[c]) continue;
388 Rx[A->row_pos[c]] += x_last[A->col_pos[c]] * A->value[c];
389 }
390 resi2 = 0;
391 for (i = 0; i < bl; i++) {
392 x_cur[i] = (b[i] - Rx[i]) / diags[i];
393 resi2 += (x_last[i] - x_cur[i]) * (x_last[i] - x_cur[i]);
394 }
395 if (resi2 <= 1e-10 * bl) break;
396 // swap last & cur buffer ptrs
397 tempx = x_last;
398 x_last = x_cur;
399 x_cur = tempx;
400 }
401 printf("\n numiter: %d\n", k);
402 for (i = 0; i < bl; i++) {
403 x[i] = x_cur[i];
404 }
405 free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
406 return 0;
407 }
408
409 /*
410 * Solve for Ax = b using Steepest descent method
411 *
412 * Input:
413 * A: the sparse matrix
414 * b: the vector b
415 * bl: length of b
416 * x: the vector x
417 *
418 * Output:
419 * x: pointer to the solution vector
420 *
421 * Return: 0 - success
422 * -1 - failed
423 */
av1_steepest_descent_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)424 int av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl,
425 double *x) {
426 double *d = NULL, *Ad = NULL, *Ax = NULL;
427 double resi2, resi2_last, dAd, temp;
428
429 d = aom_calloc(bl, sizeof(*d));
430 Ax = aom_calloc(bl, sizeof(*Ax));
431 Ad = aom_calloc(bl, sizeof(*Ad));
432
433 if (!d || !Ax || !Ad) {
434 free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
435 return -1;
436 }
437
438 int i;
439 // initialize with 0s
440 resi2 = 0;
441 for (i = 0; i < bl; i++) {
442 x[i] = 0;
443 d[i] = b[i];
444 resi2 += d[i] * d[i] / bl;
445 }
446 int k;
447 for (k = 0; k < MAX_CG_SP_ITER; k++) {
448 // get A*x_last
449 av1_mtx_vect_multi_right(A, d, Ad, bl);
450 dAd = resi2 * bl / av1_vect_vect_multi(d, bl, Ad);
451 for (i = 0; i < bl; i++) {
452 temp = dAd * d[i];
453 x[i] = x[i] + temp;
454 }
455 av1_mtx_vect_multi_right(A, x, Ax, bl);
456 resi2_last = resi2;
457 resi2 = 0;
458 for (i = 0; i < bl; i++) {
459 d[i] = b[i] - Ax[i];
460 resi2 += d[i] * d[i] / bl;
461 }
462 if (resi2 <= 1e-8) break;
463 if (resi2_last - resi2 < 1e-8) {
464 break;
465 }
466 }
467 free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
468
469 return 0;
470 }
471
472 #endif // CONFIG_OPTICAL_FLOW_API
473