• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 #include "av1/common/av1_common_int.h"
12 #include "av1/encoder/sparse_linear_solver.h"
13 #include "config/aom_config.h"
14 #include "aom_mem/aom_mem.h"
15 #include "av1/common/alloccommon.h"
16 
17 #if CONFIG_OPTICAL_FLOW_API
18 /*
19  * Input:
20  * rows: array of row positions
21  * cols: array of column positions
22  * values: array of element values
23  * num_elem: total number of elements in the matrix
24  * num_rows: number of rows in the matrix
25  * num_cols: number of columns in the matrix
26  *
27  * Output:
28  * sm: pointer to the sparse matrix to be initialized
29  *
30  * Return: 0  - success
31  *         -1 - failed
32  */
av1_init_sparse_mtx(const int * rows,const int * cols,const double * values,int num_elem,int num_rows,int num_cols,SPARSE_MTX * sm)33 int av1_init_sparse_mtx(const int *rows, const int *cols, const double *values,
34                         int num_elem, int num_rows, int num_cols,
35                         SPARSE_MTX *sm) {
36   sm->n_elem = num_elem;
37   sm->n_rows = num_rows;
38   sm->n_cols = num_cols;
39   if (num_elem == 0) {
40     sm->row_pos = NULL;
41     sm->col_pos = NULL;
42     sm->value = NULL;
43     return 0;
44   }
45   sm->row_pos = aom_calloc(num_elem, sizeof(*sm->row_pos));
46   sm->col_pos = aom_calloc(num_elem, sizeof(*sm->col_pos));
47   sm->value = aom_calloc(num_elem, sizeof(*sm->value));
48 
49   if (!sm->row_pos || !sm->col_pos || !sm->value) {
50     av1_free_sparse_mtx_elems(sm);
51     return -1;
52   }
53 
54   memcpy(sm->row_pos, rows, num_elem * sizeof(*sm->row_pos));
55   memcpy(sm->col_pos, cols, num_elem * sizeof(*sm->col_pos));
56   memcpy(sm->value, values, num_elem * sizeof(*sm->value));
57 
58   return 0;
59 }
60 
61 /*
62  * Combines two sparse matrices (allocating new space).
63  *
64  * Input:
65  * sm1, sm2: matrices to be combined
66  * row_offset1, row_offset2: row offset of each matrix in the new matrix
67  * col_offset1, col_offset2: column offset of each matrix in the new matrix
68  * new_n_rows, new_n_cols: number of rows and columns in the new matrix
69  *
70  * Output:
71  * sm: the combined matrix
72  *
73  * Return: 0  - success
74  *         -1 - failed
75  */
av1_init_combine_sparse_mtx(const SPARSE_MTX * sm1,const SPARSE_MTX * sm2,SPARSE_MTX * sm,int row_offset1,int col_offset1,int row_offset2,int col_offset2,int new_n_rows,int new_n_cols)76 int av1_init_combine_sparse_mtx(const SPARSE_MTX *sm1, const SPARSE_MTX *sm2,
77                                 SPARSE_MTX *sm, int row_offset1,
78                                 int col_offset1, int row_offset2,
79                                 int col_offset2, int new_n_rows,
80                                 int new_n_cols) {
81   sm->n_elem = sm1->n_elem + sm2->n_elem;
82   sm->n_cols = new_n_cols;
83   sm->n_rows = new_n_rows;
84 
85   if (sm->n_elem == 0) {
86     sm->row_pos = NULL;
87     sm->col_pos = NULL;
88     sm->value = NULL;
89     return 0;
90   }
91 
92   sm->row_pos = aom_calloc(sm->n_elem, sizeof(*sm->row_pos));
93   sm->col_pos = aom_calloc(sm->n_elem, sizeof(*sm->col_pos));
94   sm->value = aom_calloc(sm->n_elem, sizeof(*sm->value));
95 
96   if (!sm->row_pos || !sm->col_pos || !sm->value) {
97     av1_free_sparse_mtx_elems(sm);
98     return -1;
99   }
100 
101   for (int i = 0; i < sm1->n_elem; i++) {
102     sm->row_pos[i] = sm1->row_pos[i] + row_offset1;
103     sm->col_pos[i] = sm1->col_pos[i] + col_offset1;
104   }
105   memcpy(sm->value, sm1->value, sm1->n_elem * sizeof(*sm1->value));
106   int n_elem1 = sm1->n_elem;
107   for (int i = 0; i < sm2->n_elem; i++) {
108     sm->row_pos[n_elem1 + i] = sm2->row_pos[i] + row_offset2;
109     sm->col_pos[n_elem1 + i] = sm2->col_pos[i] + col_offset2;
110   }
111   memcpy(sm->value + n_elem1, sm2->value, sm2->n_elem * sizeof(*sm2->value));
112   return 0;
113 }
114 
av1_free_sparse_mtx_elems(SPARSE_MTX * sm)115 void av1_free_sparse_mtx_elems(SPARSE_MTX *sm) {
116   sm->n_cols = 0;
117   sm->n_rows = 0;
118   if (sm->n_elem != 0) {
119     aom_free(sm->row_pos);
120     aom_free(sm->col_pos);
121     aom_free(sm->value);
122   }
123   sm->n_elem = 0;
124 }
125 
126 /*
127  * Calculate matrix and vector multiplication: A*b
128  *
129  * Input:
130  * sm: matrix A
131  * srcv: the vector b to be multiplied to
132  * dstl: the length of vectors
133  *
134  * Output:
135  * dstv: pointer to the resulting vector
136  */
av1_mtx_vect_multi_right(const SPARSE_MTX * sm,const double * srcv,double * dstv,int dstl)137 void av1_mtx_vect_multi_right(const SPARSE_MTX *sm, const double *srcv,
138                               double *dstv, int dstl) {
139   memset(dstv, 0, sizeof(*dstv) * dstl);
140   for (int i = 0; i < sm->n_elem; i++) {
141     dstv[sm->row_pos[i]] += srcv[sm->col_pos[i]] * sm->value[i];
142   }
143 }
144 /*
145  * Calculate matrix and vector multiplication: b*A
146  *
147  * Input:
148  * sm: matrix A
149  * srcv: the vector b to be multiplied to
150  * dstl: the length of vectors
151  *
152  * Output:
153  * dstv: pointer to the resulting vector
154  */
av1_mtx_vect_multi_left(const SPARSE_MTX * sm,const double * srcv,double * dstv,int dstl)155 void av1_mtx_vect_multi_left(const SPARSE_MTX *sm, const double *srcv,
156                              double *dstv, int dstl) {
157   memset(dstv, 0, sizeof(*dstv) * dstl);
158   for (int i = 0; i < sm->n_elem; i++) {
159     dstv[sm->col_pos[i]] += srcv[sm->row_pos[i]] * sm->value[i];
160   }
161 }
162 
163 /*
164  * Calculate inner product of two vectors
165  *
166  * Input:
167  * src1, scr2: the vectors to be multiplied
168  * src1l: length of the vectors
169  *
170  * Output:
171  * the inner product
172  */
av1_vect_vect_multi(const double * src1,int src1l,const double * src2)173 double av1_vect_vect_multi(const double *src1, int src1l, const double *src2) {
174   double result = 0;
175   for (int i = 0; i < src1l; i++) {
176     result += src1[i] * src2[i];
177   }
178   return result;
179 }
180 
181 /*
182  * Multiply each element in the matrix sm with a constant c
183  */
av1_constant_multiply_sparse_matrix(SPARSE_MTX * sm,double c)184 void av1_constant_multiply_sparse_matrix(SPARSE_MTX *sm, double c) {
185   for (int i = 0; i < sm->n_elem; i++) {
186     sm->value[i] *= c;
187   }
188 }
189 
free_solver_local_buf(double * buf1,double * buf2,double * buf3,double * buf4,double * buf5,double * buf6,double * buf7)190 static INLINE void free_solver_local_buf(double *buf1, double *buf2,
191                                          double *buf3, double *buf4,
192                                          double *buf5, double *buf6,
193                                          double *buf7) {
194   aom_free(buf1);
195   aom_free(buf2);
196   aom_free(buf3);
197   aom_free(buf4);
198   aom_free(buf5);
199   aom_free(buf6);
200   aom_free(buf7);
201 }
202 
203 /*
204  * Solve for Ax = b
205  * no requirement on A
206  *
207  * Input:
208  * A: the sparse matrix
209  * b: the vector b
210  * bl: length of b
211  * x: the vector x
212  *
213  * Output:
214  * x: pointer to the solution vector
215  *
216  * Return: 0  - success
217  *         -1 - failed
218  */
av1_bi_conjugate_gradient_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)219 int av1_bi_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b,
220                                      int bl, double *x) {
221   double *r = NULL, *r_hat = NULL, *p = NULL, *p_hat = NULL, *Ap = NULL,
222          *p_hatA = NULL, *x_hat = NULL;
223   double alpha, beta, rtr, r_norm_2;
224   double denormtemp;
225 
226   // initialize
227   r = aom_calloc(bl, sizeof(*r));
228   r_hat = aom_calloc(bl, sizeof(*r_hat));
229   p = aom_calloc(bl, sizeof(*p));
230   p_hat = aom_calloc(bl, sizeof(*p_hat));
231   Ap = aom_calloc(bl, sizeof(*Ap));
232   p_hatA = aom_calloc(bl, sizeof(*p_hatA));
233   x_hat = aom_calloc(bl, sizeof(*x_hat));
234   if (!r || !r_hat || !p || !p_hat || !Ap || !p_hatA || !x_hat) {
235     free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
236     return -1;
237   }
238 
239   int i;
240   for (i = 0; i < bl; i++) {
241     r[i] = b[i];
242     r_hat[i] = b[i];
243     p[i] = r[i];
244     p_hat[i] = r_hat[i];
245     x[i] = 0;
246     x_hat[i] = 0;
247   }
248   r_norm_2 = av1_vect_vect_multi(r_hat, bl, r);
249   for (int k = 0; k < MAX_CG_SP_ITER; k++) {
250     rtr = r_norm_2;
251     av1_mtx_vect_multi_right(A, p, Ap, bl);
252     av1_mtx_vect_multi_left(A, p_hat, p_hatA, bl);
253 
254     denormtemp = av1_vect_vect_multi(p_hat, bl, Ap);
255     if (denormtemp < 1e-10) break;
256     alpha = rtr / denormtemp;
257     r_norm_2 = 0;
258     for (i = 0; i < bl; i++) {
259       x[i] += alpha * p[i];
260       x_hat[i] += alpha * p_hat[i];
261       r[i] -= alpha * Ap[i];
262       r_hat[i] -= alpha * p_hatA[i];
263       r_norm_2 += r_hat[i] * r[i];
264     }
265     if (sqrt(r_norm_2) < 1e-2) {
266       break;
267     }
268     if (rtr < 1e-10) break;
269     beta = r_norm_2 / rtr;
270     for (i = 0; i < bl; i++) {
271       p[i] = r[i] + beta * p[i];
272       p_hat[i] = r_hat[i] + beta * p_hat[i];
273     }
274   }
275   // free
276   free_solver_local_buf(r, r_hat, p, p_hat, Ap, p_hatA, x_hat);
277   return 0;
278 }
279 
280 /*
281  * Solve for Ax = b when A is symmetric and positive definite
282  *
283  * Input:
284  * A: the sparse matrix
285  * b: the vector b
286  * bl: length of b
287  * x: the vector x
288  *
289  * Output:
290  * x: pointer to the solution vector
291  *
292  * Return: 0  - success
293  *         -1 - failed
294  */
av1_conjugate_gradient_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)295 int av1_conjugate_gradient_sparse(const SPARSE_MTX *A, const double *b, int bl,
296                                   double *x) {
297   double *r = NULL, *p = NULL, *Ap = NULL;
298   double alpha, beta, rtr, r_norm_2;
299   double denormtemp;
300 
301   // initialize
302   r = aom_calloc(bl, sizeof(*r));
303   p = aom_calloc(bl, sizeof(*p));
304   Ap = aom_calloc(bl, sizeof(*Ap));
305   if (!r || !p || !Ap) {
306     free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
307     return -1;
308   }
309 
310   int i;
311   for (i = 0; i < bl; i++) {
312     r[i] = b[i];
313     p[i] = r[i];
314     x[i] = 0;
315   }
316   r_norm_2 = av1_vect_vect_multi(r, bl, r);
317   int k;
318   for (k = 0; k < MAX_CG_SP_ITER; k++) {
319     rtr = r_norm_2;
320     av1_mtx_vect_multi_right(A, p, Ap, bl);
321     denormtemp = av1_vect_vect_multi(p, bl, Ap);
322     if (denormtemp < 1e-10) break;
323     alpha = rtr / denormtemp;
324     r_norm_2 = 0;
325     for (i = 0; i < bl; i++) {
326       x[i] += alpha * p[i];
327       r[i] -= alpha * Ap[i];
328       r_norm_2 += r[i] * r[i];
329     }
330     if (r_norm_2 < 1e-8 * bl) break;
331     if (rtr < 1e-10) break;
332     beta = r_norm_2 / rtr;
333     for (i = 0; i < bl; i++) {
334       p[i] = r[i] + beta * p[i];
335     }
336   }
337   // free
338   free_solver_local_buf(r, p, Ap, NULL, NULL, NULL, NULL);
339 
340   return 0;
341 }
342 
343 /*
344  * Solve for Ax = b using Jacobi method
345  *
346  * Input:
347  * A: the sparse matrix
348  * b: the vector b
349  * bl: length of b
350  * x: the vector x
351  *
352  * Output:
353  * x: pointer to the solution vector
354  *
355  * Return: 0  - success
356  *         -1 - failed
357  */
av1_jacobi_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)358 int av1_jacobi_sparse(const SPARSE_MTX *A, const double *b, int bl, double *x) {
359   double *diags = NULL, *Rx = NULL, *x_last = NULL, *x_cur = NULL,
360          *tempx = NULL;
361   double resi2;
362 
363   diags = aom_calloc(bl, sizeof(*diags));
364   Rx = aom_calloc(bl, sizeof(*Rx));
365   x_last = aom_calloc(bl, sizeof(*x_last));
366   x_cur = aom_calloc(bl, sizeof(*x_cur));
367 
368   if (!diags || !Rx || !x_last || !x_cur) {
369     free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
370     return -1;
371   }
372 
373   int i;
374   memset(x_last, 0, sizeof(*x_last) * bl);
375   // get the diagonals of A
376   memset(diags, 0, sizeof(*diags) * bl);
377   for (int c = 0; c < A->n_elem; c++) {
378     if (A->row_pos[c] != A->col_pos[c]) continue;
379     diags[A->row_pos[c]] = A->value[c];
380   }
381   int k;
382   for (k = 0; k < MAX_CG_SP_ITER; k++) {
383     // R = A - diag(diags)
384     // get R*x_last
385     memset(Rx, 0, sizeof(*Rx) * bl);
386     for (int c = 0; c < A->n_elem; c++) {
387       if (A->row_pos[c] == A->col_pos[c]) continue;
388       Rx[A->row_pos[c]] += x_last[A->col_pos[c]] * A->value[c];
389     }
390     resi2 = 0;
391     for (i = 0; i < bl; i++) {
392       x_cur[i] = (b[i] - Rx[i]) / diags[i];
393       resi2 += (x_last[i] - x_cur[i]) * (x_last[i] - x_cur[i]);
394     }
395     if (resi2 <= 1e-10 * bl) break;
396     // swap last & cur buffer ptrs
397     tempx = x_last;
398     x_last = x_cur;
399     x_cur = tempx;
400   }
401   printf("\n numiter: %d\n", k);
402   for (i = 0; i < bl; i++) {
403     x[i] = x_cur[i];
404   }
405   free_solver_local_buf(diags, Rx, x_last, x_cur, NULL, NULL, NULL);
406   return 0;
407 }
408 
409 /*
410  * Solve for Ax = b using Steepest descent method
411  *
412  * Input:
413  * A: the sparse matrix
414  * b: the vector b
415  * bl: length of b
416  * x: the vector x
417  *
418  * Output:
419  * x: pointer to the solution vector
420  *
421  * Return: 0  - success
422  *         -1 - failed
423  */
av1_steepest_descent_sparse(const SPARSE_MTX * A,const double * b,int bl,double * x)424 int av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl,
425                                 double *x) {
426   double *d = NULL, *Ad = NULL, *Ax = NULL;
427   double resi2, resi2_last, dAd, temp;
428 
429   d = aom_calloc(bl, sizeof(*d));
430   Ax = aom_calloc(bl, sizeof(*Ax));
431   Ad = aom_calloc(bl, sizeof(*Ad));
432 
433   if (!d || !Ax || !Ad) {
434     free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
435     return -1;
436   }
437 
438   int i;
439   // initialize with 0s
440   resi2 = 0;
441   for (i = 0; i < bl; i++) {
442     x[i] = 0;
443     d[i] = b[i];
444     resi2 += d[i] * d[i] / bl;
445   }
446   int k;
447   for (k = 0; k < MAX_CG_SP_ITER; k++) {
448     // get A*x_last
449     av1_mtx_vect_multi_right(A, d, Ad, bl);
450     dAd = resi2 * bl / av1_vect_vect_multi(d, bl, Ad);
451     for (i = 0; i < bl; i++) {
452       temp = dAd * d[i];
453       x[i] = x[i] + temp;
454     }
455     av1_mtx_vect_multi_right(A, x, Ax, bl);
456     resi2_last = resi2;
457     resi2 = 0;
458     for (i = 0; i < bl; i++) {
459       d[i] = b[i] - Ax[i];
460       resi2 += d[i] * d[i] / bl;
461     }
462     if (resi2 <= 1e-8) break;
463     if (resi2_last - resi2 < 1e-8) {
464       break;
465     }
466   }
467   free_solver_local_buf(d, Ax, Ad, NULL, NULL, NULL, NULL);
468 
469   return 0;
470 }
471 
472 #endif  // CONFIG_OPTICAL_FLOW_API
473