• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_dsp/mathutils.h"
19 #include "aom_dsp/noise_model.h"
20 #include "aom_dsp/noise_util.h"
21 #include "aom_mem/aom_mem.h"
22 
// Number of parameters of the low-order polynomial (planar) fit used by the
// flat block finder: one coefficient each for y, x and the constant term.
#define kLowPolyNumParams 3

// Largest auto-regressive lag accepted by aom_noise_model_init().
static const int kMaxLag = 4;
26 
// Defines get_block_mean_<suffix>(), which returns the mean sample value of
// the block whose top-left corner is (x_o, y_o), for the provided sample type
// (uint8_t or uint16_t). The block is clipped against the w x h image, so a
// block that overhangs the right or bottom edge only averages the samples
// that lie inside the image.
#define GET_BLOCK_MEAN(INT_TYPE, suffix)                                    \
  static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
                                        int stride, int x_o, int y_o,       \
                                        int block_size) {                   \
    const int rows_left = h - y_o;                                          \
    const int cols_left = w - x_o;                                          \
    const int max_h = rows_left < block_size ? rows_left : block_size;      \
    const int max_w = cols_left < block_size ? cols_left : block_size;      \
    double block_mean = 0;                                                  \
    for (int y = 0; y < max_h; ++y) {                                       \
      for (int x = 0; x < max_w; ++x) {                                     \
        block_mean += data[(y_o + y) * stride + x_o + x];                   \
      }                                                                     \
    }                                                                       \
    return block_mean / (max_w * max_h);                                    \
  }

GET_BLOCK_MEAN(uint8_t, lowbd)
GET_BLOCK_MEAN(uint16_t, highbd)
46 
47 static INLINE double get_block_mean(const uint8_t *data, int w, int h,
48                                     int stride, int x_o, int y_o,
49                                     int block_size, int use_highbd) {
50   if (use_highbd)
51     return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
52                                  block_size);
53   return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
54 }
55 
// Defines get_noise_var_<suffix>(), which returns the variance of the noise
// (data - denoised) over the block whose top-left corner is (x_o, y_o), for
// the provided sample type (uint8_t or uint16_t). The block is clipped against
// the w x h image. The variance is computed as E[noise^2] - E[noise]^2.
#define GET_NOISE_VAR(INT_TYPE, suffix)                                  \
  static double get_noise_var_##suffix(                                  \
      const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
      int h, int x_o, int y_o, int block_size_x, int block_size_y) {     \
    const int rows_left = h - y_o;                                       \
    const int cols_left = w - x_o;                                       \
    const int max_h = rows_left < block_size_y ? rows_left               \
                                               : block_size_y;           \
    const int max_w = cols_left < block_size_x ? cols_left               \
                                               : block_size_x;           \
    double noise_var = 0;                                                \
    double noise_mean = 0;                                               \
    for (int y = 0; y < max_h; ++y) {                                    \
      for (int x = 0; x < max_w; ++x) {                                  \
        double noise = (double)data[(y_o + y) * stride + x_o + x] -      \
                       denoised[(y_o + y) * stride + x_o + x];           \
        noise_mean += noise;                                             \
        noise_var += noise * noise;                                      \
      }                                                                  \
    }                                                                    \
    noise_mean /= (max_w * max_h);                                       \
    return noise_var / (max_w * max_h) - noise_mean * noise_mean;        \
  }

GET_NOISE_VAR(uint8_t, lowbd)
GET_NOISE_VAR(uint16_t, highbd)
80 
81 static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
82                                    int w, int h, int stride, int x_o, int y_o,
83                                    int block_size_x, int block_size_y,
84                                    int use_highbd) {
85   if (use_highbd)
86     return get_noise_var_highbd((const uint16_t *)data,
87                                 (const uint16_t *)denoised, w, h, stride, x_o,
88                                 y_o, block_size_x, block_size_y);
89   return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
90                              block_size_x, block_size_y);
91 }
92 
// Zeroes the n x n matrix A and the n-element vectors x and b of the system.
static void equation_system_clear(aom_equation_system_t *eqns) {
  const int n = eqns->n;
  memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
  memset(eqns->x, 0, sizeof(*eqns->x) * n);
  memset(eqns->b, 0, sizeof(*eqns->b) * n);
}
99 
// Copies A, x and b from src into dst. The size is taken from dst->n; src is
// not checked, so it must hold at least as many elements as dst.
static void equation_system_copy(aom_equation_system_t *dst,
                                 const aom_equation_system_t *src) {
  const int n = dst->n;
  memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
  memcpy(dst->x, src->x, sizeof(*dst->x) * n);
  memcpy(dst->b, src->b, sizeof(*dst->b) * n);
}
107 
// Allocates and zeroes an n x n system of equations (A, b, x).
// Returns 1 on success. On allocation failure it reports the error on stderr,
// releases whatever was allocated, zeroes *eqns and returns 0.
static int equation_system_init(aom_equation_system_t *eqns, int n) {
  eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
  eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
  eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
  eqns->n = n;
  if (!eqns->A || !eqns->b || !eqns->x) {
    fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
    aom_free(eqns->A);
    aom_free(eqns->b);
    aom_free(eqns->x);
    memset(eqns, 0, sizeof(*eqns));
    return 0;
  }
  equation_system_clear(eqns);
  return 1;
}
124 
// Solves A * x = b and stores the solution in eqns->x.
// linsolve() is handed temporary copies of A and b, so the stored A and b are
// left untouched. Returns 1 on success, 0 if the temporaries cannot be
// allocated or linsolve() reports failure.
static int equation_system_solve(aom_equation_system_t *eqns) {
  const int n = eqns->n;
  double *b = (double *)aom_malloc(sizeof(*b) * n);
  double *A = (double *)aom_malloc(sizeof(*A) * n * n);
  int ret = 0;
  if (A == NULL || b == NULL) {
    fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
    aom_free(b);
    aom_free(A);
    return 0;
  }
  memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
  memcpy(b, eqns->b, sizeof(*eqns->b) * n);
  ret = linsolve(n, A, eqns->n, b, eqns->x);
  aom_free(b);
  aom_free(A);

  return ret != 0;
}
147 
// Accumulates src into dest: dest->A += src->A and dest->b += src->b.
// The solution vector x is not touched. The size is taken from dest->n.
static void equation_system_add(aom_equation_system_t *dest,
                                aom_equation_system_t *src) {
  const int n = dest->n;
  int i, j;
  for (i = 0; i < n; ++i) {
    for (j = 0; j < n; ++j) {
      dest->A[i * n + j] += src->A[i * n + j];
    }
    dest->b[i] += src->b[i];
  }
}
159 
// Releases A, b and x and zeroes *eqns so a later free is harmless.
// A NULL eqns is ignored.
static void equation_system_free(aom_equation_system_t *eqns) {
  if (!eqns) return;
  aom_free(eqns->A);
  aom_free(eqns->b);
  aom_free(eqns->x);
  memset(eqns, 0, sizeof(*eqns));
}
167 
// Resets the solver to an empty state: zeroes its system of equations and the
// running measurement count and total.
static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
  equation_system_clear(&solver->eqns);
  solver->num_equations = 0;
  solver->total = 0;
}
173 
// Merges the measurements accumulated in src into dest: adds the systems of
// equations and sums the measurement counts and totals.
static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
                                      aom_noise_strength_solver_t *src) {
  equation_system_add(&dest->eqns, &src->eqns);
  dest->num_equations += src->num_equations;
  dest->total += src->total;
}
180 
181 // Return the number of coefficients required for the given parameters
num_coeffs(const aom_noise_model_params_t params)182 static int num_coeffs(const aom_noise_model_params_t params) {
183   const int n = 2 * params.lag + 1;
184   switch (params.shape) {
185     case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
186     case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
187   }
188   return 0;
189 }
190 
// Initializes a noise state with an n x n system of equations, unit AR gain,
// zero observations and a strength solver with kNumBins bins.
// Returns 1 on success and 0 on failure. If only the strength solver fails to
// initialize, state->eqns remains allocated and is left for the caller to
// release.
static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
  const int kNumBins = 20;
  if (!equation_system_init(&state->eqns, n)) {
    fprintf(stderr, "Failed initialization noise state with size %d\n", n);
    return 0;
  }
  state->ar_gain = 1.0;
  state->num_observations = 0;
  return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
                                        bit_depth);
}
202 
// Writes a fallback solution into eqns->x: every coefficient is set to zero
// except the last one (the luma-correlation term), which is solved from the
// last diagonal entry alone when that entry is not (nearly) zero.
static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
  const double kTolerance = 1e-6;
  const int last = eqns->n - 1;
  // Set all of the AR coefficients to zero, but try to solve for correlation
  // with the luma channel
  memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
  if (fabs(eqns->A[last * eqns->n + last]) > kTolerance) {
    eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
  }
}
213 
// Allocates a zero-filled lookup table of num_points (x, y) pairs.
// Returns 1 on success. Returns 0 if lut is NULL, num_points is not positive
// (in which case *lut is not modified), or the allocation fails (in which case
// lut->num_points is 0).
int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
  if (!lut) return 0;
  if (num_points <= 0) return 0;
  lut->num_points = 0;
  lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
  if (!lut->points) return 0;
  lut->num_points = num_points;
  memset(lut->points, 0, sizeof(*lut->points) * num_points);
  return 1;
}
224 
// Releases the table's points and zeroes *lut. A NULL lut is ignored.
void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
  if (!lut) return;
  aom_free(lut->points);
  memset(lut, 0, sizeof(*lut));
}
230 
aom_noise_strength_lut_eval(const aom_noise_strength_lut_t * lut,double x)231 double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
232                                    double x) {
233   int i = 0;
234   // Constant extrapolation for x <  x_0.
235   if (x < lut->points[0][0]) return lut->points[0][1];
236   for (i = 0; i < lut->num_points - 1; ++i) {
237     if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
238       const double a =
239           (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
240       return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
241     }
242   }
243   // Constant extrapolation for x > x_{n-1}
244   return lut->points[lut->num_points - 1][1];
245 }
246 
noise_strength_solver_get_bin_index(const aom_noise_strength_solver_t * solver,double value)247 static double noise_strength_solver_get_bin_index(
248     const aom_noise_strength_solver_t *solver, double value) {
249   const double val =
250       fclamp(value, solver->min_intensity, solver->max_intensity);
251   const double range = solver->max_intensity - solver->min_intensity;
252   return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
253 }
254 
noise_strength_solver_get_value(const aom_noise_strength_solver_t * solver,double x)255 static double noise_strength_solver_get_value(
256     const aom_noise_strength_solver_t *solver, double x) {
257   const double bin = noise_strength_solver_get_bin_index(solver, x);
258   const int bin_i0 = (int)floor(bin);
259   const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
260   const double a = bin - bin_i0;
261   return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
262 }
263 
// Adds one (block_mean, noise_std) observation to the solver.
// The observation is modelled as a linear interpolation between the two bins
// surrounding block_mean, with weights (1 - a) and a. Its contribution is
// accumulated into A and b, and the running total and measurement count are
// updated.
void aom_noise_strength_solver_add_measurement(
    aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
  const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
  const int bin_i0 = (int)floor(bin);
  const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
  const double a = bin - bin_i0;
  const int n = solver->num_bins;
  solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
  solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
  solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
  solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
  solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
  solver->eqns.b[bin_i1] += a * noise_std;
  solver->total += noise_std;
  solver->num_equations++;
}
280 
// Solves for the noise strength of every bin and stores it in solver->eqns.x.
// A smoothness regularizer (scaled by the number of measurements) and a small
// pull towards the mean noise strength are added before solving.
// Returns 1 on success, 0 on allocation failure or if the solve fails.
//
// NOTE(review): the regularized matrix is built in a temporary copy, but the
// mean term is added to solver->eqns.b in place, so repeated calls accumulate
// it. Confirm whether that is intended.
int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
  // Add regularization proportional to the number of constraints
  const int n = solver->num_bins;
  const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
  int result = 0;
  double mean = 0;

  // Do this in a non-destructive manner so it is not confusing to the caller
  double *old_A = solver->eqns.A;
  double *A = (double *)aom_malloc(sizeof(*A) * n * n);
  if (!A) {
    fprintf(stderr, "Unable to allocate copy of A\n");
    return 0;
  }
  memcpy(A, old_A, sizeof(*A) * n * n);

  // Second-difference penalty; the first and last rows fold the missing
  // neighbour back onto the diagonal.
  for (int i = 0; i < n; ++i) {
    const int i_lo = AOMMAX(0, i - 1);
    const int i_hi = AOMMIN(n - 1, i + 1);
    A[i * n + i_lo] -= kAlpha;
    A[i * n + i] += 2 * kAlpha;
    A[i * n + i_hi] -= kAlpha;
  }

  // Small regularization to give average noise strength. With no measurements
  // the average is undefined; use 0 so b is not filled with NaN.
  if (solver->num_equations > 0) {
    mean = solver->total / solver->num_equations;
  }
  for (int i = 0; i < n; ++i) {
    A[i * n + i] += 1.0 / 8192.;
    solver->eqns.b[i] += mean / 8192.;
  }
  // Temporarily swap in the regularized matrix for the solve.
  solver->eqns.A = A;
  result = equation_system_solve(&solver->eqns);
  solver->eqns.A = old_A;

  aom_free(A);
  return result;
}
318 
// Initializes a solver with num_bins bins covering the intensity range
// [0, 2^bit_depth - 1] and allocates its num_bins x num_bins system.
// Returns 1 on success, 0 if solver is NULL or the allocation fails.
int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
                                   int num_bins, int bit_depth) {
  if (!solver) return 0;
  memset(solver, 0, sizeof(*solver));
  solver->num_bins = num_bins;
  solver->min_intensity = 0;
  solver->max_intensity = (1 << bit_depth) - 1;
  solver->total = 0;
  solver->num_equations = 0;
  return equation_system_init(&solver->eqns, num_bins);
}
330 
// Releases the solver's system of equations. A NULL solver is ignored.
void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
  if (!solver) return;
  equation_system_free(&solver->eqns);
}
335 
aom_noise_strength_solver_get_center(const aom_noise_strength_solver_t * solver,int i)336 double aom_noise_strength_solver_get_center(
337     const aom_noise_strength_solver_t *solver, int i) {
338   const double range = solver->max_intensity - solver->min_intensity;
339   const int n = solver->num_bins;
340   return ((double)i) / (n - 1) * range + solver->min_intensity;
341 }
342 
343 // Computes the residual if a point were to be removed from the lut. This is
344 // calculated as the area between the output of the solver and the line segment
345 // that would be formed between [x_{i - 1}, x_{i + 1}).
update_piecewise_linear_residual(const aom_noise_strength_solver_t * solver,const aom_noise_strength_lut_t * lut,double * residual,int start,int end)346 static void update_piecewise_linear_residual(
347     const aom_noise_strength_solver_t *solver,
348     const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
349   const double dx = 255. / solver->num_bins;
350   for (int i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
351     const int lower = AOMMAX(0, (int)floor(noise_strength_solver_get_bin_index(
352                                     solver, lut->points[i - 1][0])));
353     const int upper = AOMMIN(solver->num_bins - 1,
354                              (int)ceil(noise_strength_solver_get_bin_index(
355                                  solver, lut->points[i + 1][0])));
356     double r = 0;
357     for (int j = lower; j <= upper; ++j) {
358       const double x = aom_noise_strength_solver_get_center(solver, j);
359       if (x < lut->points[i - 1][0]) continue;
360       if (x >= lut->points[i + 1][0]) continue;
361       const double y = solver->eqns.x[j];
362       const double a = (x - lut->points[i - 1][0]) /
363                        (lut->points[i + 1][0] - lut->points[i - 1][0]);
364       const double estimate_y =
365           lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
366       r += fabs(y - estimate_y);
367     }
368     residual[i] = r * dx;
369   }
370 }
371 
// Fits a piecewise-linear lookup table to the solver's solution.
// The table starts with one point per bin; interior points are then removed
// greedily, cheapest first, while there are more than max_output_points points
// or while removing the cheapest point costs no more than the tolerance.
// A negative max_output_points means "no limit beyond the number of bins".
// Returns 1 on success (lut is initialized and owned by the caller) and 0 on
// allocation failure.
int aom_noise_strength_solver_fit_piecewise(
    const aom_noise_strength_solver_t *solver, int max_output_points,
    aom_noise_strength_lut_t *lut) {
  // The tolerance is normalized to give consistent results between
  // different bit-depths.
  const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
  if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
    fprintf(stderr, "Failed to init lut\n");
    return 0;
  }
  for (int i = 0; i < solver->num_bins; ++i) {
    lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
    lut->points[i][1] = solver->eqns.x[i];
  }
  if (max_output_points < 0) {
    max_output_points = solver->num_bins;
  }

  double *residual =
      (double *)aom_malloc(solver->num_bins * sizeof(*residual));
  if (!residual) {
    aom_noise_strength_lut_free(lut);
    return 0;
  }
  memset(residual, 0, sizeof(*residual) * solver->num_bins);

  update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);

  // Greedily remove points if there are too many or if it doesn't hurt local
  // approximation (never remove the end points)
  while (lut->num_points > 2) {
    int min_index = 1;
    for (int j = 1; j < lut->num_points - 1; ++j) {
      if (residual[j] < residual[min_index]) {
        min_index = j;
      }
    }
    const double dx =
        lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
    const double avg_residual = residual[min_index] / dx;
    if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
      break;
    }

    // Drop the point by shifting the tail of the table down by one entry.
    const int num_remaining = lut->num_points - min_index - 1;
    memmove(lut->points + min_index, lut->points + min_index + 1,
            sizeof(lut->points[0]) * num_remaining);
    lut->num_points--;

    // Only the residuals of the removed point's neighbours need refreshing.
    update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
                                     min_index + 1);
  }
  aom_free(residual);
  return 1;
}
426 
// Initializes a flat block finder for block_size x block_size blocks.
// Builds the design matrix A of a planar fit (one row [yd, xd, 1] per pixel,
// with coordinates normalized around the block centre) and the inverse of
// A^T A, both of which are stored in block_finder and owned by it.
// Returns 1 on success. Returns 0 on allocation failure or if A^T A cannot be
// inverted; in both cases block_finder->A and block_finder->AtA_inv are NULL.
int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
                               int block_size, int bit_depth, int use_highbd) {
  const int n = block_size * block_size;
  aom_equation_system_t eqns;
  double *AtA_inv = 0;
  double *A = 0;
  int x = 0, y = 0, i = 0, j = 0;
  block_finder->A = NULL;
  block_finder->AtA_inv = NULL;

  if (!equation_system_init(&eqns, kLowPolyNumParams)) {
    fprintf(stderr, "Failed to init equation system for block_size=%d\n",
            block_size);
    return 0;
  }

  AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
                                 sizeof(*AtA_inv));
  A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
  if (AtA_inv == NULL || A == NULL) {
    fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
            block_size);
    aom_free(AtA_inv);
    aom_free(A);
    equation_system_free(&eqns);
    return 0;
  }

  block_finder->A = A;
  block_finder->AtA_inv = AtA_inv;
  block_finder->block_size = block_size;
  block_finder->normalization = (1 << bit_depth) - 1;
  block_finder->use_highbd = use_highbd;

  // Fill A row by row and accumulate A^T A into eqns.A at the same time.
  for (y = 0; y < block_size; ++y) {
    const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
    for (x = 0; x < block_size; ++x) {
      const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
      const double coords[3] = { yd, xd, 1 };
      const int row = y * block_size + x;
      A[kLowPolyNumParams * row + 0] = yd;
      A[kLowPolyNumParams * row + 1] = xd;
      A[kLowPolyNumParams * row + 2] = 1;

      for (i = 0; i < kLowPolyNumParams; ++i) {
        for (j = 0; j < kLowPolyNumParams; ++j) {
          eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
        }
      }
    }
  }

  // Lazy inverse using existing equation solver: solving against each unit
  // vector yields one column of the inverse.
  for (i = 0; i < kLowPolyNumParams; ++i) {
    memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
    eqns.b[i] = 1;
    if (!equation_system_solve(&eqns)) {
      // Without a valid solution eqns.x does not hold a column of the inverse;
      // fail instead of storing it.
      fprintf(stderr, "Failed to invert AtA for block_size=%d\n", block_size);
      equation_system_free(&eqns);
      aom_free(AtA_inv);
      aom_free(A);
      block_finder->A = NULL;
      block_finder->AtA_inv = NULL;
      return 0;
    }

    for (j = 0; j < kLowPolyNumParams; ++j) {
      AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
    }
  }
  equation_system_free(&eqns);
  return 1;
}
492 
// Releases the finder's A and AtA_inv buffers and zeroes *block_finder.
// A NULL block_finder is ignored.
void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
  if (!block_finder) return;
  aom_free(block_finder->A);
  aom_free(block_finder->AtA_inv);
  memset(block_finder, 0, sizeof(*block_finder));
}
499 
aom_flat_block_finder_extract_block(const aom_flat_block_finder_t * block_finder,const uint8_t * const data,int w,int h,int stride,int offsx,int offsy,double * plane,double * block)500 void aom_flat_block_finder_extract_block(
501     const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
502     int w, int h, int stride, int offsx, int offsy, double *plane,
503     double *block) {
504   const int block_size = block_finder->block_size;
505   const int n = block_size * block_size;
506   const double *A = block_finder->A;
507   const double *AtA_inv = block_finder->AtA_inv;
508   double plane_coords[kLowPolyNumParams];
509   double AtA_inv_b[kLowPolyNumParams];
510   int xi, yi, i;
511 
512   if (block_finder->use_highbd) {
513     const uint16_t *const data16 = (const uint16_t *const)data;
514     for (yi = 0; yi < block_size; ++yi) {
515       const int y = clamp(offsy + yi, 0, h - 1);
516       for (xi = 0; xi < block_size; ++xi) {
517         const int x = clamp(offsx + xi, 0, w - 1);
518         block[yi * block_size + xi] =
519             ((double)data16[y * stride + x]) / block_finder->normalization;
520       }
521     }
522   } else {
523     for (yi = 0; yi < block_size; ++yi) {
524       const int y = clamp(offsy + yi, 0, h - 1);
525       for (xi = 0; xi < block_size; ++xi) {
526         const int x = clamp(offsx + xi, 0, w - 1);
527         block[yi * block_size + xi] =
528             ((double)data[y * stride + x]) / block_finder->normalization;
529       }
530     }
531   }
532   multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
533   multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
534                kLowPolyNumParams, 1);
535   multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
536 
537   for (i = 0; i < n; ++i) {
538     block[i] -= plane[i];
539   }
540 }
541 
// Pairs a block index with its flatness score so the scores can be sorted
// while remembering which block each one belongs to.
typedef struct {
  int index;
  float score;
} index_and_score_t;

// qsort() comparator ordering index_and_score_t entries by ascending score.
// Returns -1, 0 or 1.
static int compare_scores(const void *a, const void *b) {
  const index_and_score_t *const lhs = (const index_and_score_t *)a;
  const index_and_score_t *const rhs = (const index_and_score_t *)b;
  const float diff = lhs->score - rhs->score;
  return (diff > 0) - (diff < 0);
}
556 
aom_flat_block_finder_run(const aom_flat_block_finder_t * block_finder,const uint8_t * const data,int w,int h,int stride,uint8_t * flat_blocks)557 int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
558                               const uint8_t *const data, int w, int h,
559                               int stride, uint8_t *flat_blocks) {
560   // The gradient-based features used in this code are based on:
561   //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
562   //  correlation for improved video denoising," 2012 19th, ICIP.
563   // The thresholds are more lenient to allow for correct grain modeling
564   // if extreme cases.
565   const int block_size = block_finder->block_size;
566   const int n = block_size * block_size;
567   const double kTraceThreshold = 0.15 / (32 * 32);
568   const double kRatioThreshold = 1.25;
569   const double kNormThreshold = 0.08 / (32 * 32);
570   const double kVarThreshold = 0.005 / (double)n;
571   const int num_blocks_w = (w + block_size - 1) / block_size;
572   const int num_blocks_h = (h + block_size - 1) / block_size;
573   int num_flat = 0;
574   int bx = 0, by = 0;
575   double *plane = (double *)aom_malloc(n * sizeof(*plane));
576   double *block = (double *)aom_malloc(n * sizeof(*block));
577   index_and_score_t *scores = (index_and_score_t *)aom_malloc(
578       num_blocks_w * num_blocks_h * sizeof(*scores));
579   if (plane == NULL || block == NULL || scores == NULL) {
580     fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
581     aom_free(plane);
582     aom_free(block);
583     aom_free(scores);
584     return -1;
585   }
586 
587 #ifdef NOISE_MODEL_LOG_SCORE
588   fprintf(stderr, "score = [");
589 #endif
590   for (by = 0; by < num_blocks_h; ++by) {
591     for (bx = 0; bx < num_blocks_w; ++bx) {
592       // Compute gradient covariance matrix.
593       double Gxx = 0, Gxy = 0, Gyy = 0;
594       double var = 0;
595       double mean = 0;
596       int xi, yi;
597       aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
598                                           bx * block_size, by * block_size,
599                                           plane, block);
600 
601       for (yi = 1; yi < block_size - 1; ++yi) {
602         for (xi = 1; xi < block_size - 1; ++xi) {
603           const double gx = (block[yi * block_size + xi + 1] -
604                              block[yi * block_size + xi - 1]) /
605                             2;
606           const double gy = (block[yi * block_size + xi + block_size] -
607                              block[yi * block_size + xi - block_size]) /
608                             2;
609           Gxx += gx * gx;
610           Gxy += gx * gy;
611           Gyy += gy * gy;
612 
613           const double value = block[yi * block_size + xi];
614           mean += value;
615           var += value * value;
616         }
617       }
618       mean /= (block_size - 2) * (block_size - 2);
619 
620       // Normalize gradients by block_size.
621       Gxx /= ((block_size - 2) * (block_size - 2));
622       Gxy /= ((block_size - 2) * (block_size - 2));
623       Gyy /= ((block_size - 2) * (block_size - 2));
624       var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
625 
626       {
627         const double trace = Gxx + Gyy;
628         const double det = Gxx * Gyy - Gxy * Gxy;
629         const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
630         const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
631         const double norm = e1;  // Spectral norm
632         const double ratio = (e1 / AOMMAX(e2, 1e-6));
633         const int is_flat = (trace < kTraceThreshold) &&
634                             (ratio < kRatioThreshold) &&
635                             (norm < kNormThreshold) && (var > kVarThreshold);
636         // The following weights are used to combine the above features to give
637         // a sigmoid score for flatness. If the input was normalized to [0,100]
638         // the magnitude of these values would be close to 1 (e.g., weights
639         // corresponding to variance would be a factor of 10000x smaller).
640         // The weights are given in the following order:
641         //    [{var}, {ratio}, {trace}, {norm}, offset]
642         // with one of the most discriminative being simply the variance.
643         const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
644         double sum_weights = weights[0] * var + weights[1] * ratio +
645                              weights[2] * trace + weights[3] * norm +
646                              weights[4];
647         // clamp the value to [-25.0, 100.0] to prevent overflow
648         sum_weights = fclamp(sum_weights, -25.0, 100.0);
649         const float score = (float)(1.0 / (1 + exp(-sum_weights)));
650         flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
651         scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
652         scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
653 #ifdef NOISE_MODEL_LOG_SCORE
654         fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
655                 is_flat);
656 #endif
657         num_flat += is_flat;
658       }
659     }
660 #ifdef NOISE_MODEL_LOG_SCORE
661     fprintf(stderr, "\n");
662 #endif
663   }
664 #ifdef NOISE_MODEL_LOG_SCORE
665   fprintf(stderr, "];\n");
666 #endif
667   // Find the top-scored blocks (most likely to be flat) and set the flat blocks
668   // be the union of the thresholded results and the top 10th percentile of the
669   // scored results.
670   qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
671   const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
672   const float score_threshold = scores[top_nth_percentile].score;
673   for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
674     if (scores[i].score >= score_threshold) {
675       num_flat += flat_blocks[scores[i].index] == 0;
676       flat_blocks[scores[i].index] |= 1;
677     }
678   }
679   aom_free(block);
680   aom_free(plane);
681   aom_free(scores);
682   return num_flat;
683 }
684 
// Initializes a noise model for the given parameters.
// Validates lag (1..kMaxLag) and bit depth (8, 10 or 12), allocates the
// combined and latest noise state for each of the three channels (the two
// chroma channels get one extra coefficient), and fills model->coords with the
// (x, y) offsets of the neighbourhood selected by params.shape.
// Returns 1 on success. Returns 0 on invalid parameters or allocation failure;
// once allocation has started, a failure frees the model before returning.
int aom_noise_model_init(aom_noise_model_t *model,
                         const aom_noise_model_params_t params) {
  const int n = num_coeffs(params);
  const int lag = params.lag;
  const int bit_depth = params.bit_depth;
  int x = 0, y = 0, i = 0, c = 0;

  memset(model, 0, sizeof(*model));
  if (params.lag < 1) {
    fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
    return 0;
  }
  if (params.lag > kMaxLag) {
    fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
            kMaxLag);
    return 0;
  }
  if (!(params.bit_depth == 8 || params.bit_depth == 10 ||
        params.bit_depth == 12)) {
    return 0;
  }

  memcpy(&model->params, &params, sizeof(params));
  for (c = 0; c < 3; ++c) {
    if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
      fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
      aom_noise_model_free(model);
      return 0;
    }
    if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
      fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
      aom_noise_model_free(model);
      return 0;
    }
  }
  model->n = n;
  model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
  if (!model->coords) {
    aom_noise_model_free(model);
    return 0;
  }

  // Enumerate offsets from the rows above (y < 0, full width) and from the
  // current row strictly to the left of the centre (y == 0, x < 0).
  for (y = -lag; y <= 0; ++y) {
    const int max_x = y == 0 ? -1 : lag;
    for (x = -lag; x <= max_x; ++x) {
      switch (params.shape) {
        case AOM_NOISE_SHAPE_DIAMOND:
          if (abs(x) <= y + lag) {
            model->coords[i][0] = x;
            model->coords[i][1] = y;
            ++i;
          }
          break;
        case AOM_NOISE_SHAPE_SQUARE:
          model->coords[i][0] = x;
          model->coords[i][1] = y;
          ++i;
          break;
        default:
          fprintf(stderr, "Invalid shape\n");
          aom_noise_model_free(model);
          return 0;
      }
    }
  }
  assert(i == n);
  return 1;
}
753 
// Releases everything owned by `model` (the coordinate table and the four
// equation systems of each channel) and zeroes the structure. A NULL model
// is ignored.
void aom_noise_model_free(aom_noise_model_t *model) {
  if (model == NULL) return;

  aom_free(model->coords);
  for (int ch = 0; ch < 3; ++ch) {
    aom_noise_state_t *const latest = &model->latest_state[ch];
    aom_noise_state_t *const combined = &model->combined_state[ch];

    equation_system_free(&latest->eqns);
    equation_system_free(&combined->eqns);

    equation_system_free(&latest->strength_solver.eqns);
    equation_system_free(&combined->strength_solver.eqns);
  }
  memset(model, 0, sizeof(*model));
}
768 
// Generates extract_ar_row_<suffix>() for the given sample type. The function
// fills buffer[0 .. num_coords-1] with the noise (data minus denoised) at each
// neighbor offset in coords around (x, y) and returns the noise at (x, y)
// itself. When both alt_data and alt_denoised are given (e.g., luma for a
// chroma plane), buffer[num_coords] additionally receives the mean noise of
// the (1 << sub_log2[0]) x (1 << sub_log2[1]) alt samples that cover (x, y);
// otherwise buffer[num_coords] is left untouched.
#define EXTRACT_AR_ROW(INT_TYPE, suffix)                                    \
  static double extract_ar_row_##suffix(                                    \
      int(*coords)[2], int num_coords, const INT_TYPE *const data,          \
      const INT_TYPE *const denoised, int stride, int sub_log2[2],          \
      const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised,   \
      int alt_stride, int x, int y, double *buffer) {                       \
    for (int k = 0; k < num_coords; ++k) {                                  \
      const int pos = (y + coords[k][1]) * stride + (x + coords[k][0]);     \
      buffer[k] = (double)data[pos] - denoised[pos];                        \
    }                                                                       \
                                                                            \
    if (alt_data && alt_denoised) {                                         \
      const int span_x = 1 << sub_log2[0];                                  \
      const int span_y = 1 << sub_log2[1];                                  \
      double sum_data = 0, sum_denoised = 0;                                \
      int count = 0;                                                        \
      for (int dy = 0; dy < span_y; ++dy) {                                 \
        const int row = ((y << sub_log2[1]) + dy) * alt_stride;             \
        for (int dx = 0; dx < span_x; ++dx) {                               \
          const int pos = row + (x << sub_log2[0]) + dx;                    \
          sum_data += alt_data[pos];                                        \
          sum_denoised += alt_denoised[pos];                                \
          ++count;                                                          \
        }                                                                   \
      }                                                                     \
      buffer[num_coords] = (sum_data - sum_denoised) / count;               \
    }                                                                       \
                                                                            \
    const int center = y * stride + x;                                      \
    return (double)data[center] - denoised[center];                         \
  }

EXTRACT_AR_ROW(uint8_t, lowbd)
EXTRACT_AR_ROW(uint16_t, highbd)
805 
806 static int add_block_observations(
807     aom_noise_model_t *noise_model, int c, const uint8_t *const data,
808     const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
809     const uint8_t *const alt_data, const uint8_t *const alt_denoised,
810     int alt_stride, const uint8_t *const flat_blocks, int block_size,
811     int num_blocks_w, int num_blocks_h) {
812   const int lag = noise_model->params.lag;
813   const int num_coords = noise_model->n;
814   const double normalization = (1 << noise_model->params.bit_depth) - 1;
815   double *A = noise_model->latest_state[c].eqns.A;
816   double *b = noise_model->latest_state[c].eqns.b;
817   double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
818   const int n = noise_model->latest_state[c].eqns.n;
819 
820   if (!buffer) {
821     fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
822     return 0;
823   }
824   for (int by = 0; by < num_blocks_h; ++by) {
825     const int y_o = by * (block_size >> sub_log2[1]);
826     for (int bx = 0; bx < num_blocks_w; ++bx) {
827       const int x_o = bx * (block_size >> sub_log2[0]);
828       if (!flat_blocks[by * num_blocks_w + bx]) {
829         continue;
830       }
831       int y_start =
832           (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
833       int x_start =
834           (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
835       int y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
836                          block_size >> sub_log2[1]);
837       int x_end = AOMMIN(
838           (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
839           (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
840               ? (block_size >> sub_log2[0])
841               : ((block_size >> sub_log2[0]) - lag));
842       for (int y = y_start; y < y_end; ++y) {
843         for (int x = x_start; x < x_end; ++x) {
844           const double val =
845               noise_model->params.use_highbd
846                   ? extract_ar_row_highbd(noise_model->coords, num_coords,
847                                           (const uint16_t *const)data,
848                                           (const uint16_t *const)denoised,
849                                           stride, sub_log2,
850                                           (const uint16_t *const)alt_data,
851                                           (const uint16_t *const)alt_denoised,
852                                           alt_stride, x + x_o, y + y_o, buffer)
853                   : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
854                                          denoised, stride, sub_log2, alt_data,
855                                          alt_denoised, alt_stride, x + x_o,
856                                          y + y_o, buffer);
857           for (int i = 0; i < n; ++i) {
858             for (int j = 0; j < n; ++j) {
859               A[i * n + j] +=
860                   (buffer[i] * buffer[j]) / (normalization * normalization);
861             }
862             b[i] += (buffer[i] * val) / (normalization * normalization);
863           }
864           noise_model->latest_state[c].num_observations++;
865         }
866       }
867     }
868   }
869   aom_free(buffer);
870   return 1;
871 }
872 
// Adds one (block mean, noise strength) measurement per flat block to the
// strength solver of noise_model->latest_state[c]. The block mean is taken
// from alt_data when it is provided and from data otherwise. For chroma
// (c > 0) the part of the noise explained by correlation with luma is removed
// first, which requires the luma strength solver to have been solved already.
static void add_noise_std_observations(
    aom_noise_model_t *noise_model, int c, const double *coeffs,
    const uint8_t *const data, const uint8_t *const denoised, int w, int h,
    int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
    const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
    int num_blocks_h) {
  const int num_coords = noise_model->n;
  const int is_chroma = c > 0;
  aom_noise_strength_solver_t *noise_strength_solver =
      &noise_model->latest_state[c].strength_solver;
  const aom_noise_strength_solver_t *noise_strength_luma =
      &noise_model->latest_state[0].strength_solver;
  const double luma_gain = noise_model->latest_state[0].ar_gain;
  const double noise_gain = noise_model->latest_state[c].ar_gain;

  // Block and plane dimensions in this plane's resolution.
  const int block_w = block_size >> sub_log2[0];
  const int block_h = block_size >> sub_log2[1];
  const int plane_w = w >> sub_log2[0];
  const int plane_h = h >> sub_log2[1];

  for (int by = 0; by < num_blocks_h; ++by) {
    const int y_o = by * block_h;
    for (int bx = 0; bx < num_blocks_w; ++bx) {
      if (!flat_blocks[by * num_blocks_w + bx]) continue;

      const int x_o = bx * block_w;
      const int num_samples_h = AOMMIN(plane_h - y_o, block_h);
      const int num_samples_w = AOMMIN(plane_w - x_o, block_w);
      // Skip blocks that do not contain a reasonable number of samples.
      if (num_samples_w * num_samples_h <= block_size) continue;

      const double block_mean = get_block_mean(
          alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
          x_o << sub_log2[0], y_o << sub_log2[1], block_size,
          noise_model->params.use_highbd);
      const double noise_var = get_noise_var(
          data, denoised, stride, plane_w, plane_h, x_o, y_o, block_w, block_h,
          noise_model->params.use_highbd);

      // Luma contribution to the chroma noise; zero for the luma plane.
      double luma_strength = 0;
      double corr = 0;
      if (is_chroma) {
        luma_strength = luma_gain * noise_strength_solver_get_value(
                                        noise_strength_luma, block_mean);
        corr = coeffs[num_coords];
      }

      // Chroma noise:
      //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
      // so the uncorrelated component is
      //    uncorr_var = noise_var - (corr * luma_strength)^2
      // The lower bound of noise_var / 16 prevents fully correlated noise,
      // which the synthesis cannot model.
      const double uncorr_std = sqrt(
          AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
      // Undo the gain that the IIR filter will apply during synthesis.
      const double adjusted_strength = uncorr_std / noise_gain;
      aom_noise_strength_solver_add_measurement(noise_strength_solver,
                                                block_mean, adjusted_strength);
    }
  }
}
936 
937 // Return true if the noise estimate appears to be different from the combined
938 // (multi-frame) estimate. The difference is measured by checking whether the
939 // AR coefficients have diverged (using a threshold on normalized cross
940 // correlation), or whether the noise strength has changed.
is_noise_model_different(aom_noise_model_t * const noise_model)941 static int is_noise_model_different(aom_noise_model_t *const noise_model) {
942   // These thresholds are kind of arbitrary and will likely need further tuning
943   // (or exported as parameters). The threshold on noise strength is a weighted
944   // difference between the noise strength histograms
945   const double kCoeffThreshold = 0.9;
946   const double kStrengthThreshold =
947       0.005 * (1 << (noise_model->params.bit_depth - 8));
948   for (int c = 0; c < 1; ++c) {
949     const double corr =
950         aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
951                                          noise_model->combined_state[c].eqns.x,
952                                          noise_model->combined_state[c].eqns.n);
953     if (corr < kCoeffThreshold) return 1;
954 
955     const double dx =
956         1.0 / noise_model->latest_state[c].strength_solver.num_bins;
957 
958     const aom_equation_system_t *latest_eqns =
959         &noise_model->latest_state[c].strength_solver.eqns;
960     const aom_equation_system_t *combined_eqns =
961         &noise_model->combined_state[c].strength_solver.eqns;
962     double diff = 0;
963     double total_weight = 0;
964     for (int j = 0; j < latest_eqns->n; ++j) {
965       double weight = 0;
966       for (int i = 0; i < latest_eqns->n; ++i) {
967         weight += latest_eqns->A[i * latest_eqns->n + j];
968       }
969       weight = sqrt(weight);
970       diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
971       total_weight += weight;
972     }
973     if (diff * dx / total_weight > kStrengthThreshold) return 1;
974   }
975   return 0;
976 }
977 
// Solves the AR equation system of `state` and derives state->ar_gain from
// it. The gain is reset to 1.0 first; if the solve fails that value is kept
// and the (zero) solver result is returned. When is_chroma is set, the last
// unknown (the luma correlation term) is excluded from the variance estimate
// and its contribution is subtracted from b.
static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
  const int solved = equation_system_solve(&state->eqns);
  state->ar_gain = 1.0;
  if (!solved) return solved;

  const aom_equation_system_t *const eqns = &state->eqns;
  const int n = eqns->n;
  const int num_ar = n - is_chroma;

  // In the Yule-Walker equations the diagonal holds the variance of the
  // correlated noise. A least squares estimate has some variability along
  // the diagonal, so its mean is used as the overall variance (valid for
  // both formulations).
  double var = 0;
  for (int i = 0; i < num_ar; ++i) {
    var += eqns->A[i * n + i] / state->num_observations;
  }
  var /= num_ar;

  // Keep track of E(Y^2) = <b, x> + E(X^2). For chroma, the part correlated
  // with luma is removed by subtracting the last column of A, scaled by the
  // correlation estimate, from b: E(y^2) = <b - A(:, end) * x(end), x>.
  double sum_covar = 0;
  for (int i = 0; i < num_ar; ++i) {
    double bi = eqns->b[i];
    if (is_chroma) {
      bi -= eqns->A[i * n + (n - 1)] * eqns->x[n - 1];
    }
    sum_covar += (bi * eqns->x[i]) / state->num_observations;
  }

  // The variance of the uncorrelated noise determines the AR filter gain,
  // which is never allowed to fall below 1.
  const double noise_var = AOMMAX(var - sum_covar, 1e-6);
  state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
  return solved;
}
1015 
aom_noise_model_update(aom_noise_model_t * const noise_model,const uint8_t * const data[3],const uint8_t * const denoised[3],int w,int h,int stride[3],int chroma_sub_log2[2],const uint8_t * const flat_blocks,int block_size)1016 aom_noise_status_t aom_noise_model_update(
1017     aom_noise_model_t *const noise_model, const uint8_t *const data[3],
1018     const uint8_t *const denoised[3], int w, int h, int stride[3],
1019     int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
1020   const int num_blocks_w = (w + block_size - 1) / block_size;
1021   const int num_blocks_h = (h + block_size - 1) / block_size;
1022   int y_model_different = 0;
1023   int num_blocks = 0;
1024   int i = 0, channel = 0;
1025 
1026   if (block_size <= 1) {
1027     fprintf(stderr, "block_size = %d must be > 1\n", block_size);
1028     return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1029   }
1030 
1031   if (block_size < noise_model->params.lag * 2 + 1) {
1032     fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
1033             noise_model->params.lag * 2 + 1);
1034     return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1035   }
1036 
1037   // Clear the latest equation system
1038   for (i = 0; i < 3; ++i) {
1039     equation_system_clear(&noise_model->latest_state[i].eqns);
1040     noise_model->latest_state[i].num_observations = 0;
1041     noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
1042   }
1043 
1044   // Check that we have enough flat blocks
1045   for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
1046     if (flat_blocks[i]) {
1047       num_blocks++;
1048     }
1049   }
1050 
1051   if (num_blocks <= 1) {
1052     fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
1053     return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
1054   }
1055 
1056   for (channel = 0; channel < 3; ++channel) {
1057     int no_subsampling[2] = { 0, 0 };
1058     const uint8_t *alt_data = channel > 0 ? data[0] : 0;
1059     const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
1060     int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
1061     const int is_chroma = channel != 0;
1062     if (!data[channel] || !denoised[channel]) break;
1063     if (!add_block_observations(noise_model, channel, data[channel],
1064                                 denoised[channel], w, h, stride[channel], sub,
1065                                 alt_data, alt_denoised, stride[0], flat_blocks,
1066                                 block_size, num_blocks_w, num_blocks_h)) {
1067       fprintf(stderr, "Adding block observation failed\n");
1068       return AOM_NOISE_STATUS_INTERNAL_ERROR;
1069     }
1070 
1071     if (!ar_equation_system_solve(&noise_model->latest_state[channel],
1072                                   is_chroma)) {
1073       if (is_chroma) {
1074         set_chroma_coefficient_fallback_soln(
1075             &noise_model->latest_state[channel].eqns);
1076       } else {
1077         fprintf(stderr, "Solving latest noise equation system failed %d!\n",
1078                 channel);
1079         return AOM_NOISE_STATUS_INTERNAL_ERROR;
1080       }
1081     }
1082 
1083     add_noise_std_observations(
1084         noise_model, channel, noise_model->latest_state[channel].eqns.x,
1085         data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
1086         stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
1087 
1088     if (!aom_noise_strength_solver_solve(
1089             &noise_model->latest_state[channel].strength_solver)) {
1090       fprintf(stderr, "Solving latest noise strength failed!\n");
1091       return AOM_NOISE_STATUS_INTERNAL_ERROR;
1092     }
1093 
1094     // Check noise characteristics and return if error.
1095     if (channel == 0 &&
1096         noise_model->combined_state[channel].strength_solver.num_equations >
1097             0 &&
1098         is_noise_model_different(noise_model)) {
1099       y_model_different = 1;
1100     }
1101 
1102     // Don't update the combined stats if the y model is different.
1103     if (y_model_different) continue;
1104 
1105     noise_model->combined_state[channel].num_observations +=
1106         noise_model->latest_state[channel].num_observations;
1107     equation_system_add(&noise_model->combined_state[channel].eqns,
1108                         &noise_model->latest_state[channel].eqns);
1109     if (!ar_equation_system_solve(&noise_model->combined_state[channel],
1110                                   is_chroma)) {
1111       if (is_chroma) {
1112         set_chroma_coefficient_fallback_soln(
1113             &noise_model->combined_state[channel].eqns);
1114       } else {
1115         fprintf(stderr, "Solving combined noise equation system failed %d!\n",
1116                 channel);
1117         return AOM_NOISE_STATUS_INTERNAL_ERROR;
1118       }
1119     }
1120 
1121     noise_strength_solver_add(
1122         &noise_model->combined_state[channel].strength_solver,
1123         &noise_model->latest_state[channel].strength_solver);
1124 
1125     if (!aom_noise_strength_solver_solve(
1126             &noise_model->combined_state[channel].strength_solver)) {
1127       fprintf(stderr, "Solving combined noise strength failed!\n");
1128       return AOM_NOISE_STATUS_INTERNAL_ERROR;
1129     }
1130   }
1131 
1132   return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
1133                            : AOM_NOISE_STATUS_OK;
1134 }
1135 
// Overwrites the combined (multi-frame) state of every channel with the
// latest (single-frame) state: both equation systems, the strength solver's
// equation count, the observation count and the AR gain.
void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
  for (int ch = 0; ch < 3; ++ch) {
    aom_noise_state_t *const dst = &noise_model->combined_state[ch];
    aom_noise_state_t *const src = &noise_model->latest_state[ch];

    equation_system_copy(&dst->eqns, &src->eqns);
    equation_system_copy(&dst->strength_solver.eqns,
                         &src->strength_solver.eqns);
    dst->strength_solver.num_equations = src->strength_solver.num_equations;
    dst->num_observations = src->num_observations;
    dst->ar_gain = src->ar_gain;
  }
}
1150 
// Converts the combined noise model into film grain parameters. film_grain is
// cleared except for its random_seed, then filled with the quantized scaling
// functions and AR coefficients of all three channels. Returns 1 on success
// and 0 if the lag exceeds 3 or a piecewise fit of a scaling function fails.
int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
                                         aom_film_grain_t *film_grain) {
  if (noise_model->params.lag > 3) {
    fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
    return 0;
  }

  // Everything but the caller-provided seed is rebuilt from scratch.
  const uint16_t saved_seed = film_grain->random_seed;
  memset(film_grain, 0, sizeof(*film_grain));
  film_grain->random_seed = saved_seed;

  film_grain->apply_grain = 1;
  film_grain->update_parameters = 1;

  film_grain->ar_coeff_lag = noise_model->params.lag;

  // Fit a piecewise-linear scaling function per channel: up to 14 points for
  // luma and 10 for each chroma channel. On failure, release the tables that
  // were already fitted.
  aom_noise_strength_lut_t scaling_points[3];
  for (int c = 0; c < 3; ++c) {
    const int max_points = (c == 0) ? 14 : 10;
    if (!aom_noise_strength_solver_fit_piecewise(
            &noise_model->combined_state[c].strength_solver, max_points,
            scaling_points + c)) {
      for (int fitted = 0; fitted < c; ++fitted) {
        aom_noise_strength_lut_free(scaling_points + fitted);
      }
      return 0;
    }
  }

  // Both the domain and the range of the scaling functions in the film_grain
  // are normalized to 8-bit (e.g., they are implicitly scaled during grain
  // synthesis).
  const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
  double max_scaling_value = 1e-4;
  for (int c = 0; c < 3; ++c) {
    aom_noise_strength_lut_t *const lut = &scaling_points[c];
    for (int i = 0; i < lut->num_points; ++i) {
      lut->points[i][0] = AOMMIN(255, lut->points[i][0] / strength_divisor);
      lut->points[i][1] = AOMMIN(255, lut->points[i][1] / strength_divisor);
      max_scaling_value = AOMMAX(lut->points[i][1], max_scaling_value);
    }
  }

  // Scaling_shift values are in the range [8,11]
  const int max_scaling_value_log2 =
      clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
  film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);

  const double scale_factor = 1 << (8 - max_scaling_value_log2);
  film_grain->num_y_points = scaling_points[0].num_points;
  film_grain->num_cb_points = scaling_points[1].num_points;
  film_grain->num_cr_points = scaling_points[2].num_points;

  int(*film_grain_scaling[3])[2] = {
    film_grain->scaling_points_y,
    film_grain->scaling_points_cb,
    film_grain->scaling_points_cr,
  };
  // Round the scaling points to integers; each table is released as soon as
  // it has been copied out.
  for (int c = 0; c < 3; ++c) {
    const aom_noise_strength_lut_t *const lut = &scaling_points[c];
    for (int i = 0; i < lut->num_points; ++i) {
      film_grain_scaling[c][i][0] = (int)(lut->points[i][0] + 0.5);
      film_grain_scaling[c][i][1] =
          clamp((int)(scale_factor * lut->points[i][1] + 0.5), 0, 255);
    }
    aom_noise_strength_lut_free(scaling_points + c);
  }

  // Convert the ar_coeffs into 8-bit values
  const int n_coeff = noise_model->combined_state[0].eqns.n;
  double max_coeff = 1e-4, min_coeff = -1e-4;
  double y_corr[2] = { 0, 0 };
  double avg_luma_strength = 0;
  for (int c = 0; c < 3; ++c) {
    aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
    for (int i = 0; i < n_coeff; ++i) {
      max_coeff = AOMMAX(max_coeff, eqns->x[i]);
      min_coeff = AOMMIN(min_coeff, eqns->x[i]);
    }

    // Since the correlation between luma/chroma was computed in an already
    // scaled space, we adjust it in the un-scaled space. This needs a
    // weighted average of the strength for the channel, where each unknown
    // is weighted by the square root of its row sum in A.
    aom_noise_strength_solver_t *solver =
        &noise_model->combined_state[c].strength_solver;
    const int num_bins = solver->eqns.n;
    double average_strength = 0, total_weight = 0;
    for (int i = 0; i < num_bins; ++i) {
      double row_weight = 0;
      for (int j = 0; j < num_bins; ++j) {
        row_weight += solver->eqns.A[i * num_bins + j];
      }
      row_weight = sqrt(row_weight);
      average_strength += solver->eqns.x[i] * row_weight;
      total_weight += row_weight;
    }
    if (total_weight == 0) {
      average_strength = 1;
    } else {
      average_strength /= total_weight;
    }

    if (c == 0) {
      avg_luma_strength = average_strength;
    } else {
      y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
      max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
      min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
    }
  }

  // Shift value: AR coeffs range (values 6-9)
  // 6: [-2, 2),  7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
  film_grain->ar_coeff_shift =
      clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
            6, 9);
  double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
  int *ar_coeffs[3] = {
    film_grain->ar_coeffs_y,
    film_grain->ar_coeffs_cb,
    film_grain->ar_coeffs_cr,
  };
  for (int c = 0; c < 3; ++c) {
    aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
    int *const dst = ar_coeffs[c];
    for (int i = 0; i < n_coeff; ++i) {
      dst[i] = clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
    }
    // Chroma channels carry one extra coefficient: the luma correlation.
    if (c > 0) {
      dst[n_coeff] =
          clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
    }
  }

  // At the moment, the noise modeling code assumes that the chroma scaling
  // functions are a function of luma.
  film_grain->cb_mult = 128;       // 8 bits
  film_grain->cb_luma_mult = 192;  // 8 bits
  film_grain->cb_offset = 256;     // 9 bits

  film_grain->cr_mult = 128;       // 8 bits
  film_grain->cr_luma_mult = 192;  // 8 bits
  film_grain->cr_offset = 256;     // 9 bits

  film_grain->chroma_scaling_from_luma = 0;
  film_grain->grain_scale_shift = 0;
  film_grain->overlap_flag = 1;
  return 1;
}
1305 
// Multiplies the first n elements of b in place by the corresponding elements
// of a. Nothing is touched when n <= 0.
static void pointwise_multiply(const float *a, float *b, int n) {
  for (int remaining = n; remaining > 0; --remaining) {
    *b++ *= *a++;
  }
}
1311 
get_half_cos_window(int block_size)1312 static float *get_half_cos_window(int block_size) {
1313   float *window_function =
1314       (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
1315   if (!window_function) return NULL;
1316   for (int y = 0; y < block_size; ++y) {
1317     const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
1318     for (int x = 0; x < block_size; ++x) {
1319       const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
1320       window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
1321     }
1322   }
1323   return window_function;
1324 }
1325 
1326 #define DITHER_AND_QUANTIZE(INT_TYPE, suffix)                               \
1327   static void dither_and_quantize_##suffix(                                 \
1328       float *result, int result_stride, INT_TYPE *denoised, int w, int h,   \
1329       int stride, int chroma_sub_w, int chroma_sub_h, int block_size,       \
1330       float block_normalization) {                                          \
1331     for (int y = 0; y < (h >> chroma_sub_h); ++y) {                         \
1332       for (int x = 0; x < (w >> chroma_sub_w); ++x) {                       \
1333         const int result_idx =                                              \
1334             (y + (block_size >> chroma_sub_h)) * result_stride + x +        \
1335             (block_size >> chroma_sub_w);                                   \
1336         INT_TYPE new_val = (INT_TYPE)AOMMIN(                                \
1337             AOMMAX(result[result_idx] * block_normalization + 0.5f, 0),     \
1338             block_normalization);                                           \
1339         const float err =                                                   \
1340             -(((float)new_val) / block_normalization - result[result_idx]); \
1341         denoised[y * stride + x] = new_val;                                 \
1342         if (x + 1 < (w >> chroma_sub_w)) {                                  \
1343           result[result_idx + 1] += err * 7.0f / 16.0f;                     \
1344         }                                                                   \
1345         if (y + 1 < (h >> chroma_sub_h)) {                                  \
1346           if (x > 0) {                                                      \
1347             result[result_idx + result_stride - 1] += err * 3.0f / 16.0f;   \
1348           }                                                                 \
1349           result[result_idx + result_stride] += err * 5.0f / 16.0f;         \
1350           if (x + 1 < (w >> chroma_sub_w)) {                                \
1351             result[result_idx + result_stride + 1] += err * 1.0f / 16.0f;   \
1352           }                                                                 \
1353         }                                                                   \
1354       }                                                                     \
1355     }                                                                       \
1356   }
1357 
// Instantiate the dither-and-quantize helper once per sample type. The two
// instances are called further down as dither_and_quantize_lowbd (uint8_t
// output) and dither_and_quantize_highbd (uint16_t output).
DITHER_AND_QUANTIZE(uint8_t, lowbd)
DITHER_AND_QUANTIZE(uint16_t, highbd)
1360 
1361 int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
1362                           int w, int h, int stride[3], int chroma_sub[2],
1363                           float *noise_psd[3], int block_size, int bit_depth,
1364                           int use_highbd) {
1365   float *plane = NULL, *block = NULL, *window_full = NULL,
1366         *window_chroma = NULL;
1367   double *block_d = NULL, *plane_d = NULL;
1368   struct aom_noise_tx_t *tx_full = NULL;
1369   struct aom_noise_tx_t *tx_chroma = NULL;
1370   const int num_blocks_w = (w + block_size - 1) / block_size;
1371   const int num_blocks_h = (h + block_size - 1) / block_size;
1372   const int result_stride = (num_blocks_w + 2) * block_size;
1373   const int result_height = (num_blocks_h + 2) * block_size;
1374   float *result = NULL;
1375   int init_success = 1;
1376   aom_flat_block_finder_t block_finder_full;
1377   aom_flat_block_finder_t block_finder_chroma;
1378   const float kBlockNormalization = (float)((1 << bit_depth) - 1);
1379   if (chroma_sub[0] != chroma_sub[1]) {
1380     fprintf(stderr,
1381             "aom_wiener_denoise_2d doesn't handle different chroma "
1382             "subsampling\n");
1383     return 0;
1384   }
1385   init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
1386                                              bit_depth, use_highbd);
1387   result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
1388                                sizeof(*result));
1389   plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
1390   block =
1391       (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
1392   block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
1393   plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
1394   window_full = get_half_cos_window(block_size);
1395   tx_full = aom_noise_tx_malloc(block_size);
1396 
1397   if (chroma_sub[0] != 0) {
1398     init_success &= aom_flat_block_finder_init(&block_finder_chroma,
1399                                                block_size >> chroma_sub[0],
1400                                                bit_depth, use_highbd);
1401     window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
1402     tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
1403   } else {
1404     window_chroma = window_full;
1405     tx_chroma = tx_full;
1406   }
1407 
1408   init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
1409                   (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
1410                   (window_full != NULL) && (window_chroma != NULL) &&
1411                   (result != NULL);
1412   for (int c = init_success ? 0 : 3; c < 3; ++c) {
1413     float *window_function = c == 0 ? window_full : window_chroma;
1414     aom_flat_block_finder_t *block_finder = &block_finder_full;
1415     const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
1416     const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
1417     struct aom_noise_tx_t *tx =
1418         (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
1419     if (!data[c] || !denoised[c]) continue;
1420     if (c > 0 && chroma_sub[0] != 0) {
1421       block_finder = &block_finder_chroma;
1422     }
1423     memset(result, 0, sizeof(*result) * result_stride * result_height);
1424     // Do overlapped block processing (half overlapped). The block rows can
1425     // easily be done in parallel
1426     for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
1427          offsy += (block_size >> chroma_sub_h) / 2) {
1428       for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
1429            offsx += (block_size >> chroma_sub_w) / 2) {
1430         // Pad the boundary when processing each block-set.
1431         for (int by = -1; by < num_blocks_h; ++by) {
1432           for (int bx = -1; bx < num_blocks_w; ++bx) {
1433             const int pixels_per_block =
1434                 (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
1435             aom_flat_block_finder_extract_block(
1436                 block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
1437                 stride[c], bx * (block_size >> chroma_sub_w) + offsx,
1438                 by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
1439             for (int j = 0; j < pixels_per_block; ++j) {
1440               block[j] = (float)block_d[j];
1441               plane[j] = (float)plane_d[j];
1442             }
1443             pointwise_multiply(window_function, block, pixels_per_block);
1444             aom_noise_tx_forward(tx, block);
1445             aom_noise_tx_filter(tx, noise_psd[c]);
1446             aom_noise_tx_inverse(tx, block);
1447 
1448             // Apply window function to the plane approximation (we will apply
1449             // it to the sum of plane + block when composing the results).
1450             pointwise_multiply(window_function, plane, pixels_per_block);
1451 
1452             for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
1453               const int y_result =
1454                   y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
1455               for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
1456                 const int x_result =
1457                     x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
1458                 result[y_result * result_stride + x_result] +=
1459                     (block[y * (block_size >> chroma_sub_w) + x] +
1460                      plane[y * (block_size >> chroma_sub_w) + x]) *
1461                     window_function[y * (block_size >> chroma_sub_w) + x];
1462               }
1463             }
1464           }
1465         }
1466       }
1467     }
1468     if (use_highbd) {
1469       dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
1470                                  w, h, stride[c], chroma_sub_w, chroma_sub_h,
1471                                  block_size, kBlockNormalization);
1472     } else {
1473       dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
1474                                 stride[c], chroma_sub_w, chroma_sub_h,
1475                                 block_size, kBlockNormalization);
1476     }
1477   }
1478   aom_free(result);
1479   aom_free(plane);
1480   aom_free(block);
1481   aom_free(plane_d);
1482   aom_free(block_d);
1483   aom_free(window_full);
1484 
1485   aom_noise_tx_free(tx_full);
1486 
1487   aom_flat_block_finder_free(&block_finder_full);
1488   if (chroma_sub[0] != 0) {
1489     aom_flat_block_finder_free(&block_finder_chroma);
1490     aom_free(window_chroma);
1491     aom_noise_tx_free(tx_chroma);
1492   }
1493   return init_success;
1494 }
1495 
1496 struct aom_denoise_and_model_t {
1497   int block_size;
1498   int bit_depth;
1499   float noise_level;
1500 
1501   // Size of current denoised buffer and flat_block buffer
1502   int width;
1503   int height;
1504   int y_stride;
1505   int uv_stride;
1506   int num_blocks_w;
1507   int num_blocks_h;
1508 
1509   // Buffers for image and noise_psd allocated on the fly
1510   float *noise_psd[3];
1511   uint8_t *denoised[3];
1512   uint8_t *flat_blocks;
1513 
1514   aom_flat_block_finder_t flat_block_finder;
1515   aom_noise_model_t noise_model;
1516 };
1517 
aom_denoise_and_model_alloc(int bit_depth,int block_size,float noise_level)1518 struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
1519                                                             int block_size,
1520                                                             float noise_level) {
1521   struct aom_denoise_and_model_t *ctx =
1522       (struct aom_denoise_and_model_t *)aom_malloc(
1523           sizeof(struct aom_denoise_and_model_t));
1524   if (!ctx) {
1525     fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
1526     return NULL;
1527   }
1528   memset(ctx, 0, sizeof(*ctx));
1529 
1530   ctx->block_size = block_size;
1531   ctx->noise_level = noise_level;
1532   ctx->bit_depth = bit_depth;
1533 
1534   ctx->noise_psd[0] =
1535       aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
1536   ctx->noise_psd[1] =
1537       aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
1538   ctx->noise_psd[2] =
1539       aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
1540   if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
1541     fprintf(stderr, "Unable to allocate noise PSD buffers\n");
1542     aom_denoise_and_model_free(ctx);
1543     return NULL;
1544   }
1545   return ctx;
1546 }
1547 
aom_denoise_and_model_free(struct aom_denoise_and_model_t * ctx)1548 void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
1549   aom_free(ctx->flat_blocks);
1550   for (int i = 0; i < 3; ++i) {
1551     aom_free(ctx->denoised[i]);
1552     aom_free(ctx->noise_psd[i]);
1553   }
1554   aom_noise_model_free(&ctx->noise_model);
1555   aom_flat_block_finder_free(&ctx->flat_block_finder);
1556   aom_free(ctx);
1557 }
1558 
// Makes sure the context's per-frame state matches the geometry of `sd`.
//
// If the cached width, height and strides already match the frame, nothing is
// done. Otherwise the denoised planes and the flat-block map are reallocated,
// the flat block finder and noise model are re-initialized, and the noise PSD
// buffers are refilled with a flat default value.
//
// Returns 1 on success and 0 on failure (with a message on stderr). The
// cached geometry is only committed on success, so a failed attempt is
// retried on the next call instead of being mistaken for a valid state.
static int denoise_and_model_realloc_if_necessary(
    struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
  if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
      ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
    return 1;
  const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
  const int block_size = ctx->block_size;

  // Invalidate the cached geometry while the buffers are in flux. If any
  // step below fails, the early-out above must not fire for this frame.
  ctx->width = 0;
  ctx->height = 0;
  ctx->y_stride = 0;
  ctx->uv_stride = 0;

  for (int i = 0; i < 3; ++i) {
    aom_free(ctx->denoised[i]);
    ctx->denoised[i] = NULL;
  }
  aom_free(ctx->flat_blocks);
  ctx->flat_blocks = NULL;

  // High bit depth planes need two bytes per sample, hence the shift.
  ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
  ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
  ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
  if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
    fprintf(stderr, "Unable to allocate denoise buffers\n");
    return 0;
  }
  // One flat-block byte per (rounded-up) luma block.
  ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
  ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
  ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
  if (!ctx->flat_blocks) {
    fprintf(stderr, "Unable to allocate flat_blocks buffer\n");
    return 0;
  }

  aom_flat_block_finder_free(&ctx->flat_block_finder);
  if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
                                  ctx->bit_depth, use_highbd)) {
    fprintf(stderr, "Unable to init flat block finder\n");
    return 0;
  }

  const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
                                            ctx->bit_depth, use_highbd };
  aom_noise_model_free(&ctx->noise_model);
  if (!aom_noise_model_init(&ctx->noise_model, params)) {
    fprintf(stderr, "Unable to init noise model\n");
    return 0;
  }

  // Simply use a flat PSD (although we could use the flat blocks to estimate
  // an actual noise PSD).
  const float y_noise_level =
      aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
  const float uv_noise_level = aom_noise_psd_get_default_value(
      ctx->block_size >> sd->subsampling_x, ctx->noise_level);
  for (int i = 0; i < block_size * block_size; ++i) {
    ctx->noise_psd[0][i] = y_noise_level;
    ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
  }

  // Everything is consistent with this frame; remember its geometry.
  ctx->width = sd->y_width;
  ctx->height = sd->y_height;
  ctx->y_stride = sd->y_stride;
  ctx->uv_stride = sd->uv_stride;
  return 1;
}
1621 
// Runs one frame through the denoise + noise-model pipeline:
//   1. (re)sizes the context for the frame,
//   2. locates flat blocks in the luma plane,
//   3. denoises all three planes into the context's buffers,
//   4. updates the noise model from source vs. denoised data, and
//   5. if a noise estimate is available, fills `film_grain` and, when
//      apply_denoise is non-zero, copies the denoised planes back into `sd`.
//
// Returns 1 on success (including the case where no noise estimate is
// available yet) and 0 on failure, after printing a message to stderr.
int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
                              YV12_BUFFER_CONFIG *sd,
                              aom_film_grain_t *film_grain, int apply_denoise) {
  const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
  const int block_size = ctx->block_size;

  // For high bit depth frames the stored buffer pointers have to be
  // converted before they can be addressed as sample data.
  uint8_t *raw_data[3] = { sd->y_buffer, sd->u_buffer, sd->v_buffer };
  if (use_highbd) {
    raw_data[0] = (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer);
    raw_data[1] = (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer);
    raw_data[2] = (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer);
  }
  const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
  int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
  int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };

  if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
    fprintf(stderr, "Unable to realloc buffers\n");
    return 0;
  }

  aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
                            sd->y_height, strides[0], ctx->flat_blocks);

  if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
                             strides, chroma_sub_log2, ctx->noise_psd,
                             block_size, ctx->bit_depth, use_highbd)) {
    fprintf(stderr, "Unable to denoise image\n");
    return 0;
  }

  const aom_noise_status_t status = aom_noise_model_update(
      &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
      sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
      block_size);

  int have_noise_estimate;
  switch (status) {
    case AOM_NOISE_STATUS_OK: have_noise_estimate = 1; break;
    case AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE:
      aom_noise_model_save_latest(&ctx->noise_model);
      have_noise_estimate = 1;
      break;
    default:
      // Unable to update noise model; proceed if we have a previous estimate.
      have_noise_estimate =
          (ctx->noise_model.combined_state[0].strength_solver.num_equations >
           0);
      break;
  }

  film_grain->apply_grain = 0;
  if (!have_noise_estimate) return 1;

  if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
    fprintf(stderr, "Unable to get grain parameters.\n");
    return 0;
  }
  // Substitute a fixed non-zero seed when none has been set.
  if (film_grain->random_seed == 0) {
    film_grain->random_seed = 7391;
  }
  if (apply_denoise) {
    // Overwrite the source planes in place; byte counts double for highbd.
    const int plane_rows[3] = { sd->y_height, sd->uv_height, sd->uv_height };
    for (int p = 0; p < 3; ++p) {
      memcpy(raw_data[p], ctx->denoised[p],
             (strides[p] * plane_rows[p]) << use_highbd);
    }
  }
  return 1;
}
1687