1 /*
2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include "aom_dsp/aom_dsp_common.h"
18 #include "aom_dsp/noise_model.h"
19 #include "aom_dsp/noise_util.h"
20 #include "aom_mem/aom_mem.h"
21 #include "av1/common/common.h"
22 #include "av1/encoder/mathutils.h"
23
24 #define kLowPolyNumParams 3
25
26 static const int kMaxLag = 4;
27
28 // Defines a function that can be used to obtain the mean of a block for the
29 // provided data type (uint8_t, or uint16_t)
30 #define GET_BLOCK_MEAN(INT_TYPE, suffix) \
31 static double get_block_mean_##suffix(const INT_TYPE *data, int w, int h, \
32 int stride, int x_o, int y_o, \
33 int block_size) { \
34 const int max_h = AOMMIN(h - y_o, block_size); \
35 const int max_w = AOMMIN(w - x_o, block_size); \
36 double block_mean = 0; \
37 for (int y = 0; y < max_h; ++y) { \
38 for (int x = 0; x < max_w; ++x) { \
39 block_mean += data[(y_o + y) * stride + x_o + x]; \
40 } \
41 } \
42 return block_mean / (max_w * max_h); \
43 }
44
45 GET_BLOCK_MEAN(uint8_t, lowbd);
46 GET_BLOCK_MEAN(uint16_t, highbd);
47
get_block_mean(const uint8_t * data,int w,int h,int stride,int x_o,int y_o,int block_size,int use_highbd)48 static INLINE double get_block_mean(const uint8_t *data, int w, int h,
49 int stride, int x_o, int y_o,
50 int block_size, int use_highbd) {
51 if (use_highbd)
52 return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o,
53 block_size);
54 return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
55 }
56
57 // Defines a function that can be used to obtain the variance of a block
58 // for the provided data type (uint8_t, or uint16_t)
59 #define GET_NOISE_VAR(INT_TYPE, suffix) \
60 static double get_noise_var_##suffix( \
61 const INT_TYPE *data, const INT_TYPE *denoised, int stride, int w, \
62 int h, int x_o, int y_o, int block_size_x, int block_size_y) { \
63 const int max_h = AOMMIN(h - y_o, block_size_y); \
64 const int max_w = AOMMIN(w - x_o, block_size_x); \
65 double noise_var = 0; \
66 double noise_mean = 0; \
67 for (int y = 0; y < max_h; ++y) { \
68 for (int x = 0; x < max_w; ++x) { \
69 double noise = (double)data[(y_o + y) * stride + x_o + x] - \
70 denoised[(y_o + y) * stride + x_o + x]; \
71 noise_mean += noise; \
72 noise_var += noise * noise; \
73 } \
74 } \
75 noise_mean /= (max_w * max_h); \
76 return noise_var / (max_w * max_h) - noise_mean * noise_mean; \
77 }
78
79 GET_NOISE_VAR(uint8_t, lowbd);
80 GET_NOISE_VAR(uint16_t, highbd);
81
get_noise_var(const uint8_t * data,const uint8_t * denoised,int w,int h,int stride,int x_o,int y_o,int block_size_x,int block_size_y,int use_highbd)82 static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised,
83 int w, int h, int stride, int x_o, int y_o,
84 int block_size_x, int block_size_y,
85 int use_highbd) {
86 if (use_highbd)
87 return get_noise_var_highbd((const uint16_t *)data,
88 (const uint16_t *)denoised, w, h, stride, x_o,
89 y_o, block_size_x, block_size_y);
90 return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o,
91 block_size_x, block_size_y);
92 }
93
equation_system_clear(aom_equation_system_t * eqns)94 static void equation_system_clear(aom_equation_system_t *eqns) {
95 const int n = eqns->n;
96 memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
97 memset(eqns->x, 0, sizeof(*eqns->x) * n);
98 memset(eqns->b, 0, sizeof(*eqns->b) * n);
99 }
100
equation_system_copy(aom_equation_system_t * dst,const aom_equation_system_t * src)101 static void equation_system_copy(aom_equation_system_t *dst,
102 const aom_equation_system_t *src) {
103 const int n = dst->n;
104 memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
105 memcpy(dst->x, src->x, sizeof(*dst->x) * n);
106 memcpy(dst->b, src->b, sizeof(*dst->b) * n);
107 }
108
equation_system_init(aom_equation_system_t * eqns,int n)109 static int equation_system_init(aom_equation_system_t *eqns, int n) {
110 eqns->A = (double *)aom_malloc(sizeof(*eqns->A) * n * n);
111 eqns->b = (double *)aom_malloc(sizeof(*eqns->b) * n);
112 eqns->x = (double *)aom_malloc(sizeof(*eqns->x) * n);
113 eqns->n = n;
114 if (!eqns->A || !eqns->b || !eqns->x) {
115 fprintf(stderr, "Failed to allocate system of equations of size %d\n", n);
116 aom_free(eqns->A);
117 aom_free(eqns->b);
118 aom_free(eqns->x);
119 memset(eqns, 0, sizeof(*eqns));
120 return 0;
121 }
122 equation_system_clear(eqns);
123 return 1;
124 }
125
equation_system_solve(aom_equation_system_t * eqns)126 static int equation_system_solve(aom_equation_system_t *eqns) {
127 const int n = eqns->n;
128 double *b = (double *)aom_malloc(sizeof(*b) * n);
129 double *A = (double *)aom_malloc(sizeof(*A) * n * n);
130 int ret = 0;
131 if (A == NULL || b == NULL) {
132 fprintf(stderr, "Unable to allocate temp values of size %dx%d\n", n, n);
133 aom_free(b);
134 aom_free(A);
135 return 0;
136 }
137 memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
138 memcpy(b, eqns->b, sizeof(*eqns->b) * n);
139 ret = linsolve(n, A, eqns->n, b, eqns->x);
140 aom_free(b);
141 aom_free(A);
142
143 if (ret == 0) {
144 return 0;
145 }
146 return 1;
147 }
148
equation_system_add(aom_equation_system_t * dest,aom_equation_system_t * src)149 static void equation_system_add(aom_equation_system_t *dest,
150 aom_equation_system_t *src) {
151 const int n = dest->n;
152 int i, j;
153 for (i = 0; i < n; ++i) {
154 for (j = 0; j < n; ++j) {
155 dest->A[i * n + j] += src->A[i * n + j];
156 }
157 dest->b[i] += src->b[i];
158 }
159 }
160
equation_system_free(aom_equation_system_t * eqns)161 static void equation_system_free(aom_equation_system_t *eqns) {
162 if (!eqns) return;
163 aom_free(eqns->A);
164 aom_free(eqns->b);
165 aom_free(eqns->x);
166 memset(eqns, 0, sizeof(*eqns));
167 }
168
noise_strength_solver_clear(aom_noise_strength_solver_t * solver)169 static void noise_strength_solver_clear(aom_noise_strength_solver_t *solver) {
170 equation_system_clear(&solver->eqns);
171 solver->num_equations = 0;
172 solver->total = 0;
173 }
174
noise_strength_solver_add(aom_noise_strength_solver_t * dest,aom_noise_strength_solver_t * src)175 static void noise_strength_solver_add(aom_noise_strength_solver_t *dest,
176 aom_noise_strength_solver_t *src) {
177 equation_system_add(&dest->eqns, &src->eqns);
178 dest->num_equations += src->num_equations;
179 dest->total += src->total;
180 }
181
182 // Return the number of coefficients required for the given parameters
num_coeffs(const aom_noise_model_params_t params)183 static int num_coeffs(const aom_noise_model_params_t params) {
184 const int n = 2 * params.lag + 1;
185 switch (params.shape) {
186 case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
187 case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
188 }
189 return 0;
190 }
191
noise_state_init(aom_noise_state_t * state,int n,int bit_depth)192 static int noise_state_init(aom_noise_state_t *state, int n, int bit_depth) {
193 const int kNumBins = 20;
194 if (!equation_system_init(&state->eqns, n)) {
195 fprintf(stderr, "Failed initialization noise state with size %d\n", n);
196 return 0;
197 }
198 state->ar_gain = 1.0;
199 state->num_observations = 0;
200 return aom_noise_strength_solver_init(&state->strength_solver, kNumBins,
201 bit_depth);
202 }
203
set_chroma_coefficient_fallback_soln(aom_equation_system_t * eqns)204 static void set_chroma_coefficient_fallback_soln(aom_equation_system_t *eqns) {
205 const double kTolerance = 1e-6;
206 const int last = eqns->n - 1;
207 // Set all of the AR coefficients to zero, but try to solve for correlation
208 // with the luma channel
209 memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
210 if (fabs(eqns->A[last * eqns->n + last]) > kTolerance) {
211 eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
212 }
213 }
214
aom_noise_strength_lut_init(aom_noise_strength_lut_t * lut,int num_points)215 int aom_noise_strength_lut_init(aom_noise_strength_lut_t *lut, int num_points) {
216 if (!lut) return 0;
217 lut->num_points = 0;
218 lut->points = (double(*)[2])aom_malloc(num_points * sizeof(*lut->points));
219 if (!lut->points) return 0;
220 lut->num_points = num_points;
221 memset(lut->points, 0, sizeof(*lut->points) * num_points);
222 return 1;
223 }
224
aom_noise_strength_lut_free(aom_noise_strength_lut_t * lut)225 void aom_noise_strength_lut_free(aom_noise_strength_lut_t *lut) {
226 if (!lut) return;
227 aom_free(lut->points);
228 memset(lut, 0, sizeof(*lut));
229 }
230
aom_noise_strength_lut_eval(const aom_noise_strength_lut_t * lut,double x)231 double aom_noise_strength_lut_eval(const aom_noise_strength_lut_t *lut,
232 double x) {
233 int i = 0;
234 // Constant extrapolation for x < x_0.
235 if (x < lut->points[0][0]) return lut->points[0][1];
236 for (i = 0; i < lut->num_points - 1; ++i) {
237 if (x >= lut->points[i][0] && x <= lut->points[i + 1][0]) {
238 const double a =
239 (x - lut->points[i][0]) / (lut->points[i + 1][0] - lut->points[i][0]);
240 return lut->points[i + 1][1] * a + lut->points[i][1] * (1.0 - a);
241 }
242 }
243 // Constant extrapolation for x > x_{n-1}
244 return lut->points[lut->num_points - 1][1];
245 }
246
noise_strength_solver_get_bin_index(const aom_noise_strength_solver_t * solver,double value)247 static double noise_strength_solver_get_bin_index(
248 const aom_noise_strength_solver_t *solver, double value) {
249 const double val =
250 fclamp(value, solver->min_intensity, solver->max_intensity);
251 const double range = solver->max_intensity - solver->min_intensity;
252 return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
253 }
254
noise_strength_solver_get_value(const aom_noise_strength_solver_t * solver,double x)255 static double noise_strength_solver_get_value(
256 const aom_noise_strength_solver_t *solver, double x) {
257 const double bin = noise_strength_solver_get_bin_index(solver, x);
258 const int bin_i0 = (int)floor(bin);
259 const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
260 const double a = bin - bin_i0;
261 return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
262 }
263
aom_noise_strength_solver_add_measurement(aom_noise_strength_solver_t * solver,double block_mean,double noise_std)264 void aom_noise_strength_solver_add_measurement(
265 aom_noise_strength_solver_t *solver, double block_mean, double noise_std) {
266 const double bin = noise_strength_solver_get_bin_index(solver, block_mean);
267 const int bin_i0 = (int)floor(bin);
268 const int bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
269 const double a = bin - bin_i0;
270 const int n = solver->num_bins;
271 solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
272 solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
273 solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
274 solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
275 solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
276 solver->eqns.b[bin_i1] += a * noise_std;
277 solver->total += noise_std;
278 solver->num_equations++;
279 }
280
aom_noise_strength_solver_solve(aom_noise_strength_solver_t * solver)281 int aom_noise_strength_solver_solve(aom_noise_strength_solver_t *solver) {
282 // Add regularization proportional to the number of constraints
283 const int n = solver->num_bins;
284 const double kAlpha = 2.0 * (double)(solver->num_equations) / n;
285 int result = 0;
286 double mean = 0;
287
288 // Do this in a non-destructive manner so it is not confusing to the caller
289 double *old_A = solver->eqns.A;
290 double *A = (double *)aom_malloc(sizeof(*A) * n * n);
291 if (!A) {
292 fprintf(stderr, "Unable to allocate copy of A\n");
293 return 0;
294 }
295 memcpy(A, old_A, sizeof(*A) * n * n);
296
297 for (int i = 0; i < n; ++i) {
298 const int i_lo = AOMMAX(0, i - 1);
299 const int i_hi = AOMMIN(n - 1, i + 1);
300 A[i * n + i_lo] -= kAlpha;
301 A[i * n + i] += 2 * kAlpha;
302 A[i * n + i_hi] -= kAlpha;
303 }
304
305 // Small regularization to give average noise strength
306 mean = solver->total / solver->num_equations;
307 for (int i = 0; i < n; ++i) {
308 A[i * n + i] += 1.0 / 8192.;
309 solver->eqns.b[i] += mean / 8192.;
310 }
311 solver->eqns.A = A;
312 result = equation_system_solve(&solver->eqns);
313 solver->eqns.A = old_A;
314
315 aom_free(A);
316 return result;
317 }
318
aom_noise_strength_solver_init(aom_noise_strength_solver_t * solver,int num_bins,int bit_depth)319 int aom_noise_strength_solver_init(aom_noise_strength_solver_t *solver,
320 int num_bins, int bit_depth) {
321 if (!solver) return 0;
322 memset(solver, 0, sizeof(*solver));
323 solver->num_bins = num_bins;
324 solver->min_intensity = 0;
325 solver->max_intensity = (1 << bit_depth) - 1;
326 solver->total = 0;
327 solver->num_equations = 0;
328 return equation_system_init(&solver->eqns, num_bins);
329 }
330
aom_noise_strength_solver_free(aom_noise_strength_solver_t * solver)331 void aom_noise_strength_solver_free(aom_noise_strength_solver_t *solver) {
332 if (!solver) return;
333 equation_system_free(&solver->eqns);
334 }
335
aom_noise_strength_solver_get_center(const aom_noise_strength_solver_t * solver,int i)336 double aom_noise_strength_solver_get_center(
337 const aom_noise_strength_solver_t *solver, int i) {
338 const double range = solver->max_intensity - solver->min_intensity;
339 const int n = solver->num_bins;
340 return ((double)i) / (n - 1) * range + solver->min_intensity;
341 }
342
343 // Computes the residual if a point were to be removed from the lut. This is
344 // calculated as the area between the output of the solver and the line segment
345 // that would be formed between [x_{i - 1}, x_{i + 1}).
update_piecewise_linear_residual(const aom_noise_strength_solver_t * solver,const aom_noise_strength_lut_t * lut,double * residual,int start,int end)346 static void update_piecewise_linear_residual(
347 const aom_noise_strength_solver_t *solver,
348 const aom_noise_strength_lut_t *lut, double *residual, int start, int end) {
349 const double dx = 255. / solver->num_bins;
350 for (int i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
351 const int lower = AOMMAX(0, (int)floor(noise_strength_solver_get_bin_index(
352 solver, lut->points[i - 1][0])));
353 const int upper = AOMMIN(solver->num_bins - 1,
354 (int)ceil(noise_strength_solver_get_bin_index(
355 solver, lut->points[i + 1][0])));
356 double r = 0;
357 for (int j = lower; j <= upper; ++j) {
358 const double x = aom_noise_strength_solver_get_center(solver, j);
359 if (x < lut->points[i - 1][0]) continue;
360 if (x >= lut->points[i + 1][0]) continue;
361 const double y = solver->eqns.x[j];
362 const double a = (x - lut->points[i - 1][0]) /
363 (lut->points[i + 1][0] - lut->points[i - 1][0]);
364 const double estimate_y =
365 lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
366 r += fabs(y - estimate_y);
367 }
368 residual[i] = r * dx;
369 }
370 }
371
aom_noise_strength_solver_fit_piecewise(const aom_noise_strength_solver_t * solver,int max_output_points,aom_noise_strength_lut_t * lut)372 int aom_noise_strength_solver_fit_piecewise(
373 const aom_noise_strength_solver_t *solver, int max_output_points,
374 aom_noise_strength_lut_t *lut) {
375 // The tolerance is normalized to be give consistent results between
376 // different bit-depths.
377 const double kTolerance = solver->max_intensity * 0.00625 / 255.0;
378 if (!aom_noise_strength_lut_init(lut, solver->num_bins)) {
379 fprintf(stderr, "Failed to init lut\n");
380 return 0;
381 }
382 for (int i = 0; i < solver->num_bins; ++i) {
383 lut->points[i][0] = aom_noise_strength_solver_get_center(solver, i);
384 lut->points[i][1] = solver->eqns.x[i];
385 }
386 if (max_output_points < 0) {
387 max_output_points = solver->num_bins;
388 }
389
390 double *residual = aom_malloc(solver->num_bins * sizeof(*residual));
391 memset(residual, 0, sizeof(*residual) * solver->num_bins);
392
393 update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
394
395 // Greedily remove points if there are too many or if it doesn't hurt local
396 // approximation (never remove the end points)
397 while (lut->num_points > 2) {
398 int min_index = 1;
399 for (int j = 1; j < lut->num_points - 1; ++j) {
400 if (residual[j] < residual[min_index]) {
401 min_index = j;
402 }
403 }
404 const double dx =
405 lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
406 const double avg_residual = residual[min_index] / dx;
407 if (lut->num_points <= max_output_points && avg_residual > kTolerance) {
408 break;
409 }
410
411 const int num_remaining = lut->num_points - min_index - 1;
412 memmove(lut->points + min_index, lut->points + min_index + 1,
413 sizeof(lut->points[0]) * num_remaining);
414 lut->num_points--;
415
416 update_piecewise_linear_residual(solver, lut, residual, min_index - 1,
417 min_index + 1);
418 }
419 aom_free(residual);
420 return 1;
421 }
422
aom_flat_block_finder_init(aom_flat_block_finder_t * block_finder,int block_size,int bit_depth,int use_highbd)423 int aom_flat_block_finder_init(aom_flat_block_finder_t *block_finder,
424 int block_size, int bit_depth, int use_highbd) {
425 const int n = block_size * block_size;
426 aom_equation_system_t eqns;
427 double *AtA_inv = 0;
428 double *A = 0;
429 int x = 0, y = 0, i = 0, j = 0;
430 block_finder->A = NULL;
431 block_finder->AtA_inv = NULL;
432
433 if (!equation_system_init(&eqns, kLowPolyNumParams)) {
434 fprintf(stderr, "Failed to init equation system for block_size=%d\n",
435 block_size);
436 return 0;
437 }
438
439 AtA_inv = (double *)aom_malloc(kLowPolyNumParams * kLowPolyNumParams *
440 sizeof(*AtA_inv));
441 A = (double *)aom_malloc(kLowPolyNumParams * n * sizeof(*A));
442 if (AtA_inv == NULL || A == NULL) {
443 fprintf(stderr, "Failed to alloc A or AtA_inv for block_size=%d\n",
444 block_size);
445 aom_free(AtA_inv);
446 aom_free(A);
447 equation_system_free(&eqns);
448 return 0;
449 }
450
451 block_finder->A = A;
452 block_finder->AtA_inv = AtA_inv;
453 block_finder->block_size = block_size;
454 block_finder->normalization = (1 << bit_depth) - 1;
455 block_finder->use_highbd = use_highbd;
456
457 for (y = 0; y < block_size; ++y) {
458 const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
459 for (x = 0; x < block_size; ++x) {
460 const double xd = ((double)x - block_size / 2.) / (block_size / 2.);
461 const double coords[3] = { yd, xd, 1 };
462 const int row = y * block_size + x;
463 A[kLowPolyNumParams * row + 0] = yd;
464 A[kLowPolyNumParams * row + 1] = xd;
465 A[kLowPolyNumParams * row + 2] = 1;
466
467 for (i = 0; i < kLowPolyNumParams; ++i) {
468 for (j = 0; j < kLowPolyNumParams; ++j) {
469 eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
470 }
471 }
472 }
473 }
474
475 // Lazy inverse using existing equation solver.
476 for (i = 0; i < kLowPolyNumParams; ++i) {
477 memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
478 eqns.b[i] = 1;
479 equation_system_solve(&eqns);
480
481 for (j = 0; j < kLowPolyNumParams; ++j) {
482 AtA_inv[j * kLowPolyNumParams + i] = eqns.x[j];
483 }
484 }
485 equation_system_free(&eqns);
486 return 1;
487 }
488
aom_flat_block_finder_free(aom_flat_block_finder_t * block_finder)489 void aom_flat_block_finder_free(aom_flat_block_finder_t *block_finder) {
490 if (!block_finder) return;
491 aom_free(block_finder->A);
492 aom_free(block_finder->AtA_inv);
493 memset(block_finder, 0, sizeof(*block_finder));
494 }
495
aom_flat_block_finder_extract_block(const aom_flat_block_finder_t * block_finder,const uint8_t * const data,int w,int h,int stride,int offsx,int offsy,double * plane,double * block)496 void aom_flat_block_finder_extract_block(
497 const aom_flat_block_finder_t *block_finder, const uint8_t *const data,
498 int w, int h, int stride, int offsx, int offsy, double *plane,
499 double *block) {
500 const int block_size = block_finder->block_size;
501 const int n = block_size * block_size;
502 const double *A = block_finder->A;
503 const double *AtA_inv = block_finder->AtA_inv;
504 double plane_coords[kLowPolyNumParams];
505 double AtA_inv_b[kLowPolyNumParams];
506 int xi, yi, i;
507
508 if (block_finder->use_highbd) {
509 const uint16_t *const data16 = (const uint16_t *const)data;
510 for (yi = 0; yi < block_size; ++yi) {
511 const int y = clamp(offsy + yi, 0, h - 1);
512 for (xi = 0; xi < block_size; ++xi) {
513 const int x = clamp(offsx + xi, 0, w - 1);
514 block[yi * block_size + xi] =
515 ((double)data16[y * stride + x]) / block_finder->normalization;
516 }
517 }
518 } else {
519 for (yi = 0; yi < block_size; ++yi) {
520 const int y = clamp(offsy + yi, 0, h - 1);
521 for (xi = 0; xi < block_size; ++xi) {
522 const int x = clamp(offsx + xi, 0, w - 1);
523 block[yi * block_size + xi] =
524 ((double)data[y * stride + x]) / block_finder->normalization;
525 }
526 }
527 }
528 multiply_mat(block, A, AtA_inv_b, 1, n, kLowPolyNumParams);
529 multiply_mat(AtA_inv, AtA_inv_b, plane_coords, kLowPolyNumParams,
530 kLowPolyNumParams, 1);
531 multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
532
533 for (i = 0; i < n; ++i) {
534 block[i] -= plane[i];
535 }
536 }
537
538 typedef struct {
539 int index;
540 float score;
541 } index_and_score_t;
542
compare_scores(const void * a,const void * b)543 static int compare_scores(const void *a, const void *b) {
544 const float diff =
545 ((index_and_score_t *)a)->score - ((index_and_score_t *)b)->score;
546 if (diff < 0)
547 return -1;
548 else if (diff > 0)
549 return 1;
550 return 0;
551 }
552
aom_flat_block_finder_run(const aom_flat_block_finder_t * block_finder,const uint8_t * const data,int w,int h,int stride,uint8_t * flat_blocks)553 int aom_flat_block_finder_run(const aom_flat_block_finder_t *block_finder,
554 const uint8_t *const data, int w, int h,
555 int stride, uint8_t *flat_blocks) {
556 // The gradient-based features used in this code are based on:
557 // A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
558 // correlation for improved video denoising," 2012 19th, ICIP.
559 // The thresholds are more lenient to allow for correct grain modeling
560 // if extreme cases.
561 const int block_size = block_finder->block_size;
562 const int n = block_size * block_size;
563 const double kTraceThreshold = 0.15 / (32 * 32);
564 const double kRatioThreshold = 1.25;
565 const double kNormThreshold = 0.08 / (32 * 32);
566 const double kVarThreshold = 0.005 / (double)n;
567 const int num_blocks_w = (w + block_size - 1) / block_size;
568 const int num_blocks_h = (h + block_size - 1) / block_size;
569 int num_flat = 0;
570 int bx = 0, by = 0;
571 double *plane = (double *)aom_malloc(n * sizeof(*plane));
572 double *block = (double *)aom_malloc(n * sizeof(*block));
573 index_and_score_t *scores = (index_and_score_t *)aom_malloc(
574 num_blocks_w * num_blocks_h * sizeof(*scores));
575 if (plane == NULL || block == NULL || scores == NULL) {
576 fprintf(stderr, "Failed to allocate memory for block of size %d\n", n);
577 aom_free(plane);
578 aom_free(block);
579 aom_free(scores);
580 return -1;
581 }
582
583 #ifdef NOISE_MODEL_LOG_SCORE
584 fprintf(stderr, "score = [");
585 #endif
586 for (by = 0; by < num_blocks_h; ++by) {
587 for (bx = 0; bx < num_blocks_w; ++bx) {
588 // Compute gradient covariance matrix.
589 double Gxx = 0, Gxy = 0, Gyy = 0;
590 double var = 0;
591 double mean = 0;
592 int xi, yi;
593 aom_flat_block_finder_extract_block(block_finder, data, w, h, stride,
594 bx * block_size, by * block_size,
595 plane, block);
596
597 for (yi = 1; yi < block_size - 1; ++yi) {
598 for (xi = 1; xi < block_size - 1; ++xi) {
599 const double gx = (block[yi * block_size + xi + 1] -
600 block[yi * block_size + xi - 1]) /
601 2;
602 const double gy = (block[yi * block_size + xi + block_size] -
603 block[yi * block_size + xi - block_size]) /
604 2;
605 Gxx += gx * gx;
606 Gxy += gx * gy;
607 Gyy += gy * gy;
608
609 mean += block[yi * block_size + xi];
610 var += block[yi * block_size + xi] * block[yi * block_size + xi];
611 }
612 }
613 mean /= (block_size - 2) * (block_size - 2);
614
615 // Normalize gradients by block_size.
616 Gxx /= ((block_size - 2) * (block_size - 2));
617 Gxy /= ((block_size - 2) * (block_size - 2));
618 Gyy /= ((block_size - 2) * (block_size - 2));
619 var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
620
621 {
622 const double trace = Gxx + Gyy;
623 const double det = Gxx * Gyy - Gxy * Gxy;
624 const double e1 = (trace + sqrt(trace * trace - 4 * det)) / 2.;
625 const double e2 = (trace - sqrt(trace * trace - 4 * det)) / 2.;
626 const double norm = e1; // Spectral norm
627 const double ratio = (e1 / AOMMAX(e2, 1e-6));
628 const int is_flat = (trace < kTraceThreshold) &&
629 (ratio < kRatioThreshold) &&
630 (norm < kNormThreshold) && (var > kVarThreshold);
631 // The following weights are used to combine the above features to give
632 // a sigmoid score for flatness. If the input was normalized to [0,100]
633 // the magnitude of these values would be close to 1 (e.g., weights
634 // corresponding to variance would be a factor of 10000x smaller).
635 // The weights are given in the following order:
636 // [{var}, {ratio}, {trace}, {norm}, offset]
637 // with one of the most discriminative being simply the variance.
638 const double weights[5] = { -6682, -0.2056, 13087, -12434, 2.5694 };
639 double sum_weights = weights[0] * var + weights[1] * ratio +
640 weights[2] * trace + weights[3] * norm +
641 weights[4];
642 // clamp the value to [-25.0, 100.0] to prevent overflow
643 sum_weights = fclamp(sum_weights, -25.0, 100.0);
644 const float score = (float)(1.0 / (1 + exp(-sum_weights)));
645 flat_blocks[by * num_blocks_w + bx] = is_flat ? 255 : 0;
646 scores[by * num_blocks_w + bx].score = var > kVarThreshold ? score : 0;
647 scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
648 #ifdef NOISE_MODEL_LOG_SCORE
649 fprintf(stderr, "%g %g %g %g %g %d ", score, var, ratio, trace, norm,
650 is_flat);
651 #endif
652 num_flat += is_flat;
653 }
654 }
655 #ifdef NOISE_MODEL_LOG_SCORE
656 fprintf(stderr, "\n");
657 #endif
658 }
659 #ifdef NOISE_MODEL_LOG_SCORE
660 fprintf(stderr, "];\n");
661 #endif
662 // Find the top-scored blocks (most likely to be flat) and set the flat blocks
663 // be the union of the thresholded results and the top 10th percentile of the
664 // scored results.
665 qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
666 const int top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
667 const float score_threshold = scores[top_nth_percentile].score;
668 for (int i = 0; i < num_blocks_w * num_blocks_h; ++i) {
669 if (scores[i].score >= score_threshold) {
670 num_flat += flat_blocks[scores[i].index] == 0;
671 flat_blocks[scores[i].index] |= 1;
672 }
673 }
674 aom_free(block);
675 aom_free(plane);
676 aom_free(scores);
677 return num_flat;
678 }
679
aom_noise_model_init(aom_noise_model_t * model,const aom_noise_model_params_t params)680 int aom_noise_model_init(aom_noise_model_t *model,
681 const aom_noise_model_params_t params) {
682 const int n = num_coeffs(params);
683 const int lag = params.lag;
684 const int bit_depth = params.bit_depth;
685 int x = 0, y = 0, i = 0, c = 0;
686
687 memset(model, 0, sizeof(*model));
688 if (params.lag < 1) {
689 fprintf(stderr, "Invalid noise param: lag = %d must be >= 1\n", params.lag);
690 return 0;
691 }
692 if (params.lag > kMaxLag) {
693 fprintf(stderr, "Invalid noise param: lag = %d must be <= %d\n", params.lag,
694 kMaxLag);
695 return 0;
696 }
697
698 memcpy(&model->params, ¶ms, sizeof(params));
699 for (c = 0; c < 3; ++c) {
700 if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
701 fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
702 aom_noise_model_free(model);
703 return 0;
704 }
705 if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
706 fprintf(stderr, "Failed to allocate noise state for channel %d\n", c);
707 aom_noise_model_free(model);
708 return 0;
709 }
710 }
711 model->n = n;
712 model->coords = (int(*)[2])aom_malloc(sizeof(*model->coords) * n);
713
714 for (y = -lag; y <= 0; ++y) {
715 const int max_x = y == 0 ? -1 : lag;
716 for (x = -lag; x <= max_x; ++x) {
717 switch (params.shape) {
718 case AOM_NOISE_SHAPE_DIAMOND:
719 if (abs(x) <= y + lag) {
720 model->coords[i][0] = x;
721 model->coords[i][1] = y;
722 ++i;
723 }
724 break;
725 case AOM_NOISE_SHAPE_SQUARE:
726 model->coords[i][0] = x;
727 model->coords[i][1] = y;
728 ++i;
729 break;
730 default:
731 fprintf(stderr, "Invalid shape\n");
732 aom_noise_model_free(model);
733 return 0;
734 }
735 }
736 }
737 assert(i == n);
738 return 1;
739 }
740
aom_noise_model_free(aom_noise_model_t * model)741 void aom_noise_model_free(aom_noise_model_t *model) {
742 int c = 0;
743 if (!model) return;
744
745 aom_free(model->coords);
746 for (c = 0; c < 3; ++c) {
747 equation_system_free(&model->latest_state[c].eqns);
748 equation_system_free(&model->combined_state[c].eqns);
749
750 equation_system_free(&model->latest_state[c].strength_solver.eqns);
751 equation_system_free(&model->combined_state[c].strength_solver.eqns);
752 }
753 memset(model, 0, sizeof(*model));
754 }
755
756 // Extracts the neighborhood defined by coords around point (x, y) from
757 // the difference between the data and denoised images. Also extracts the
758 // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
759 #define EXTRACT_AR_ROW(INT_TYPE, suffix) \
760 static double extract_ar_row_##suffix( \
761 int(*coords)[2], int num_coords, const INT_TYPE *const data, \
762 const INT_TYPE *const denoised, int stride, int sub_log2[2], \
763 const INT_TYPE *const alt_data, const INT_TYPE *const alt_denoised, \
764 int alt_stride, int x, int y, double *buffer) { \
765 for (int i = 0; i < num_coords; ++i) { \
766 const int x_i = x + coords[i][0], y_i = y + coords[i][1]; \
767 buffer[i] = \
768 (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
769 } \
770 const double val = \
771 (double)data[y * stride + x] - denoised[y * stride + x]; \
772 \
773 if (alt_data && alt_denoised) { \
774 double avg_data = 0, avg_denoised = 0; \
775 int num_samples = 0; \
776 for (int dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) { \
777 const int y_up = (y << sub_log2[1]) + dy_i; \
778 for (int dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) { \
779 const int x_up = (x << sub_log2[0]) + dx_i; \
780 avg_data += alt_data[y_up * alt_stride + x_up]; \
781 avg_denoised += alt_denoised[y_up * alt_stride + x_up]; \
782 num_samples++; \
783 } \
784 } \
785 buffer[num_coords] = (avg_data - avg_denoised) / num_samples; \
786 } \
787 return val; \
788 }
789
790 EXTRACT_AR_ROW(uint8_t, lowbd);
791 EXTRACT_AR_ROW(uint16_t, highbd);
792
add_block_observations(aom_noise_model_t * noise_model,int c,const uint8_t * const data,const uint8_t * const denoised,int w,int h,int stride,int sub_log2[2],const uint8_t * const alt_data,const uint8_t * const alt_denoised,int alt_stride,const uint8_t * const flat_blocks,int block_size,int num_blocks_w,int num_blocks_h)793 static int add_block_observations(
794 aom_noise_model_t *noise_model, int c, const uint8_t *const data,
795 const uint8_t *const denoised, int w, int h, int stride, int sub_log2[2],
796 const uint8_t *const alt_data, const uint8_t *const alt_denoised,
797 int alt_stride, const uint8_t *const flat_blocks, int block_size,
798 int num_blocks_w, int num_blocks_h) {
799 const int lag = noise_model->params.lag;
800 const int num_coords = noise_model->n;
801 const double normalization = (1 << noise_model->params.bit_depth) - 1;
802 double *A = noise_model->latest_state[c].eqns.A;
803 double *b = noise_model->latest_state[c].eqns.b;
804 double *buffer = (double *)aom_malloc(sizeof(*buffer) * (num_coords + 1));
805 const int n = noise_model->latest_state[c].eqns.n;
806
807 if (!buffer) {
808 fprintf(stderr, "Unable to allocate buffer of size %d\n", num_coords + 1);
809 return 0;
810 }
811 for (int by = 0; by < num_blocks_h; ++by) {
812 const int y_o = by * (block_size >> sub_log2[1]);
813 for (int bx = 0; bx < num_blocks_w; ++bx) {
814 const int x_o = bx * (block_size >> sub_log2[0]);
815 if (!flat_blocks[by * num_blocks_w + bx]) {
816 continue;
817 }
818 int y_start =
819 (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
820 int x_start =
821 (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
822 int y_end = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
823 block_size >> sub_log2[1]);
824 int x_end = AOMMIN(
825 (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
826 (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
827 ? (block_size >> sub_log2[0])
828 : ((block_size >> sub_log2[0]) - lag));
829 for (int y = y_start; y < y_end; ++y) {
830 for (int x = x_start; x < x_end; ++x) {
831 const double val =
832 noise_model->params.use_highbd
833 ? extract_ar_row_highbd(noise_model->coords, num_coords,
834 (const uint16_t *const)data,
835 (const uint16_t *const)denoised,
836 stride, sub_log2,
837 (const uint16_t *const)alt_data,
838 (const uint16_t *const)alt_denoised,
839 alt_stride, x + x_o, y + y_o, buffer)
840 : extract_ar_row_lowbd(noise_model->coords, num_coords, data,
841 denoised, stride, sub_log2, alt_data,
842 alt_denoised, alt_stride, x + x_o,
843 y + y_o, buffer);
844 for (int i = 0; i < n; ++i) {
845 for (int j = 0; j < n; ++j) {
846 A[i * n + j] +=
847 (buffer[i] * buffer[j]) / (normalization * normalization);
848 }
849 b[i] += (buffer[i] * val) / (normalization * normalization);
850 }
851 noise_model->latest_state[c].num_observations++;
852 }
853 }
854 }
855 }
856 aom_free(buffer);
857 return 1;
858 }
859
add_noise_std_observations(aom_noise_model_t * noise_model,int c,const double * coeffs,const uint8_t * const data,const uint8_t * const denoised,int w,int h,int stride,int sub_log2[2],const uint8_t * const alt_data,int alt_stride,const uint8_t * const flat_blocks,int block_size,int num_blocks_w,int num_blocks_h)860 static void add_noise_std_observations(
861 aom_noise_model_t *noise_model, int c, const double *coeffs,
862 const uint8_t *const data, const uint8_t *const denoised, int w, int h,
863 int stride, int sub_log2[2], const uint8_t *const alt_data, int alt_stride,
864 const uint8_t *const flat_blocks, int block_size, int num_blocks_w,
865 int num_blocks_h) {
866 const int num_coords = noise_model->n;
867 aom_noise_strength_solver_t *noise_strength_solver =
868 &noise_model->latest_state[c].strength_solver;
869
870 const aom_noise_strength_solver_t *noise_strength_luma =
871 &noise_model->latest_state[0].strength_solver;
872 const double luma_gain = noise_model->latest_state[0].ar_gain;
873 const double noise_gain = noise_model->latest_state[c].ar_gain;
874 for (int by = 0; by < num_blocks_h; ++by) {
875 const int y_o = by * (block_size >> sub_log2[1]);
876 for (int bx = 0; bx < num_blocks_w; ++bx) {
877 const int x_o = bx * (block_size >> sub_log2[0]);
878 if (!flat_blocks[by * num_blocks_w + bx]) {
879 continue;
880 }
881 const int num_samples_h =
882 AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
883 block_size >> sub_log2[1]);
884 const int num_samples_w =
885 AOMMIN((w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]),
886 (block_size >> sub_log2[0]));
887 // Make sure that we have a reasonable amount of samples to consider the
888 // block
889 if (num_samples_w * num_samples_h > block_size) {
890 const double block_mean = get_block_mean(
891 alt_data ? alt_data : data, w, h, alt_data ? alt_stride : stride,
892 x_o << sub_log2[0], y_o << sub_log2[1], block_size,
893 noise_model->params.use_highbd);
894 const double noise_var = get_noise_var(
895 data, denoised, stride, w >> sub_log2[0], h >> sub_log2[1], x_o,
896 y_o, block_size >> sub_log2[0], block_size >> sub_log2[1],
897 noise_model->params.use_highbd);
898 // We want to remove the part of the noise that came from being
899 // correlated with luma. Note that the noise solver for luma must
900 // have already been run.
901 const double luma_strength =
902 c > 0 ? luma_gain * noise_strength_solver_get_value(
903 noise_strength_luma, block_mean)
904 : 0;
905 const double corr = c > 0 ? coeffs[num_coords] : 0;
906 // Chroma noise:
907 // N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
908 // The uncorrelated component:
909 // uncorr_var = noise_var - (corr * luma_strength)^2
910 // But don't allow fully correlated noise (hence the max), since the
911 // synthesis cannot model it.
912 const double uncorr_std = sqrt(
913 AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
914 // After we've removed correlation with luma, undo the gain that will
915 // come from running the IIR filter.
916 const double adjusted_strength = uncorr_std / noise_gain;
917 aom_noise_strength_solver_add_measurement(
918 noise_strength_solver, block_mean, adjusted_strength);
919 }
920 }
921 }
922 }
923
924 // Return true if the noise estimate appears to be different from the combined
925 // (multi-frame) estimate. The difference is measured by checking whether the
926 // AR coefficients have diverged (using a threshold on normalized cross
927 // correlation), or whether the noise strength has changed.
is_noise_model_different(aom_noise_model_t * const noise_model)928 static int is_noise_model_different(aom_noise_model_t *const noise_model) {
929 // These thresholds are kind of arbitrary and will likely need further tuning
930 // (or exported as parameters). The threshold on noise strength is a weighted
931 // difference between the noise strength histograms
932 const double kCoeffThreshold = 0.9;
933 const double kStrengthThreshold =
934 0.005 * (1 << (noise_model->params.bit_depth - 8));
935 for (int c = 0; c < 1; ++c) {
936 const double corr =
937 aom_normalized_cross_correlation(noise_model->latest_state[c].eqns.x,
938 noise_model->combined_state[c].eqns.x,
939 noise_model->combined_state[c].eqns.n);
940 if (corr < kCoeffThreshold) return 1;
941
942 const double dx =
943 1.0 / noise_model->latest_state[c].strength_solver.num_bins;
944
945 const aom_equation_system_t *latest_eqns =
946 &noise_model->latest_state[c].strength_solver.eqns;
947 const aom_equation_system_t *combined_eqns =
948 &noise_model->combined_state[c].strength_solver.eqns;
949 double diff = 0;
950 double total_weight = 0;
951 for (int j = 0; j < latest_eqns->n; ++j) {
952 double weight = 0;
953 for (int i = 0; i < latest_eqns->n; ++i) {
954 weight += latest_eqns->A[i * latest_eqns->n + j];
955 }
956 weight = sqrt(weight);
957 diff += weight * fabs(latest_eqns->x[j] - combined_eqns->x[j]);
958 total_weight += weight;
959 }
960 if (diff * dx / total_weight > kStrengthThreshold) return 1;
961 }
962 return 0;
963 }
964
ar_equation_system_solve(aom_noise_state_t * state,int is_chroma)965 static int ar_equation_system_solve(aom_noise_state_t *state, int is_chroma) {
966 const int ret = equation_system_solve(&state->eqns);
967 state->ar_gain = 1.0;
968 if (!ret) return ret;
969
970 // Update the AR gain from the equation system as it will be used to fit
971 // the noise strength as a function of intensity. In the Yule-Walker
972 // equations, the diagonal should be the variance of the correlated noise.
973 // In the case of the least squares estimate, there will be some variability
974 // in the diagonal. So use the mean of the diagonal as the estimate of
975 // overall variance (this works for least squares or Yule-Walker formulation).
976 double var = 0;
977 const int n = state->eqns.n;
978 for (int i = 0; i < (state->eqns.n - is_chroma); ++i) {
979 var += state->eqns.A[i * n + i] / state->num_observations;
980 }
981 var /= (n - is_chroma);
982
983 // Keep track of E(Y^2) = <b, x> + E(X^2)
984 // In the case that we are using chroma and have an estimate of correlation
985 // with luma we adjust that estimate slightly to remove the correlated bits by
986 // subtracting out the last column of a scaled by our correlation estimate
987 // from b. E(y^2) = <b - A(:, end)*x(end), x>
988 double sum_covar = 0;
989 for (int i = 0; i < state->eqns.n - is_chroma; ++i) {
990 double bi = state->eqns.b[i];
991 if (is_chroma) {
992 bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
993 }
994 sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
995 }
996 // Now, get an estimate of the variance of uncorrelated noise signal and use
997 // it to determine the gain of the AR filter.
998 const double noise_var = AOMMAX(var - sum_covar, 1e-6);
999 state->ar_gain = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
1000 return ret;
1001 }
1002
aom_noise_model_update(aom_noise_model_t * const noise_model,const uint8_t * const data[3],const uint8_t * const denoised[3],int w,int h,int stride[3],int chroma_sub_log2[2],const uint8_t * const flat_blocks,int block_size)1003 aom_noise_status_t aom_noise_model_update(
1004 aom_noise_model_t *const noise_model, const uint8_t *const data[3],
1005 const uint8_t *const denoised[3], int w, int h, int stride[3],
1006 int chroma_sub_log2[2], const uint8_t *const flat_blocks, int block_size) {
1007 const int num_blocks_w = (w + block_size - 1) / block_size;
1008 const int num_blocks_h = (h + block_size - 1) / block_size;
1009 int y_model_different = 0;
1010 int num_blocks = 0;
1011 int i = 0, channel = 0;
1012
1013 if (block_size <= 1) {
1014 fprintf(stderr, "block_size = %d must be > 1\n", block_size);
1015 return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1016 }
1017
1018 if (block_size < noise_model->params.lag * 2 + 1) {
1019 fprintf(stderr, "block_size = %d must be >= %d\n", block_size,
1020 noise_model->params.lag * 2 + 1);
1021 return AOM_NOISE_STATUS_INVALID_ARGUMENT;
1022 }
1023
1024 // Clear the latest equation system
1025 for (i = 0; i < 3; ++i) {
1026 equation_system_clear(&noise_model->latest_state[i].eqns);
1027 noise_model->latest_state[i].num_observations = 0;
1028 noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
1029 }
1030
1031 // Check that we have enough flat blocks
1032 for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
1033 if (flat_blocks[i]) {
1034 num_blocks++;
1035 }
1036 }
1037
1038 if (num_blocks <= 1) {
1039 fprintf(stderr, "Not enough flat blocks to update noise estimate\n");
1040 return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
1041 }
1042
1043 for (channel = 0; channel < 3; ++channel) {
1044 int no_subsampling[2] = { 0, 0 };
1045 const uint8_t *alt_data = channel > 0 ? data[0] : 0;
1046 const uint8_t *alt_denoised = channel > 0 ? denoised[0] : 0;
1047 int *sub = channel > 0 ? chroma_sub_log2 : no_subsampling;
1048 const int is_chroma = channel != 0;
1049 if (!data[channel] || !denoised[channel]) break;
1050 if (!add_block_observations(noise_model, channel, data[channel],
1051 denoised[channel], w, h, stride[channel], sub,
1052 alt_data, alt_denoised, stride[0], flat_blocks,
1053 block_size, num_blocks_w, num_blocks_h)) {
1054 fprintf(stderr, "Adding block observation failed\n");
1055 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1056 }
1057
1058 if (!ar_equation_system_solve(&noise_model->latest_state[channel],
1059 is_chroma)) {
1060 if (is_chroma) {
1061 set_chroma_coefficient_fallback_soln(
1062 &noise_model->latest_state[channel].eqns);
1063 } else {
1064 fprintf(stderr, "Solving latest noise equation system failed %d!\n",
1065 channel);
1066 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1067 }
1068 }
1069
1070 add_noise_std_observations(
1071 noise_model, channel, noise_model->latest_state[channel].eqns.x,
1072 data[channel], denoised[channel], w, h, stride[channel], sub, alt_data,
1073 stride[0], flat_blocks, block_size, num_blocks_w, num_blocks_h);
1074
1075 if (!aom_noise_strength_solver_solve(
1076 &noise_model->latest_state[channel].strength_solver)) {
1077 fprintf(stderr, "Solving latest noise strength failed!\n");
1078 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1079 }
1080
1081 // Check noise characteristics and return if error.
1082 if (channel == 0 &&
1083 noise_model->combined_state[channel].strength_solver.num_equations >
1084 0 &&
1085 is_noise_model_different(noise_model)) {
1086 y_model_different = 1;
1087 }
1088
1089 // Don't update the combined stats if the y model is different.
1090 if (y_model_different) continue;
1091
1092 noise_model->combined_state[channel].num_observations +=
1093 noise_model->latest_state[channel].num_observations;
1094 equation_system_add(&noise_model->combined_state[channel].eqns,
1095 &noise_model->latest_state[channel].eqns);
1096 if (!ar_equation_system_solve(&noise_model->combined_state[channel],
1097 is_chroma)) {
1098 if (is_chroma) {
1099 set_chroma_coefficient_fallback_soln(
1100 &noise_model->combined_state[channel].eqns);
1101 } else {
1102 fprintf(stderr, "Solving combined noise equation system failed %d!\n",
1103 channel);
1104 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1105 }
1106 }
1107
1108 noise_strength_solver_add(
1109 &noise_model->combined_state[channel].strength_solver,
1110 &noise_model->latest_state[channel].strength_solver);
1111
1112 if (!aom_noise_strength_solver_solve(
1113 &noise_model->combined_state[channel].strength_solver)) {
1114 fprintf(stderr, "Solving combined noise strength failed!\n");
1115 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1116 }
1117 }
1118
1119 return y_model_different ? AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE
1120 : AOM_NOISE_STATUS_OK;
1121 }
1122
aom_noise_model_save_latest(aom_noise_model_t * noise_model)1123 void aom_noise_model_save_latest(aom_noise_model_t *noise_model) {
1124 for (int c = 0; c < 3; c++) {
1125 equation_system_copy(&noise_model->combined_state[c].eqns,
1126 &noise_model->latest_state[c].eqns);
1127 equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
1128 &noise_model->latest_state[c].strength_solver.eqns);
1129 noise_model->combined_state[c].strength_solver.num_equations =
1130 noise_model->latest_state[c].strength_solver.num_equations;
1131 noise_model->combined_state[c].num_observations =
1132 noise_model->latest_state[c].num_observations;
1133 noise_model->combined_state[c].ar_gain =
1134 noise_model->latest_state[c].ar_gain;
1135 }
1136 }
1137
aom_noise_model_get_grain_parameters(aom_noise_model_t * const noise_model,aom_film_grain_t * film_grain)1138 int aom_noise_model_get_grain_parameters(aom_noise_model_t *const noise_model,
1139 aom_film_grain_t *film_grain) {
1140 if (noise_model->params.lag > 3) {
1141 fprintf(stderr, "params.lag = %d > 3\n", noise_model->params.lag);
1142 return 0;
1143 }
1144 uint16_t random_seed = film_grain->random_seed;
1145 memset(film_grain, 0, sizeof(*film_grain));
1146 film_grain->random_seed = random_seed;
1147
1148 film_grain->apply_grain = 1;
1149 film_grain->update_parameters = 1;
1150
1151 film_grain->ar_coeff_lag = noise_model->params.lag;
1152
1153 // Convert the scaling functions to 8 bit values
1154 aom_noise_strength_lut_t scaling_points[3];
1155 aom_noise_strength_solver_fit_piecewise(
1156 &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
1157 aom_noise_strength_solver_fit_piecewise(
1158 &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
1159 aom_noise_strength_solver_fit_piecewise(
1160 &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
1161
1162 // Both the domain and the range of the scaling functions in the film_grain
1163 // are normalized to 8-bit (e.g., they are implicitly scaled during grain
1164 // synthesis).
1165 const double strength_divisor = 1 << (noise_model->params.bit_depth - 8);
1166 double max_scaling_value = 1e-4;
1167 for (int c = 0; c < 3; ++c) {
1168 for (int i = 0; i < scaling_points[c].num_points; ++i) {
1169 scaling_points[c].points[i][0] =
1170 AOMMIN(255, scaling_points[c].points[i][0] / strength_divisor);
1171 scaling_points[c].points[i][1] =
1172 AOMMIN(255, scaling_points[c].points[i][1] / strength_divisor);
1173 max_scaling_value =
1174 AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
1175 }
1176 }
1177
1178 // Scaling_shift values are in the range [8,11]
1179 const int max_scaling_value_log2 =
1180 clamp((int)floor(log2(max_scaling_value) + 1), 2, 5);
1181 film_grain->scaling_shift = 5 + (8 - max_scaling_value_log2);
1182
1183 const double scale_factor = 1 << (8 - max_scaling_value_log2);
1184 film_grain->num_y_points = scaling_points[0].num_points;
1185 film_grain->num_cb_points = scaling_points[1].num_points;
1186 film_grain->num_cr_points = scaling_points[2].num_points;
1187
1188 int(*film_grain_scaling[3])[2] = {
1189 film_grain->scaling_points_y,
1190 film_grain->scaling_points_cb,
1191 film_grain->scaling_points_cr,
1192 };
1193 for (int c = 0; c < 3; c++) {
1194 for (int i = 0; i < scaling_points[c].num_points; ++i) {
1195 film_grain_scaling[c][i][0] = (int)(scaling_points[c].points[i][0] + 0.5);
1196 film_grain_scaling[c][i][1] = clamp(
1197 (int)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
1198 }
1199 }
1200 aom_noise_strength_lut_free(scaling_points + 0);
1201 aom_noise_strength_lut_free(scaling_points + 1);
1202 aom_noise_strength_lut_free(scaling_points + 2);
1203
1204 // Convert the ar_coeffs into 8-bit values
1205 const int n_coeff = noise_model->combined_state[0].eqns.n;
1206 double max_coeff = 1e-4, min_coeff = -1e-4;
1207 double y_corr[2] = { 0, 0 };
1208 double avg_luma_strength = 0;
1209 for (int c = 0; c < 3; c++) {
1210 aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
1211 for (int i = 0; i < n_coeff; ++i) {
1212 max_coeff = AOMMAX(max_coeff, eqns->x[i]);
1213 min_coeff = AOMMIN(min_coeff, eqns->x[i]);
1214 }
1215 // Since the correlation between luma/chroma was computed in an already
1216 // scaled space, we adjust it in the un-scaled space.
1217 aom_noise_strength_solver_t *solver =
1218 &noise_model->combined_state[c].strength_solver;
1219 // Compute a weighted average of the strength for the channel.
1220 double average_strength = 0, total_weight = 0;
1221 for (int i = 0; i < solver->eqns.n; ++i) {
1222 double w = 0;
1223 for (int j = 0; j < solver->eqns.n; ++j) {
1224 w += solver->eqns.A[i * solver->eqns.n + j];
1225 }
1226 w = sqrt(w);
1227 average_strength += solver->eqns.x[i] * w;
1228 total_weight += w;
1229 }
1230 if (total_weight == 0)
1231 average_strength = 1;
1232 else
1233 average_strength /= total_weight;
1234 if (c == 0) {
1235 avg_luma_strength = average_strength;
1236 } else {
1237 y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
1238 max_coeff = AOMMAX(max_coeff, y_corr[c - 1]);
1239 min_coeff = AOMMIN(min_coeff, y_corr[c - 1]);
1240 }
1241 }
1242 // Shift value: AR coeffs range (values 6-9)
1243 // 6: [-2, 2), 7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
1244 film_grain->ar_coeff_shift =
1245 clamp(7 - (int)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))),
1246 6, 9);
1247 double scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
1248 int *ar_coeffs[3] = {
1249 film_grain->ar_coeffs_y,
1250 film_grain->ar_coeffs_cb,
1251 film_grain->ar_coeffs_cr,
1252 };
1253 for (int c = 0; c < 3; ++c) {
1254 aom_equation_system_t *eqns = &noise_model->combined_state[c].eqns;
1255 for (int i = 0; i < n_coeff; ++i) {
1256 ar_coeffs[c][i] =
1257 clamp((int)round(scale_ar_coeff * eqns->x[i]), -128, 127);
1258 }
1259 if (c > 0) {
1260 ar_coeffs[c][n_coeff] =
1261 clamp((int)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
1262 }
1263 }
1264
1265 // At the moment, the noise modeling code assumes that the chroma scaling
1266 // functions are a function of luma.
1267 film_grain->cb_mult = 128; // 8 bits
1268 film_grain->cb_luma_mult = 192; // 8 bits
1269 film_grain->cb_offset = 256; // 9 bits
1270
1271 film_grain->cr_mult = 128; // 8 bits
1272 film_grain->cr_luma_mult = 192; // 8 bits
1273 film_grain->cr_offset = 256; // 9 bits
1274
1275 film_grain->chroma_scaling_from_luma = 0;
1276 film_grain->grain_scale_shift = 0;
1277 film_grain->overlap_flag = 1;
1278 return 1;
1279 }
1280
pointwise_multiply(const float * a,float * b,int n)1281 static void pointwise_multiply(const float *a, float *b, int n) {
1282 for (int i = 0; i < n; ++i) {
1283 b[i] *= a[i];
1284 }
1285 }
1286
get_half_cos_window(int block_size)1287 static float *get_half_cos_window(int block_size) {
1288 float *window_function =
1289 (float *)aom_malloc(block_size * block_size * sizeof(*window_function));
1290 for (int y = 0; y < block_size; ++y) {
1291 const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
1292 for (int x = 0; x < block_size; ++x) {
1293 const double cos_xd = cos((.5 + x) * PI / block_size - PI / 2);
1294 window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
1295 }
1296 }
1297 return window_function;
1298 }
1299
1300 #define DITHER_AND_QUANTIZE(INT_TYPE, suffix) \
1301 static void dither_and_quantize_##suffix( \
1302 float *result, int result_stride, INT_TYPE *denoised, int w, int h, \
1303 int stride, int chroma_sub_w, int chroma_sub_h, int block_size, \
1304 float block_normalization) { \
1305 for (int y = 0; y < (h >> chroma_sub_h); ++y) { \
1306 for (int x = 0; x < (w >> chroma_sub_w); ++x) { \
1307 const int result_idx = \
1308 (y + (block_size >> chroma_sub_h)) * result_stride + x + \
1309 (block_size >> chroma_sub_w); \
1310 INT_TYPE new_val = (INT_TYPE)AOMMIN( \
1311 AOMMAX(result[result_idx] * block_normalization + 0.5f, 0), \
1312 block_normalization); \
1313 const float err = \
1314 -(((float)new_val) / block_normalization - result[result_idx]); \
1315 denoised[y * stride + x] = new_val; \
1316 if (x + 1 < (w >> chroma_sub_w)) { \
1317 result[result_idx + 1] += err * 7.0f / 16.0f; \
1318 } \
1319 if (y + 1 < (h >> chroma_sub_h)) { \
1320 if (x > 0) { \
1321 result[result_idx + result_stride - 1] += err * 3.0f / 16.0f; \
1322 } \
1323 result[result_idx + result_stride] += err * 5.0f / 16.0f; \
1324 if (x + 1 < (w >> chroma_sub_w)) { \
1325 result[result_idx + result_stride + 1] += err * 1.0f / 16.0f; \
1326 } \
1327 } \
1328 } \
1329 } \
1330 }
1331
1332 DITHER_AND_QUANTIZE(uint8_t, lowbd);
1333 DITHER_AND_QUANTIZE(uint16_t, highbd);
1334
aom_wiener_denoise_2d(const uint8_t * const data[3],uint8_t * denoised[3],int w,int h,int stride[3],int chroma_sub[2],float * noise_psd[3],int block_size,int bit_depth,int use_highbd)1335 int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
1336 int w, int h, int stride[3], int chroma_sub[2],
1337 float *noise_psd[3], int block_size, int bit_depth,
1338 int use_highbd) {
1339 float *plane = NULL, *block = NULL, *window_full = NULL,
1340 *window_chroma = NULL;
1341 double *block_d = NULL, *plane_d = NULL;
1342 struct aom_noise_tx_t *tx_full = NULL;
1343 struct aom_noise_tx_t *tx_chroma = NULL;
1344 const int num_blocks_w = (w + block_size - 1) / block_size;
1345 const int num_blocks_h = (h + block_size - 1) / block_size;
1346 const int result_stride = (num_blocks_w + 2) * block_size;
1347 const int result_height = (num_blocks_h + 2) * block_size;
1348 float *result = NULL;
1349 int init_success = 1;
1350 aom_flat_block_finder_t block_finder_full;
1351 aom_flat_block_finder_t block_finder_chroma;
1352 const float kBlockNormalization = (float)((1 << bit_depth) - 1);
1353 if (chroma_sub[0] != chroma_sub[1]) {
1354 fprintf(stderr,
1355 "aom_wiener_denoise_2d doesn't handle different chroma "
1356 "subsampling");
1357 return 0;
1358 }
1359 init_success &= aom_flat_block_finder_init(&block_finder_full, block_size,
1360 bit_depth, use_highbd);
1361 result = (float *)aom_malloc((num_blocks_h + 2) * block_size * result_stride *
1362 sizeof(*result));
1363 plane = (float *)aom_malloc(block_size * block_size * sizeof(*plane));
1364 block =
1365 (float *)aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
1366 block_d = (double *)aom_malloc(block_size * block_size * sizeof(*block_d));
1367 plane_d = (double *)aom_malloc(block_size * block_size * sizeof(*plane_d));
1368 window_full = get_half_cos_window(block_size);
1369 tx_full = aom_noise_tx_malloc(block_size);
1370
1371 if (chroma_sub[0] != 0) {
1372 init_success &= aom_flat_block_finder_init(&block_finder_chroma,
1373 block_size >> chroma_sub[0],
1374 bit_depth, use_highbd);
1375 window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
1376 tx_chroma = aom_noise_tx_malloc(block_size >> chroma_sub[0]);
1377 } else {
1378 window_chroma = window_full;
1379 tx_chroma = tx_full;
1380 }
1381
1382 init_success &= (tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
1383 (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
1384 (window_full != NULL) && (window_chroma != NULL) &&
1385 (result != NULL);
1386 for (int c = init_success ? 0 : 3; c < 3; ++c) {
1387 float *window_function = c == 0 ? window_full : window_chroma;
1388 aom_flat_block_finder_t *block_finder = &block_finder_full;
1389 const int chroma_sub_h = c > 0 ? chroma_sub[1] : 0;
1390 const int chroma_sub_w = c > 0 ? chroma_sub[0] : 0;
1391 struct aom_noise_tx_t *tx =
1392 (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
1393 if (!data[c] || !denoised[c]) continue;
1394 if (c > 0 && chroma_sub[0] != 0) {
1395 block_finder = &block_finder_chroma;
1396 }
1397 memset(result, 0, sizeof(*result) * result_stride * result_height);
1398 // Do overlapped block processing (half overlapped). The block rows can
1399 // easily be done in parallel
1400 for (int offsy = 0; offsy < (block_size >> chroma_sub_h);
1401 offsy += (block_size >> chroma_sub_h) / 2) {
1402 for (int offsx = 0; offsx < (block_size >> chroma_sub_w);
1403 offsx += (block_size >> chroma_sub_w) / 2) {
1404 // Pad the boundary when processing each block-set.
1405 for (int by = -1; by < num_blocks_h; ++by) {
1406 for (int bx = -1; bx < num_blocks_w; ++bx) {
1407 const int pixels_per_block =
1408 (block_size >> chroma_sub_w) * (block_size >> chroma_sub_h);
1409 aom_flat_block_finder_extract_block(
1410 block_finder, data[c], w >> chroma_sub_w, h >> chroma_sub_h,
1411 stride[c], bx * (block_size >> chroma_sub_w) + offsx,
1412 by * (block_size >> chroma_sub_h) + offsy, plane_d, block_d);
1413 for (int j = 0; j < pixels_per_block; ++j) {
1414 block[j] = (float)block_d[j];
1415 plane[j] = (float)plane_d[j];
1416 }
1417 pointwise_multiply(window_function, block, pixels_per_block);
1418 aom_noise_tx_forward(tx, block);
1419 aom_noise_tx_filter(tx, noise_psd[c]);
1420 aom_noise_tx_inverse(tx, block);
1421
1422 // Apply window function to the plane approximation (we will apply
1423 // it to the sum of plane + block when composing the results).
1424 pointwise_multiply(window_function, plane, pixels_per_block);
1425
1426 for (int y = 0; y < (block_size >> chroma_sub_h); ++y) {
1427 const int y_result =
1428 y + (by + 1) * (block_size >> chroma_sub_h) + offsy;
1429 for (int x = 0; x < (block_size >> chroma_sub_w); ++x) {
1430 const int x_result =
1431 x + (bx + 1) * (block_size >> chroma_sub_w) + offsx;
1432 result[y_result * result_stride + x_result] +=
1433 (block[y * (block_size >> chroma_sub_w) + x] +
1434 plane[y * (block_size >> chroma_sub_w) + x]) *
1435 window_function[y * (block_size >> chroma_sub_w) + x];
1436 }
1437 }
1438 }
1439 }
1440 }
1441 }
1442 if (use_highbd) {
1443 dither_and_quantize_highbd(result, result_stride, (uint16_t *)denoised[c],
1444 w, h, stride[c], chroma_sub_w, chroma_sub_h,
1445 block_size, kBlockNormalization);
1446 } else {
1447 dither_and_quantize_lowbd(result, result_stride, denoised[c], w, h,
1448 stride[c], chroma_sub_w, chroma_sub_h,
1449 block_size, kBlockNormalization);
1450 }
1451 }
1452 aom_free(result);
1453 aom_free(plane);
1454 aom_free(block);
1455 aom_free(plane_d);
1456 aom_free(block_d);
1457 aom_free(window_full);
1458
1459 aom_noise_tx_free(tx_full);
1460
1461 aom_flat_block_finder_free(&block_finder_full);
1462 if (chroma_sub[0] != 0) {
1463 aom_flat_block_finder_free(&block_finder_chroma);
1464 aom_free(window_chroma);
1465 aom_noise_tx_free(tx_chroma);
1466 }
1467 return init_success;
1468 }
1469
1470 struct aom_denoise_and_model_t {
1471 int block_size;
1472 int bit_depth;
1473 float noise_level;
1474
1475 // Size of current denoised buffer and flat_block buffer
1476 int width;
1477 int height;
1478 int y_stride;
1479 int uv_stride;
1480 int num_blocks_w;
1481 int num_blocks_h;
1482
1483 // Buffers for image and noise_psd allocated on the fly
1484 float *noise_psd[3];
1485 uint8_t *denoised[3];
1486 uint8_t *flat_blocks;
1487
1488 aom_flat_block_finder_t flat_block_finder;
1489 aom_noise_model_t noise_model;
1490 };
1491
aom_denoise_and_model_alloc(int bit_depth,int block_size,float noise_level)1492 struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
1493 int block_size,
1494 float noise_level) {
1495 struct aom_denoise_and_model_t *ctx =
1496 (struct aom_denoise_and_model_t *)aom_malloc(
1497 sizeof(struct aom_denoise_and_model_t));
1498 if (!ctx) {
1499 fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
1500 return NULL;
1501 }
1502 memset(ctx, 0, sizeof(*ctx));
1503
1504 ctx->block_size = block_size;
1505 ctx->noise_level = noise_level;
1506 ctx->bit_depth = bit_depth;
1507
1508 ctx->noise_psd[0] =
1509 aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
1510 ctx->noise_psd[1] =
1511 aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
1512 ctx->noise_psd[2] =
1513 aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
1514 if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
1515 fprintf(stderr, "Unable to allocate noise PSD buffers\n");
1516 aom_denoise_and_model_free(ctx);
1517 return NULL;
1518 }
1519 return ctx;
1520 }
1521
aom_denoise_and_model_free(struct aom_denoise_and_model_t * ctx)1522 void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
1523 aom_free(ctx->flat_blocks);
1524 for (int i = 0; i < 3; ++i) {
1525 aom_free(ctx->denoised[i]);
1526 aom_free(ctx->noise_psd[i]);
1527 }
1528 aom_noise_model_free(&ctx->noise_model);
1529 aom_flat_block_finder_free(&ctx->flat_block_finder);
1530 aom_free(ctx);
1531 }
1532
denoise_and_model_realloc_if_necessary(struct aom_denoise_and_model_t * ctx,YV12_BUFFER_CONFIG * sd)1533 static int denoise_and_model_realloc_if_necessary(
1534 struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
1535 if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
1536 ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
1537 return 1;
1538 const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
1539 const int block_size = ctx->block_size;
1540
1541 ctx->width = sd->y_width;
1542 ctx->height = sd->y_height;
1543 ctx->y_stride = sd->y_stride;
1544 ctx->uv_stride = sd->uv_stride;
1545
1546 for (int i = 0; i < 3; ++i) {
1547 aom_free(ctx->denoised[i]);
1548 ctx->denoised[i] = NULL;
1549 }
1550 aom_free(ctx->flat_blocks);
1551 ctx->flat_blocks = NULL;
1552
1553 ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
1554 ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
1555 ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
1556 if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
1557 fprintf(stderr, "Unable to allocate denoise buffers\n");
1558 return 0;
1559 }
1560 ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
1561 ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
1562 ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
1563
1564 aom_flat_block_finder_free(&ctx->flat_block_finder);
1565 if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
1566 ctx->bit_depth, use_highbd)) {
1567 fprintf(stderr, "Unable to init flat block finder\n");
1568 return 0;
1569 }
1570
1571 const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
1572 ctx->bit_depth, use_highbd };
1573 aom_noise_model_free(&ctx->noise_model);
1574 if (!aom_noise_model_init(&ctx->noise_model, params)) {
1575 fprintf(stderr, "Unable to init noise model\n");
1576 return 0;
1577 }
1578
1579 // Simply use a flat PSD (although we could use the flat blocks to estimate
1580 // PSD) those to estimate an actual noise PSD)
1581 const float y_noise_level =
1582 aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
1583 const float uv_noise_level = aom_noise_psd_get_default_value(
1584 ctx->block_size >> sd->subsampling_x, ctx->noise_level);
1585 for (int i = 0; i < block_size * block_size; ++i) {
1586 ctx->noise_psd[0][i] = y_noise_level;
1587 ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
1588 }
1589 return 1;
1590 }
1591
aom_denoise_and_model_run(struct aom_denoise_and_model_t * ctx,YV12_BUFFER_CONFIG * sd,aom_film_grain_t * film_grain)1592 int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
1593 YV12_BUFFER_CONFIG *sd,
1594 aom_film_grain_t *film_grain) {
1595 const int block_size = ctx->block_size;
1596 const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
1597 uint8_t *raw_data[3] = {
1598 use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
1599 use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
1600 use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
1601 };
1602 const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
1603 int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
1604 int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
1605
1606 if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
1607 fprintf(stderr, "Unable to realloc buffers\n");
1608 return 0;
1609 }
1610
1611 aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
1612 sd->y_height, strides[0], ctx->flat_blocks);
1613
1614 if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
1615 strides, chroma_sub_log2, ctx->noise_psd,
1616 block_size, ctx->bit_depth, use_highbd)) {
1617 fprintf(stderr, "Unable to denoise image\n");
1618 return 0;
1619 }
1620
1621 const aom_noise_status_t status = aom_noise_model_update(
1622 &ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
1623 sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
1624 block_size);
1625 int have_noise_estimate = 0;
1626 if (status == AOM_NOISE_STATUS_OK) {
1627 have_noise_estimate = 1;
1628 } else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
1629 aom_noise_model_save_latest(&ctx->noise_model);
1630 have_noise_estimate = 1;
1631 } else {
1632 // Unable to update noise model; proceed if we have a previous estimate.
1633 have_noise_estimate =
1634 (ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
1635 }
1636
1637 film_grain->apply_grain = 0;
1638 if (have_noise_estimate) {
1639 if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
1640 fprintf(stderr, "Unable to get grain parameters.\n");
1641 return 0;
1642 }
1643 if (!film_grain->random_seed) {
1644 film_grain->random_seed = 7391;
1645 }
1646 memcpy(raw_data[0], ctx->denoised[0],
1647 (strides[0] * sd->y_height) << use_highbd);
1648 memcpy(raw_data[1], ctx->denoised[1],
1649 (strides[1] * sd->uv_height) << use_highbd);
1650 memcpy(raw_data[2], ctx->denoised[2],
1651 (strides[2] * sd->uv_height) << use_highbd);
1652 }
1653 return 1;
1654 }
1655