• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <string.h>
14 
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/aom_scale_rtcd.h"
17 
18 #include "aom/aom_integer.h"
19 #include "av1/common/av1_common_int.h"
20 #include "av1/common/reconinter.h"
21 #include "av1/encoder/encoder.h"
22 #include "av1/encoder/ethread.h"
23 #include "av1/encoder/pickcdef.h"
24 
25 // Get primary and secondary filter strength for the given strength index and
26 // search method
get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,int * pri_strength,int * sec_strength,int strength_idx)27 static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
28                                              int *pri_strength,
29                                              int *sec_strength,
30                                              int strength_idx) {
31   const int tot_sec_filter = (pick_method >= CDEF_FAST_SEARCH_LVL3)
32                                  ? REDUCED_SEC_STRENGTHS_LVL3
33                                  : CDEF_SEC_STRENGTHS;
34   const int pri_idx = strength_idx / tot_sec_filter;
35   const int sec_idx = strength_idx % tot_sec_filter;
36   *pri_strength = pri_idx;
37   *sec_strength = sec_idx;
38   if (pick_method == CDEF_FULL_SEARCH) return;
39 
40   switch (pick_method) {
41     case CDEF_FAST_SEARCH_LVL1: *pri_strength = priconv_lvl1[pri_idx]; break;
42     case CDEF_FAST_SEARCH_LVL2: *pri_strength = priconv_lvl2[pri_idx]; break;
43     case CDEF_FAST_SEARCH_LVL3:
44       *pri_strength = priconv_lvl2[pri_idx];
45       *sec_strength = secconv_lvl3[sec_idx];
46       break;
47     case CDEF_FAST_SEARCH_LVL4:
48       *pri_strength = priconv_lvl4[pri_idx];
49       *sec_strength = secconv_lvl3[sec_idx];
50       break;
51     default: assert(0 && "Invalid CDEF search method");
52   }
53 }
54 
// Store CDEF filter strength calculated from strength index for given search
// method.
// The expansion site must have int locals named pri_strength and sec_strength
// in scope; they are overwritten. The body is wrapped in do { } while (0) so
// that the macro expands to exactly one statement and stays correct under an
// unbraced if/else. Invoke it with a trailing semicolon.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
61 
62 /* Search for the best strength to add as an option, knowing we
63    already selected nb_strengths options. */
search_one(int * lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)64 static uint64_t search_one(int *lev, int nb_strengths,
65                            uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
66                            CDEF_PICK_METHOD pick_method) {
67   uint64_t tot_mse[TOTAL_STRENGTHS];
68   const int total_strengths = nb_cdef_strengths[pick_method];
69   int i, j;
70   uint64_t best_tot_mse = (uint64_t)1 << 63;
71   int best_id = 0;
72   memset(tot_mse, 0, sizeof(tot_mse));
73   for (i = 0; i < sb_count; i++) {
74     int gi;
75     uint64_t best_mse = (uint64_t)1 << 63;
76     /* Find best mse among already selected options. */
77     for (gi = 0; gi < nb_strengths; gi++) {
78       if (mse[i][lev[gi]] < best_mse) {
79         best_mse = mse[i][lev[gi]];
80       }
81     }
82     /* Find best mse when adding each possible new option. */
83     for (j = 0; j < total_strengths; j++) {
84       uint64_t best = best_mse;
85       if (mse[i][j] < best) best = mse[i][j];
86       tot_mse[j] += best;
87     }
88   }
89   for (j = 0; j < total_strengths; j++) {
90     if (tot_mse[j] < best_tot_mse) {
91       best_tot_mse = tot_mse[j];
92       best_id = j;
93     }
94   }
95   lev[nb_strengths] = best_id;
96   return best_tot_mse;
97 }
98 
99 /* Search for the best luma+chroma strength to add as an option, knowing we
100    already selected nb_strengths options. */
search_one_dual(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)101 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
102                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
103                                 CDEF_PICK_METHOD pick_method) {
104   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
105   int i, j;
106   uint64_t best_tot_mse = (uint64_t)1 << 63;
107   int best_id0 = 0;
108   int best_id1 = 0;
109   const int total_strengths = nb_cdef_strengths[pick_method];
110   memset(tot_mse, 0, sizeof(tot_mse));
111   for (i = 0; i < sb_count; i++) {
112     int gi;
113     uint64_t best_mse = (uint64_t)1 << 63;
114     /* Find best mse among already selected options. */
115     for (gi = 0; gi < nb_strengths; gi++) {
116       uint64_t curr = mse[0][i][lev0[gi]];
117       curr += mse[1][i][lev1[gi]];
118       if (curr < best_mse) {
119         best_mse = curr;
120       }
121     }
122     /* Find best mse when adding each possible new option. */
123     for (j = 0; j < total_strengths; j++) {
124       int k;
125       for (k = 0; k < total_strengths; k++) {
126         uint64_t best = best_mse;
127         uint64_t curr = mse[0][i][j];
128         curr += mse[1][i][k];
129         if (curr < best) best = curr;
130         tot_mse[j][k] += best;
131       }
132     }
133   }
134   for (j = 0; j < total_strengths; j++) {
135     int k;
136     for (k = 0; k < total_strengths; k++) {
137       if (tot_mse[j][k] < best_tot_mse) {
138         best_tot_mse = tot_mse[j][k];
139         best_id0 = j;
140         best_id1 = k;
141       }
142     }
143   }
144   lev0[nb_strengths] = best_id0;
145   lev1[nb_strengths] = best_id1;
146   return best_tot_mse;
147 }
148 
149 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int * best_lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)150 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
151                                       uint64_t mse[][TOTAL_STRENGTHS],
152                                       int sb_count,
153                                       CDEF_PICK_METHOD pick_method) {
154   uint64_t best_tot_mse;
155   int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
156               pick_method <= CDEF_FAST_SEARCH_LVL4);
157   int i;
158   best_tot_mse = (uint64_t)1 << 63;
159   /* Greedy search: add one strength options at a time. */
160   for (i = 0; i < nb_strengths; i++) {
161     best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
162   }
163   /* Trying to refine the greedy search by reconsidering each
164      already-selected option. */
165   if (!fast) {
166     for (i = 0; i < 4 * nb_strengths; i++) {
167       int j;
168       for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
169       best_tot_mse =
170           search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
171     }
172   }
173   return best_tot_mse;
174 }
175 
176 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int * best_lev0,int * best_lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)177 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
178                                            int nb_strengths,
179                                            uint64_t (**mse)[TOTAL_STRENGTHS],
180                                            int sb_count,
181                                            CDEF_PICK_METHOD pick_method) {
182   uint64_t best_tot_mse;
183   int i;
184   best_tot_mse = (uint64_t)1 << 63;
185   /* Greedy search: add one strength options at a time. */
186   for (i = 0; i < nb_strengths; i++) {
187     best_tot_mse =
188         search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
189   }
190   /* Trying to refine the greedy search by reconsidering each
191      already-selected option. */
192   for (i = 0; i < 4 * nb_strengths; i++) {
193     int j;
194     for (j = 0; j < nb_strengths - 1; j++) {
195       best_lev0[j] = best_lev0[j + 1];
196       best_lev1[j] = best_lev1[j + 1];
197     }
198     best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
199                                    sb_count, pick_method);
200   }
201   return best_tot_mse;
202 }
203 
204 #if CONFIG_AV1_HIGHBITDEPTH
// Copies a vsize x hsize rectangle of 16-bit samples into dst.
// Inputs:
//   dst, dstride: Destination buffer and its stride, in samples.
//   src: Source buffer handle; converted with CONVERT_TO_SHORTPTR before use.
//   src_voffset, src_hoffset: Top-left corner of the rectangle in the source.
//   sstride: Source stride, in samples.
//   vsize, hsize: Rectangle height and width, in samples.
static void copy_sb16_16_highbd(uint16_t *dst, int dstride, const void *src,
                                int src_voffset, int src_hoffset, int sstride,
                                int vsize, int hsize) {
  const uint16_t *const src16 = CONVERT_TO_SHORTPTR((uint8_t *)src);
  const uint16_t *const origin = &src16[src_voffset * sstride + src_hoffset];
  const size_t row_bytes = hsize * sizeof(*origin);
  for (int row = 0; row < vsize; row++) {
    uint16_t *const out_row = dst + row * dstride;
    const uint16_t *const in_row = origin + row * sstride;
    memcpy(out_row, in_row, row_bytes);
  }
}
214 #endif
215 
// Copies a vsize x hsize rectangle of 8-bit samples into a 16-bit buffer,
// widening each sample.
// Inputs:
//   dst, dstride: Destination buffer and its stride, in samples.
//   src: Source buffer of 8-bit samples.
//   src_voffset, src_hoffset: Top-left corner of the rectangle in the source.
//   sstride: Source stride, in samples.
//   vsize, hsize: Rectangle height and width, in samples.
static void copy_sb16_16(uint16_t *dst, int dstride, const void *src,
                         int src_voffset, int src_hoffset, int sstride,
                         int vsize, int hsize) {
  const uint8_t *const src8 = (uint8_t *)src;
  const uint8_t *const origin = &src8[src_voffset * sstride + src_hoffset];
  for (int row = 0; row < vsize; row++) {
    const uint8_t *const in_row = origin + row * sstride;
    uint16_t *const out_row = dst + row * dstride;
    for (int col = 0; col < hsize; col++) {
      out_row[col] = (uint16_t)in_row[col];
    }
  }
}
226 
init_src_params(int * src_stride,int * width,int * height,int * width_log2,int * height_log2,BLOCK_SIZE bsize)227 static INLINE void init_src_params(int *src_stride, int *width, int *height,
228                                    int *width_log2, int *height_log2,
229                                    BLOCK_SIZE bsize) {
230   *src_stride = block_size_wide[bsize];
231   *width = block_size_wide[bsize];
232   *height = block_size_high[bsize];
233   *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
234   *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
235 }
236 #if CONFIG_AV1_HIGHBITDEPTH
237 /* Compute MSE only on the blocks we filtered. */
compute_cdef_dist_highbd(void * dst,int dstride,uint16_t * src,cdef_list * dlist,int cdef_count,BLOCK_SIZE bsize,int coeff_shift,int row,int col)238 static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
239                                          cdef_list *dlist, int cdef_count,
240                                          BLOCK_SIZE bsize, int coeff_shift,
241                                          int row, int col) {
242   assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
243          bsize == BLOCK_8X8);
244   uint64_t sum = 0;
245   int bi, bx, by;
246   uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
247   uint16_t *dst_buff = &dst16[row * dstride + col];
248   int src_stride, width, height, width_log2, height_log2;
249   init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
250                   bsize);
251   for (bi = 0; bi < cdef_count; bi++) {
252     by = dlist[bi].by;
253     bx = dlist[bi].bx;
254     sum += aom_mse_wxh_16bit_highbd(
255         &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
256         &src[bi << (height_log2 + width_log2)], src_stride, width, height);
257   }
258   return sum >> 2 * coeff_shift;
259 }
260 #endif
// Sums the error between the filtered output and the 8-bit reference frame,
// only over the blocks listed in dlist.
// Inputs:
//   dst, dstride: Frame buffer of 8-bit samples and its stride.
//   src: Filtered blocks, packed one after another in dlist order.
//   dlist, cdef_count: List of filtered blocks and its length.
//   bsize: Size of one filtered block; must be 4x4, 4x8, 8x4 or 8x8.
//   coeff_shift: The sum is scaled down by 2 * coeff_shift bits.
//   row, col: Pixel position in dst that the block coordinates are relative
//   to.
// Returns:
//   The accumulated, scaled error.
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint8_t *const frame8 = (uint8_t *)dst;
  uint8_t *const origin = &frame8[row * dstride + col];
  const int blk_size_log2 = height_log2 + width_log2;

  uint64_t total = 0;
  for (int bi = 0; bi < cdef_count; bi++) {
    const int y = dlist[bi].by << height_log2;
    const int x = dlist[bi].bx << width_log2;
    total += aom_mse_wxh_16bit(&origin[y * dstride + x], dstride,
                               &src[bi << blk_size_log2], src_stride, width,
                               height);
  }
  return total >> 2 * coeff_shift;
}
283 
284 // Calculates MSE at block level.
285 // Inputs:
286 //   cdef_search_ctx: Pointer to the structure containing parameters related to
287 //   CDEF search context.
288 //   fbr: Row index in units of 64x64 block
289 //   fbc: Column index in units of 64x64 block
290 // Returns:
291 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
av1_cdef_mse_calc_block(CdefSearchCtx * cdef_search_ctx,int fbr,int fbc,int sb_count)292 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
293                              int sb_count) {
294   const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
295   const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
296   const int coeff_shift = cdef_search_ctx->coeff_shift;
297   const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
298   const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
299 
300   // Declare and initialize the temporary buffers.
301   DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
302   DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
303   cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
304   int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
305   int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
306   uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
307   int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
308   int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
309   int hb_step = 1, vb_step = 1;
310   BLOCK_SIZE bs;
311 
312   const MB_MODE_INFO *const mbmi =
313       mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
314                               MI_SIZE_64X64 * fbc];
315 
316   uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
317                                         ref->v_buffer };
318   int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
319                                    ref->uv_stride };
320 
321   if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
322       mbmi->bsize == BLOCK_64X128) {
323     bs = mbmi->bsize;
324     if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
325       nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
326       hb_step = 2;
327     }
328     if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
329       nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
330       vb_step = 2;
331     }
332   } else {
333     bs = BLOCK_64X64;
334   }
335   // Get number of 8x8 blocks which are not skip. Cdef processing happens for
336   // 8x8 blocks which are not skip.
337   const int cdef_count = av1_cdef_compute_sb_list(
338       mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
339 
340   const int yoff = CDEF_VBORDER * (fbr != 0);
341   const int xoff = CDEF_HBORDER * (fbc != 0);
342   int dirinit = 0;
343   for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
344     for (int i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
345     /* We avoid filtering the pixels for which some of the pixels to
346     average are outside the frame. We could change the filter instead,
347     but it would add special cases for any future vectorization. */
348     const int ysize = (nvb << mi_high_l2[pli]) +
349                       CDEF_VBORDER * (fbr + vb_step < cdef_search_ctx->nvfb) +
350                       yoff;
351     const int xsize = (nhb << mi_wide_l2[pli]) +
352                       CDEF_HBORDER * (fbc + hb_step < cdef_search_ctx->nhfb) +
353                       xoff;
354     const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
355     const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
356     struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
357     cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
358                              pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
359                              ysize, xsize);
360     for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
361       int pri_strength, sec_strength;
362       get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
363                                 &sec_strength, gi);
364       av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
365                          cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
366                          dir, &dirinit, var, pli, dlist, cdef_count,
367                          pri_strength, sec_strength + (sec_strength == 3),
368                          cdef_search_ctx->damping, coeff_shift);
369       const uint64_t curr_mse = cdef_search_ctx->compute_cdef_dist_fn(
370           ref_buffer[pli], ref_stride[pli], tmp_dst, dlist, cdef_count,
371           cdef_search_ctx->bsize[pli], coeff_shift, row, col);
372       if (pli < 2)
373         cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
374       else
375         cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
376     }
377   }
378   cdef_search_ctx->sb_index[sb_count] =
379       MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
380 }
381 
382 // MSE calculation at frame level.
383 // Inputs:
384 //   cdef_search_ctx: Pointer to the structure containing parameters related to
385 //   CDEF search context.
386 // Returns:
387 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_mse_calc_frame(CdefSearchCtx * cdef_search_ctx)388 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
389   // Loop over each sb.
390   for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
391     for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
392       // Checks if cdef processing can be skipped for particular sb.
393       if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
394       // Calculate mse for each sb and store the relevant sb index.
395       av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
396                               cdef_search_ctx->sb_count);
397       cdef_search_ctx->sb_count++;
398     }
399   }
400 }
401 
402 // Allocates memory for members of CdefSearchCtx.
403 // Inputs:
404 //   cdef_search_ctx: Pointer to the structure containing parameters
405 //   related to CDEF search context.
406 // Returns:
407 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_alloc_data(CdefSearchCtx * cdef_search_ctx)408 static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
409   const int nvfb = cdef_search_ctx->nvfb;
410   const int nhfb = cdef_search_ctx->nhfb;
411   cdef_search_ctx->sb_index =
412       aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index));
413   cdef_search_ctx->sb_count = 0;
414   cdef_search_ctx->mse[0] =
415       aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
416   cdef_search_ctx->mse[1] =
417       aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
418 }
419 
420 // Deallocates the memory allocated for members of CdefSearchCtx.
421 // Inputs:
422 //   cdef_search_ctx: Pointer to the structure containing parameters
423 //   related to CDEF search context.
424 // Returns:
425 //   Nothing will be returned.
cdef_dealloc_data(CdefSearchCtx * cdef_search_ctx)426 static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
427   aom_free(cdef_search_ctx->mse[0]);
428   aom_free(cdef_search_ctx->mse[1]);
429   aom_free(cdef_search_ctx->sb_index);
430 }
431 
432 // Initialize the parameters related to CDEF search context.
433 // Inputs:
434 //   frame: Pointer to compressed frame buffer
435 //   ref: Pointer to the frame buffer holding the source frame
436 //   cm: Pointer to top level common structure
437 //   xd: Pointer to common current coding block structure
438 //   cdef_search_ctx: Pointer to the structure containing parameters related to
439 //   CDEF search context.
440 //   pick_method: Search method used to select CDEF parameters
441 // Returns:
442 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_params_init(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref,AV1_COMMON * cm,MACROBLOCKD * xd,CdefSearchCtx * cdef_search_ctx,CDEF_PICK_METHOD pick_method)443 static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
444                                         const YV12_BUFFER_CONFIG *ref,
445                                         AV1_COMMON *cm, MACROBLOCKD *xd,
446                                         CdefSearchCtx *cdef_search_ctx,
447                                         CDEF_PICK_METHOD pick_method) {
448   const CommonModeInfoParams *const mi_params = &cm->mi_params;
449   const int num_planes = av1_num_planes(cm);
450   cdef_search_ctx->mi_params = &cm->mi_params;
451   cdef_search_ctx->ref = ref;
452   cdef_search_ctx->nvfb =
453       (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
454   cdef_search_ctx->nhfb =
455       (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
456   cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
457   cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
458   cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
459   cdef_search_ctx->num_planes = num_planes;
460   cdef_search_ctx->pick_method = pick_method;
461   cdef_search_ctx->sb_count = 0;
462   av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
463                        num_planes);
464   // Initialize plane wise information.
465   for (int pli = 0; pli < num_planes; pli++) {
466     cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
467     cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
468     cdef_search_ctx->bsize[pli] =
469         cdef_search_ctx->ydec[pli]
470             ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
471             : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
472     cdef_search_ctx->mi_wide_l2[pli] =
473         MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
474     cdef_search_ctx->mi_high_l2[pli] =
475         MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
476     cdef_search_ctx->plane[pli] = xd->plane[pli];
477   }
478   // Function pointer initialization.
479 #if CONFIG_AV1_HIGHBITDEPTH
480   if (cm->seq_params->use_highbitdepth) {
481     cdef_search_ctx->copy_fn = copy_sb16_16_highbd;
482     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
483   } else {
484     cdef_search_ctx->copy_fn = copy_sb16_16;
485     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
486   }
487 #else
488   cdef_search_ctx->copy_fn = copy_sb16_16;
489   cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
490 #endif
491 }
492 
// Chooses the frame CDEF strengths from the quantizer alone, without running
// a search.
// Inputs:
//   cm: Pointer to top level common structure. cm->cdef_info and the
//   cdef_strength of the first mode-info entry of every 64x64 block are
//   written.
//   skip_cdef: When non-zero, a second all-zero strength set is signaled in
//   addition to the predicted one.
//   frames_since_key: Blocks are only switched to the second set when this
//   exceeds 10.
// Returns:
//   Nothing will be returned.
static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                              int frames_since_key) {
  const int bd = cm->seq_params->bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;

  // Check the speed feature to avoid extra signaling: one strength set
  // normally, two (one extra index bit) when skip_cdef is enabled.
  cdef_info->cdef_bits = skip_cdef ? 1 : 0;
  cdef_info->nb_cdef_strengths = skip_cdef ? 2 : 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  // Quadratic models in q, with separate coefficients for intra-only frames
  // and all other frames. Primary results are clamped to [0, 15], secondary
  // results to [0, 3].
  int y_pri = 0;
  int y_sec = 0;
  int uv_pri = 0;
  int uv_sec = 0;
  if (frame_is_intra_only(cm)) {
    y_pri = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    y_sec = clamp(
        (int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f),
        0, 3);
    uv_pri = clamp(
        (int)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f),
        0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.0000032651783f +
                               q * 0.00035520183f + 0.00228092f),
                   0, 3);
  } else {
    y_pri = clamp((int)roundf(q * q * -0.0000023593946f +
                              q * 0.0068615186f + 0.02709886f),
                  0, 15);
    y_sec = clamp((int)roundf(q * q * -0.00000057629734f +
                              q * 0.0013993345f + 0.03831067f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000007095069f +
                               q * 0.0034628846f + 0.00887099f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.00000023874085f +
                               q * 0.00028223585f + 0.05576307f),
                   0, 3);
  }
  cdef_info->cdef_strengths[0] = y_pri * CDEF_SEC_STRENGTHS + y_sec;
  cdef_info->cdef_uv_strengths[0] = uv_pri * CDEF_SEC_STRENGTHS + uv_sec;
  if (skip_cdef) {
    // The second set applies no filtering.
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
  }

  // Assign a strength set to every 64x64 block: set 0 by default, set 1 for
  // blocks flagged skip_cdef_curr_sb once enough frames have passed since the
  // key frame.
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int allow_second_set = skip_cdef && frames_since_key > 10;
  for (int r = 0; r < nvfb; ++r) {
    MB_MODE_INFO **const fb_row =
        mi_params->mi_grid_base + MI_SIZE_64X64 * r * mi_params->mi_stride;
    for (int c = 0; c < nhfb; ++c) {
      MB_MODE_INFO *const fb_mbmi = fb_row[MI_SIZE_64X64 * c];
      fb_mbmi->cdef_strength =
          (allow_second_set && fb_mbmi->skip_cdef_curr_sb) ? 1 : 0;
    }
  }
}
565 
// Selects the frame-level CDEF strengths and the per-64x64-block strength
// index.
// Inputs:
//   mt_info: Multi-thread info; with more than one worker the MSE calculation
//   is dispatched to av1_cdef_mse_calc_frame_mt().
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure. cm->cdef_info and the
//   cdef_strength of the processed blocks are written.
//   xd: Pointer to common current coding block structure
//   pick_method: Search method used to select CDEF parameters
//   rdmult: Rate-distortion multiplier used to weigh signaling cost against
//   MSE.
//   skip_cdef_feature, frames_since_key: Only used by the CDEF_PICK_FROM_Q
//   path.
// Returns:
//   Nothing will be returned.
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
                     const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                     MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
                     int skip_cdef_feature, int frames_since_key) {
  // The quantizer-based method needs no search at all.
  if (pick_method == CDEF_PICK_FROM_Q) {
    pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key);
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  CdefInfo *const cdef_info = &cm->cdef_info;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int is_fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                       pick_method <= CDEF_FAST_SEARCH_LVL4);
  const int has_chroma = av1_num_planes(cm) > 1;

  // Initialize parameters related to CDEF search context, then allocate its
  // buffers.
  CdefSearchCtx cdef_search_ctx;
  cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
  cdef_alloc_data(&cdef_search_ctx);

  // Frame level mse calculation.
  if (mt_info->num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
  } else {
    cdef_mse_calc_frame(&cdef_search_ctx);
  }

  const int sb_count = cdef_search_ctx.sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx.mse[0];
  mse[1] = cdef_search_ctx.mse[1];

  /* Search for different number of signaling bits. */
  uint64_t best_rd = UINT64_MAX;
  int nb_strength_bits = 0;
  for (int bits = 0; bits <= 3; bits++) {
    const int nb_strengths = 1 << bits;
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };

    uint64_t tot_mse;
    if (has_chroma) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Rate: one index of "bits" bits per block, plus the strength values
    // themselves (one list, or two when chroma is present).
    const int lists_signaled = has_chroma ? 2 : 1;
    const int total_bits =
        sb_count * bits + nb_strengths * CDEF_STRENGTH_BITS * lists_signaled;
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd >= best_rd) continue;

    // New best trade-off: remember it and publish its strengths.
    best_rd = rd;
    nb_strength_bits = bits;
    memcpy(cdef_info->cdef_strengths, best_lev0,
           nb_strengths * sizeof(best_lev0[0]));
    if (has_chroma) {
      memcpy(cdef_info->cdef_uv_strengths, best_lev1,
             nb_strengths * sizeof(best_lev1[0]));
    }
  }
  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;

  // For every processed block, pick the signaled strength set with the lowest
  // MSE; ties keep the lowest index.
  for (int sb = 0; sb < sb_count; sb++) {
    uint64_t lowest_mse = UINT64_MAX;
    int chosen = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t set_mse = mse[0][sb][cdef_info->cdef_strengths[gi]];
      if (has_chroma) set_mse += mse[1][sb][cdef_info->cdef_uv_strengths[gi]];
      if (set_mse < lowest_mse) {
        lowest_mse = set_mse;
        chosen = gi;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx.sb_index[sb]]->cdef_strength =
        chosen;
  }

  // In the fast modes the stored values are still search indices; convert
  // them to the actual filter strengths.
  if (is_fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_idx = cdef_info->cdef_strengths[j];
      const int chroma_idx = cdef_info->cdef_uv_strengths[j];
      // Scratch variables used by name inside STORE_CDEF_FILTER_STRENGTH.
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_idx);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_idx);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  cdef_dealloc_data(&cdef_search_ctx);
}
663