• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <stdbool.h>
14 #include <string.h>
15 
16 #include "config/aom_dsp_rtcd.h"
17 #include "config/aom_scale_rtcd.h"
18 
19 #include "aom/aom_integer.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/encoder/encoder.h"
23 #include "av1/encoder/ethread.h"
24 #include "av1/encoder/pickcdef.h"
25 #include "av1/encoder/mcomp.h"
26 
27 // Get primary and secondary filter strength for the given strength index and
28 // search method
get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,int * pri_strength,int * sec_strength,int strength_idx)29 static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
30                                              int *pri_strength,
31                                              int *sec_strength,
32                                              int strength_idx) {
33   const int tot_sec_filter =
34       (pick_method == CDEF_FAST_SEARCH_LVL5)
35           ? REDUCED_SEC_STRENGTHS_LVL5
36           : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
37                                                     : CDEF_SEC_STRENGTHS);
38   const int pri_idx = strength_idx / tot_sec_filter;
39   const int sec_idx = strength_idx % tot_sec_filter;
40   *pri_strength = pri_idx;
41   *sec_strength = sec_idx;
42   if (pick_method == CDEF_FULL_SEARCH) return;
43 
44   switch (pick_method) {
45     case CDEF_FAST_SEARCH_LVL1:
46       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
47       *pri_strength = priconv_lvl1[pri_idx];
48       break;
49     case CDEF_FAST_SEARCH_LVL2:
50       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
51       *pri_strength = priconv_lvl2[pri_idx];
52       break;
53     case CDEF_FAST_SEARCH_LVL3:
54       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
55       assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
56       *pri_strength = priconv_lvl2[pri_idx];
57       *sec_strength = secconv_lvl3[sec_idx];
58       break;
59     case CDEF_FAST_SEARCH_LVL4:
60       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
61       assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
62       *pri_strength = priconv_lvl4[pri_idx];
63       *sec_strength = secconv_lvl3[sec_idx];
64       break;
65     case CDEF_FAST_SEARCH_LVL5:
66       assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
67       assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
68       *pri_strength = priconv_lvl5[pri_idx];
69       *sec_strength = secconv_lvl5[sec_idx];
70       break;
71     default: assert(0 && "Invalid CDEF search method");
72   }
73 }
74 
// Converts a strength index into the filter strengths of the given search
// method and stores pri_strength * CDEF_SEC_STRENGTHS + sec_strength in
// cdef_strength.
//   cdef_strength: Assignable expression receiving the combined strength.
//   pick_method: Search method used to select CDEF parameters.
//   strength_idx: Strength index passed to get_cdef_filter_strengths().
// NOTE: The expansion takes the address of variables named pri_strength and
// sec_strength, so int variables with exactly these names must be declared in
// the scope where the macro is used. Both are overwritten.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
83 
84 /* Search for the best strength to add as an option, knowing we
85    already selected nb_strengths options. */
search_one(int * lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)86 static uint64_t search_one(int *lev, int nb_strengths,
87                            uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
88                            CDEF_PICK_METHOD pick_method) {
89   uint64_t tot_mse[TOTAL_STRENGTHS];
90   const int total_strengths = nb_cdef_strengths[pick_method];
91   int i, j;
92   uint64_t best_tot_mse = (uint64_t)1 << 63;
93   int best_id = 0;
94   memset(tot_mse, 0, sizeof(tot_mse));
95   for (i = 0; i < sb_count; i++) {
96     int gi;
97     uint64_t best_mse = (uint64_t)1 << 63;
98     /* Find best mse among already selected options. */
99     for (gi = 0; gi < nb_strengths; gi++) {
100       if (mse[i][lev[gi]] < best_mse) {
101         best_mse = mse[i][lev[gi]];
102       }
103     }
104     /* Find best mse when adding each possible new option. */
105     for (j = 0; j < total_strengths; j++) {
106       uint64_t best = best_mse;
107       if (mse[i][j] < best) best = mse[i][j];
108       tot_mse[j] += best;
109     }
110   }
111   for (j = 0; j < total_strengths; j++) {
112     if (tot_mse[j] < best_tot_mse) {
113       best_tot_mse = tot_mse[j];
114       best_id = j;
115     }
116   }
117   lev[nb_strengths] = best_id;
118   return best_tot_mse;
119 }
120 
121 /* Search for the best luma+chroma strength to add as an option, knowing we
122    already selected nb_strengths options. */
search_one_dual(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)123 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
124                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
125                                 CDEF_PICK_METHOD pick_method) {
126   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
127   int i, j;
128   uint64_t best_tot_mse = (uint64_t)1 << 63;
129   int best_id0 = 0;
130   int best_id1 = 0;
131   const int total_strengths = nb_cdef_strengths[pick_method];
132   memset(tot_mse, 0, sizeof(tot_mse));
133   for (i = 0; i < sb_count; i++) {
134     int gi;
135     uint64_t best_mse = (uint64_t)1 << 63;
136     /* Find best mse among already selected options. */
137     for (gi = 0; gi < nb_strengths; gi++) {
138       uint64_t curr = mse[0][i][lev0[gi]];
139       curr += mse[1][i][lev1[gi]];
140       if (curr < best_mse) {
141         best_mse = curr;
142       }
143     }
144     /* Find best mse when adding each possible new option. */
145     for (j = 0; j < total_strengths; j++) {
146       int k;
147       for (k = 0; k < total_strengths; k++) {
148         uint64_t best = best_mse;
149         uint64_t curr = mse[0][i][j];
150         curr += mse[1][i][k];
151         if (curr < best) best = curr;
152         tot_mse[j][k] += best;
153       }
154     }
155   }
156   for (j = 0; j < total_strengths; j++) {
157     int k;
158     for (k = 0; k < total_strengths; k++) {
159       if (tot_mse[j][k] < best_tot_mse) {
160         best_tot_mse = tot_mse[j][k];
161         best_id0 = j;
162         best_id1 = k;
163       }
164     }
165   }
166   lev0[nb_strengths] = best_id0;
167   lev1[nb_strengths] = best_id1;
168   return best_tot_mse;
169 }
170 
171 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int * best_lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)172 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
173                                       uint64_t mse[][TOTAL_STRENGTHS],
174                                       int sb_count,
175                                       CDEF_PICK_METHOD pick_method) {
176   uint64_t best_tot_mse;
177   int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
178               pick_method <= CDEF_FAST_SEARCH_LVL5);
179   int i;
180   best_tot_mse = (uint64_t)1 << 63;
181   /* Greedy search: add one strength options at a time. */
182   for (i = 0; i < nb_strengths; i++) {
183     best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
184   }
185   /* Trying to refine the greedy search by reconsidering each
186      already-selected option. */
187   if (!fast) {
188     for (i = 0; i < 4 * nb_strengths; i++) {
189       int j;
190       for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
191       best_tot_mse =
192           search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
193     }
194   }
195   return best_tot_mse;
196 }
197 
198 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int * best_lev0,int * best_lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)199 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
200                                            int nb_strengths,
201                                            uint64_t (**mse)[TOTAL_STRENGTHS],
202                                            int sb_count,
203                                            CDEF_PICK_METHOD pick_method) {
204   uint64_t best_tot_mse;
205   int i;
206   best_tot_mse = (uint64_t)1 << 63;
207   /* Greedy search: add one strength options at a time. */
208   for (i = 0; i < nb_strengths; i++) {
209     best_tot_mse =
210         search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
211   }
212   /* Trying to refine the greedy search by reconsidering each
213      already-selected option. */
214   for (i = 0; i < 4 * nb_strengths; i++) {
215     int j;
216     for (j = 0; j < nb_strengths - 1; j++) {
217       best_lev0[j] = best_lev0[j + 1];
218       best_lev1[j] = best_lev1[j + 1];
219     }
220     best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
221                                    sb_count, pick_method);
222   }
223   return best_tot_mse;
224 }
225 
init_src_params(int * src_stride,int * width,int * height,int * width_log2,int * height_log2,BLOCK_SIZE bsize)226 static INLINE void init_src_params(int *src_stride, int *width, int *height,
227                                    int *width_log2, int *height_log2,
228                                    BLOCK_SIZE bsize) {
229   *src_stride = block_size_wide[bsize];
230   *width = block_size_wide[bsize];
231   *height = block_size_high[bsize];
232   *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
233   *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
234 }
235 #if CONFIG_AV1_HIGHBITDEPTH
236 /* Compute MSE only on the blocks we filtered. */
compute_cdef_dist_highbd(void * dst,int dstride,uint16_t * src,cdef_list * dlist,int cdef_count,BLOCK_SIZE bsize,int coeff_shift,int row,int col)237 static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
238                                          cdef_list *dlist, int cdef_count,
239                                          BLOCK_SIZE bsize, int coeff_shift,
240                                          int row, int col) {
241   assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
242          bsize == BLOCK_8X8);
243   uint64_t sum = 0;
244   int bi, bx, by;
245   uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
246   uint16_t *dst_buff = &dst16[row * dstride + col];
247   int src_stride, width, height, width_log2, height_log2;
248   init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
249                   bsize);
250   for (bi = 0; bi < cdef_count; bi++) {
251     by = dlist[bi].by;
252     bx = dlist[bi].bx;
253     sum += aom_mse_wxh_16bit_highbd(
254         &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
255         &src[bi << (height_log2 + width_log2)], src_stride, width, height);
256   }
257   return sum >> 2 * coeff_shift;
258 }
259 #endif
260 
261 // Checks dual and quad block processing is applicable for block widths 8 and 4
262 // respectively.
is_dual_or_quad_applicable(cdef_list * dlist,int width,int cdef_count,int bi,int iter)263 static INLINE int is_dual_or_quad_applicable(cdef_list *dlist, int width,
264                                              int cdef_count, int bi, int iter) {
265   assert(width == 8 || width == 4);
266   const int blk_offset = (width == 8) ? 1 : 3;
267   if ((iter + blk_offset) >= cdef_count) return 0;
268 
269   if (dlist[bi].by == dlist[bi + blk_offset].by &&
270       dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
271     return 1;
272 
273   return 0;
274 }
275 
// 8-bit distortion: accumulates the error only over the cdef_count blocks
// listed in dlist[], comparing each block of dst (offset by row/col) with the
// corresponding block of src, where block bi starts at
// src[bi << (height_log2 + width_log2)]. When
// is_dual_or_quad_applicable() allows it, 16 / width horizontally adjacent
// blocks are measured with a single aom_mse_16xh_16bit() call. The
// accumulated value is shifted right by 2 * coeff_shift before it is
// returned.
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint8_t *const frame8 = (uint8_t *)dst;
  uint8_t *const fb_origin = &frame8[row * dstride + col];
  const int blk_shift = height_log2 + width_log2;
  // Number of blocks covered by one 16-pixel wide measurement.
  const int blks_per_group = 16 / width;

  uint64_t total = 0;
  int bi = 0;
  int iter = 0;
  while (bi < cdef_count) {
    const int y = dlist[bi].by << height_log2;
    const int x = dlist[bi].bx << width_log2;
    uint16_t *const src_blk = &src[bi << blk_shift];
    uint8_t *const dst_blk = &fb_origin[y * dstride + x];

    int advance;
    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
      total += aom_mse_16xh_16bit(dst_blk, dstride, src_blk, width, height);
      advance = blks_per_group;
    } else {
      total += aom_mse_wxh_16bit(dst_blk, dstride, src_blk, src_stride, width,
                                 height);
      advance = 1;
    }
    iter += advance;
    bi += advance;
  }

  return total >> (2 * coeff_shift);
}
314 
315 // Fill the boundary regions of the block with CDEF_VERY_LARGE, only if the
316 // region is outside frame boundary
fill_borders_for_fbs_on_frame_boundary(uint16_t * inbuf,int hfilt_size,int vfilt_size,bool is_fb_on_frm_left_boundary,bool is_fb_on_frm_right_boundary,bool is_fb_on_frm_top_boundary,bool is_fb_on_frm_bottom_boundary)317 static INLINE void fill_borders_for_fbs_on_frame_boundary(
318     uint16_t *inbuf, int hfilt_size, int vfilt_size,
319     bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
320     bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
321   if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
322       !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
323     return;
324   if (is_fb_on_frm_bottom_boundary) {
325     // Fill bottom region of the block
326     const int buf_offset =
327         (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
328     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
329               CDEF_VERY_LARGE);
330   }
331   if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
332     const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
333     // Fill bottom-left region of the block
334     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
335               CDEF_VERY_LARGE);
336   }
337   if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
338     const int buf_offset =
339         (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
340     // Fill bottom-right region of the block
341     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
342               CDEF_VERY_LARGE);
343   }
344   if (is_fb_on_frm_top_boundary) {
345     // Fill top region of the block
346     fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
347               CDEF_VERY_LARGE);
348   }
349   if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
350     // Fill top-left region of the block
351     fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
352   }
353   if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
354     const int buf_offset = hfilt_size + CDEF_HBORDER;
355     // Fill top-right region of the block
356     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
357               CDEF_VERY_LARGE);
358   }
359   if (is_fb_on_frm_left_boundary) {
360     const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
361     // Fill left region of the block
362     fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
363               CDEF_VERY_LARGE);
364   }
365   if (is_fb_on_frm_right_boundary) {
366     const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
367     // Fill right region of the block
368     fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
369               vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
370   }
371 }
372 
373 // Calculate the number of 8x8/4x4 filter units for which SSE can be calculated
374 // after CDEF filtering in single function call
get_error_calc_width_in_filt_units(cdef_list * dlist,int cdef_count,int bi,int subsampling_x,int subsampling_y)375 static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
376     cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
377     int subsampling_y) {
378   // TODO(Ranjit): Extend the optimization for 422
379   if (subsampling_x != subsampling_y) return 1;
380 
381   // Combining more blocks seems to increase encode time due to increase in
382   // control code
383   if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
384       dlist[bi].bx + 3 == dlist[bi + 3].bx) {
385     /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
386      * logic if y co-ordinates match and x co-ordinates are
387      * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
388     return 4;
389   }
390   if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
391       dlist[bi].bx + 1 == dlist[bi + 1].bx) {
392     /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
393      * logic if their y co-ordinates match and x co-ordinates are
394      * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
395     return 2;
396   }
397   return 1;
398 }
399 
400 // Returns the block error after CDEF filtering for a given strength
get_filt_error(const CdefSearchCtx * cdef_search_ctx,const struct macroblockd_plane * pd,cdef_list * dlist,int dir[CDEF_NBLOCKS][CDEF_NBLOCKS],int * dirinit,int var[CDEF_NBLOCKS][CDEF_NBLOCKS],uint16_t * in,uint8_t * ref_buffer,int ref_stride,int row,int col,int pri_strength,int sec_strength,int cdef_count,int pli,int coeff_shift,BLOCK_SIZE bs)401 static INLINE uint64_t get_filt_error(
402     const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
403     cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
404     int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
405     int ref_stride, int row, int col, int pri_strength, int sec_strength,
406     int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
407   uint64_t curr_sse = 0;
408   const BLOCK_SIZE plane_bsize =
409       get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
410   const int bw_log2 = 3 - pd->subsampling_x;
411   const int bh_log2 = 3 - pd->subsampling_y;
412 
413   // TODO(Ranjit): Extend this optimization for HBD
414   if (!cdef_search_ctx->use_highbitdepth) {
415     // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the
416     // error at CDEF block level
417     const int tot_blk_count =
418         (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
419         (bw_log2 + bh_log2);
420     if (cdef_count == tot_blk_count) {
421       // Calculate the offset in the buffer based on block position
422       const FULLPEL_MV this_mv = { row, col };
423       const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
424       if (pri_strength == 0 && sec_strength == 0) {
425         // When CDEF strength is zero, filtering is not applied. Hence
426         // error is calculated between source and unfiltered pixels
427         curr_sse =
428             aom_sse(&ref_buffer[buf_offset], ref_stride,
429                     get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
430                     block_size_wide[plane_bsize], block_size_high[plane_bsize]);
431       } else {
432         DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
433 
434         av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
435                            cdef_search_ctx->xdec[pli],
436                            cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
437                            dlist, cdef_count, pri_strength,
438                            sec_strength + (sec_strength == 3),
439                            cdef_search_ctx->damping, coeff_shift);
440         curr_sse =
441             aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
442                     (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
443                     block_size_high[plane_bsize]);
444       }
445     } else {
446       // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering
447       // functions produce 8-bit output and the error is calculated in 8-bit
448       // domain
449       if (pri_strength == 0 && sec_strength == 0) {
450         int num_error_calc_filt_units = 1;
451         for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
452           const uint8_t by = dlist[bi].by;
453           const uint8_t bx = dlist[bi].bx;
454           const int16_t by_pos = (by << bh_log2);
455           const int16_t bx_pos = (bx << bw_log2);
456           // Calculate the offset in the buffer based on block position
457           const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
458           const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
459           num_error_calc_filt_units = get_error_calc_width_in_filt_units(
460               dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
461           curr_sse += aom_sse(
462               &ref_buffer[buf_offset], ref_stride,
463               get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
464               num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
465         }
466       } else {
467         DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
468         av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
469                            cdef_search_ctx->xdec[pli],
470                            cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
471                            dlist, cdef_count, pri_strength,
472                            sec_strength + (sec_strength == 3),
473                            cdef_search_ctx->damping, coeff_shift);
474         int num_error_calc_filt_units = 1;
475         for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
476           const uint8_t by = dlist[bi].by;
477           const uint8_t bx = dlist[bi].bx;
478           const int16_t by_pos = (by << bh_log2);
479           const int16_t bx_pos = (bx << bw_log2);
480           // Calculate the offset in the buffer based on block position
481           const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
482           const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
483           const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
484           const int tmp_buf_offset =
485               get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
486           num_error_calc_filt_units = get_error_calc_width_in_filt_units(
487               dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
488           curr_sse += aom_sse(
489               &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
490               (1 << MAX_SB_SIZE_LOG2),
491               num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
492         }
493       }
494     }
495   } else {
496     DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
497 
498     av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
499                        cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
500                        dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
501                        sec_strength + (sec_strength == 3),
502                        cdef_search_ctx->damping, coeff_shift);
503     curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
504         ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
505         cdef_search_ctx->bsize[pli], coeff_shift, row, col);
506   }
507   return curr_sse;
508 }
509 
510 // Calculates MSE at block level.
511 // Inputs:
512 //   cdef_search_ctx: Pointer to the structure containing parameters related to
513 //   CDEF search context.
514 //   fbr: Row index in units of 64x64 block
515 //   fbc: Column index in units of 64x64 block
516 // Returns:
517 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
av1_cdef_mse_calc_block(CdefSearchCtx * cdef_search_ctx,int fbr,int fbc,int sb_count)518 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
519                              int sb_count) {
520   const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
521   const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
522   const int coeff_shift = cdef_search_ctx->coeff_shift;
523   const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
524   const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
525 
526   // Declare and initialize the temporary buffers.
527   DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
528   cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
529   int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
530   int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
531   uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
532   int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
533   int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
534   int hb_step = 1, vb_step = 1;
535   BLOCK_SIZE bs;
536 
537   const MB_MODE_INFO *const mbmi =
538       mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
539                               MI_SIZE_64X64 * fbc];
540 
541   uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
542                                         ref->v_buffer };
543   int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
544                                    ref->uv_stride };
545 
546   if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
547       mbmi->bsize == BLOCK_64X128) {
548     bs = mbmi->bsize;
549     if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
550       nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
551       hb_step = 2;
552     }
553     if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
554       nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
555       vb_step = 2;
556     }
557   } else {
558     bs = BLOCK_64X64;
559   }
560   // Get number of 8x8 blocks which are not skip. Cdef processing happens for
561   // 8x8 blocks which are not skip.
562   const int cdef_count = av1_cdef_compute_sb_list(
563       mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
564   const bool is_fb_on_frm_left_boundary = (fbc == 0);
565   const bool is_fb_on_frm_right_boundary =
566       (fbc + hb_step == cdef_search_ctx->nhfb);
567   const bool is_fb_on_frm_top_boundary = (fbr == 0);
568   const bool is_fb_on_frm_bottom_boundary =
569       (fbr + vb_step == cdef_search_ctx->nvfb);
570   const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
571   const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
572   int dirinit = 0;
573   for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
574     /* We avoid filtering the pixels for which some of the pixels to
575     average are outside the frame. We could change the filter instead,
576     but it would add special cases for any future vectorization. */
577     const int hfilt_size = (nhb << mi_wide_l2[pli]);
578     const int vfilt_size = (nvb << mi_high_l2[pli]);
579     const int ysize =
580         vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
581     const int xsize =
582         hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
583     const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
584     const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
585     struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
586     cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
587                              pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
588                              ysize, xsize);
589     fill_borders_for_fbs_on_frame_boundary(
590         inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
591         is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
592         is_fb_on_frm_bottom_boundary);
593     for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
594       int pri_strength, sec_strength;
595       get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
596                                 &sec_strength, gi);
597       const uint64_t curr_mse = get_filt_error(
598           cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
599           ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
600           pli, coeff_shift, bs);
601       if (pli < 2)
602         cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
603       else
604         cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
605     }
606   }
607   cdef_search_ctx->sb_index[sb_count] =
608       MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
609 }
610 
611 // MSE calculation at frame level.
612 // Inputs:
613 //   cdef_search_ctx: Pointer to the structure containing parameters related to
614 //   CDEF search context.
615 // Returns:
616 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_mse_calc_frame(CdefSearchCtx * cdef_search_ctx)617 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
618   // Loop over each sb.
619   for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
620     for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
621       // Checks if cdef processing can be skipped for particular sb.
622       if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
623       // Calculate mse for each sb and store the relevant sb index.
624       av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
625                               cdef_search_ctx->sb_count);
626       cdef_search_ctx->sb_count++;
627     }
628   }
629 }
630 
631 // Allocates memory for members of CdefSearchCtx.
632 // Inputs:
633 //   cdef_search_ctx: Pointer to the structure containing parameters
634 //   related to CDEF search context.
635 // Returns:
636 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_alloc_data(CdefSearchCtx * cdef_search_ctx)637 static AOM_INLINE bool cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
638   const int nvfb = cdef_search_ctx->nvfb;
639   const int nhfb = cdef_search_ctx->nhfb;
640   cdef_search_ctx->sb_index =
641       aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index));
642   cdef_search_ctx->sb_count = 0;
643   cdef_search_ctx->mse[0] =
644       aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
645   cdef_search_ctx->mse[1] =
646       aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
647   if (!(cdef_search_ctx->sb_index && cdef_search_ctx->mse[0] &&
648         cdef_search_ctx->mse[1])) {
649     aom_free(cdef_search_ctx->sb_index);
650     aom_free(cdef_search_ctx->mse[0]);
651     aom_free(cdef_search_ctx->mse[1]);
652     return false;
653   }
654   return true;
655 }
656 
657 // Deallocates the memory allocated for members of CdefSearchCtx.
658 // Inputs:
659 //   cdef_search_ctx: Pointer to the structure containing parameters
660 //   related to CDEF search context.
661 // Returns:
662 //   Nothing will be returned.
cdef_dealloc_data(CdefSearchCtx * cdef_search_ctx)663 static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
664   aom_free(cdef_search_ctx->mse[0]);
665   aom_free(cdef_search_ctx->mse[1]);
666   aom_free(cdef_search_ctx->sb_index);
667 }
668 
669 // Initialize the parameters related to CDEF search context.
670 // Inputs:
671 //   frame: Pointer to compressed frame buffer
672 //   ref: Pointer to the frame buffer holding the source frame
673 //   cm: Pointer to top level common structure
674 //   xd: Pointer to common current coding block structure
675 //   cdef_search_ctx: Pointer to the structure containing parameters related to
676 //   CDEF search context.
677 //   pick_method: Search method used to select CDEF parameters
678 // Returns:
679 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_params_init(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref,AV1_COMMON * cm,MACROBLOCKD * xd,CdefSearchCtx * cdef_search_ctx,CDEF_PICK_METHOD pick_method)680 static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
681                                         const YV12_BUFFER_CONFIG *ref,
682                                         AV1_COMMON *cm, MACROBLOCKD *xd,
683                                         CdefSearchCtx *cdef_search_ctx,
684                                         CDEF_PICK_METHOD pick_method) {
685   const CommonModeInfoParams *const mi_params = &cm->mi_params;
686   const int num_planes = av1_num_planes(cm);
687   cdef_search_ctx->mi_params = &cm->mi_params;
688   cdef_search_ctx->ref = ref;
689   cdef_search_ctx->nvfb =
690       (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
691   cdef_search_ctx->nhfb =
692       (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
693   cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
694   cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
695   cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
696   cdef_search_ctx->num_planes = num_planes;
697   cdef_search_ctx->pick_method = pick_method;
698   cdef_search_ctx->sb_count = 0;
699   cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
700   av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
701                        num_planes);
702   // Initialize plane wise information.
703   for (int pli = 0; pli < num_planes; pli++) {
704     cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
705     cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
706     cdef_search_ctx->bsize[pli] =
707         cdef_search_ctx->ydec[pli]
708             ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
709             : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
710     cdef_search_ctx->mi_wide_l2[pli] =
711         MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
712     cdef_search_ctx->mi_high_l2[pli] =
713         MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
714     cdef_search_ctx->plane[pli] = xd->plane[pli];
715   }
716   // Function pointer initialization.
717 #if CONFIG_AV1_HIGHBITDEPTH
718   if (cm->seq_params->use_highbitdepth) {
719     cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
720     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
721   } else {
722     cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
723     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
724   }
725 #else
726   cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
727   cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
728 #endif
729 }
730 
// Picks the frame-level CDEF parameters directly from the quantizer, using
// quadratic models in q, without running a strength search.
// Inputs:
//   cm: Pointer to top level common structure; cm->cdef_info is filled in.
//   skip_cdef: When non-zero, two strength entries are signaled (the second
//   one is 0) and the per-block strength indices are left untouched.
//   is_screen_content: Selects the screen content model instead of the
//   inter/intra frame models.
// Returns:
//   Nothing will be returned. cm->cdef_info is modified and, when skip_cdef is
//   zero, cdef_strength of the mode info at every 64x64 block origin is set
//   to 0.
static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                              int is_screen_content) {
  CdefInfo *const cdef_info = &cm->cdef_info;
  const int bd = cm->seq_params->bit_depth;
  // AC quantizer for the base qindex, shifted right by (bd - 8).
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);

  // Check the speed feature to avoid extra signaling.
  cdef_info->cdef_bits = skip_cdef ? 1 : 0;
  cdef_info->nb_cdef_strengths = skip_cdef ? 2 : 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  // Each branch below assigns all four values. The *_pri values are clamped
  // to [0, 15] and the *_sec values to [0, 3].
  int y_pri;
  int y_sec;
  int uv_pri;
  int uv_sec;
  if (is_screen_content) {
    // Screen content model: evaluated in double and truncated (not rounded).
    y_pri = clamp(
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02),
        0, 15);
    y_sec = clamp(
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01),
        0, 3);
    uv_pri = clamp(
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01),
        0, 15);
    uv_sec = clamp(
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0),
        0, 3);
  } else if (!frame_is_intra_only(cm)) {
    // Model for frames that are not intra-only: evaluated in float and
    // rounded to nearest.
    y_pri = clamp((int)roundf(q * q * -0.0000023593946f +
                              q * 0.0068615186f + 0.02709886f),
                  0, 15);
    y_sec = clamp((int)roundf(q * q * -0.00000057629734f +
                              q * 0.0013993345f + 0.03831067f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000007095069f +
                               q * 0.0034628846f + 0.00887099f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.00000023874085f +
                               q * 0.00028223585f + 0.05576307f),
                   0, 3);
  } else {
    // Model for intra-only frames: evaluated in float and rounded to nearest.
    y_pri = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    y_sec = clamp((int)roundf(q * q * 0.0000029167343f +
                              q * 0.0027798624f + 0.0079405f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000130790995f +
                               q * 0.012892405f - 0.00748388f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.0000032651783f +
                               q * 0.00035520183f + 0.00228092f),
                   0, 3);
  }

  // Pack the two components of each plane type into one strength value.
  cdef_info->cdef_strengths[0] = y_pri * CDEF_SEC_STRENGTHS + y_sec;
  cdef_info->cdef_uv_strengths[0] = uv_pri * CDEF_SEC_STRENGTHS + uv_sec;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  // Only one strength entry exists, so every 64x64 block selects index 0.
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  for (int fbr = 0; fbr < nvfb; ++fbr) {
    MB_MODE_INFO **const fb_row =
        mi_params->mi_grid_base + fbr * MI_SIZE_64X64 * mi_params->mi_stride;
    for (int fbc = 0; fbc < nhfb; ++fbc) {
      fb_row[MI_SIZE_64X64 * fbc]->cdef_strength = 0;
    }
  }
}
818 
// Selects the frame-level CDEF strengths and the per-block strength index and
// stores them in cm->cdef_info and in the mode info grid.
// Inputs:
//   mt_info: Multi-thread info; the frame-level MSE calculation uses the
//   multi-threaded path when num_workers > 1.
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   pick_method: Search method used to select CDEF parameters
//   rdmult: Multiplier passed to RDCOST when weighing rate against distortion
//   skip_cdef_feature: Only used with CDEF_PICK_FROM_Q; forwarded to
//   pick_cdef_from_qp()
//   cdef_control: Must not be CDEF_NONE. With CDEF_REFERENCE, non-reference
//   frames get a single zero strength and no search is run.
//   is_screen_content: Only used with CDEF_PICK_FROM_Q; forwarded to
//   pick_cdef_from_qp()
//   non_reference_frame: Non-zero if the current frame is a non-reference
//   frame
// Returns:
//   Nothing will be returned. cm->cdef_info and the cdef_strength of the
//   searched blocks are modified.
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
                     const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                     MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
                     int skip_cdef_feature, CDEF_CONTROL cdef_control,
                     const int is_screen_content, int non_reference_frame) {
  assert(cdef_control != CDEF_NONE);
  CdefInfo *const cdef_info = &cm->cdef_info;
  if (cdef_control == CDEF_REFERENCE && non_reference_frame) {
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  if (pick_method == CDEF_PICK_FROM_Q) {
    pick_cdef_from_qp(cm, skip_cdef_feature, is_screen_content);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
  const int num_planes = av1_num_planes(cm);
  CdefSearchCtx cdef_search_ctx;
  // Initialize parameters related to CDEF search context.
  cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
  // Allocate CDEF search context buffers.
  if (!cdef_alloc_data(&cdef_search_ctx)) {
    // Without the buffers no search is possible: fall back to a single
    // zero-strength entry. The number of strengths must stay equal to
    // 1 << cdef_bits (as in every other path of this function), so with
    // cdef_bits == 0 it is 1, not 0.
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }
  // Frame level mse calculation.
  if (mt_info->num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
  } else {
    cdef_mse_calc_frame(&cdef_search_ctx);
  }

  /* Search for different number of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  int sb_count = cdef_search_ctx.sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx.mse[0];
  mse[1] = cdef_search_ctx.mse[1];
  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
  for (int i = 0; i <= 3; i++) {
    if (i > max_signaling_bits) break;
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Rate: i bits per searched block for the strength index, plus
    // CDEF_STRENGTH_BITS for each strength entry (doubled when chroma
    // strengths are present).
    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      // Keep the strength set of the cheapest configuration found so far.
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
  // For every searched block, pick the entry of the chosen strength set with
  // the lowest combined (luma + chroma) MSE.
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength =
        best_gi;
  }
  if (fast) {
    // The fast search levels work on a reduced set of strength indices;
    // rewrite the selected entries in place via STORE_CDEF_FILTER_STRENGTH.
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      // NOTE(review): these two locals are not referenced by name in this
      // function; presumably STORE_CDEF_FILTER_STRENGTH uses them as scratch
      // variables, so they must not be removed. Confirm against the macro.
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  cdef_dealloc_data(&cdef_search_ctx);
}
941