/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include <math.h>
#include <stdbool.h>
#include <string.h>

#include "config/aom_dsp_rtcd.h"
#include "config/aom_scale_rtcd.h"

#include "aom/aom_integer.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/pickcdef.h"
#include "av1/encoder/mcomp.h"

// Get primary and secondary filter strength for the given strength index and
// search method
static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
                                             int *pri_strength,
                                             int *sec_strength,
                                             int strength_idx) {
  const int tot_sec_filter =
      (pick_method == CDEF_FAST_SEARCH_LVL5)
          ? REDUCED_SEC_STRENGTHS_LVL5
          : ((pick_method >= CDEF_FAST_SEARCH_LVL3) ? REDUCED_SEC_STRENGTHS_LVL3
                                                    : CDEF_SEC_STRENGTHS);
  const int pri_idx = strength_idx / tot_sec_filter;
  const int sec_idx = strength_idx % tot_sec_filter;
  *pri_strength = pri_idx;
  *sec_strength = sec_idx;
  if (pick_method == CDEF_FULL_SEARCH) return;

  switch (pick_method) {
    case CDEF_FAST_SEARCH_LVL1:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL1);
      *pri_strength = priconv_lvl1[pri_idx];
      break;
    case CDEF_FAST_SEARCH_LVL2:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
      *pri_strength = priconv_lvl2[pri_idx];
      break;
    case CDEF_FAST_SEARCH_LVL3:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL2);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
      *pri_strength = priconv_lvl2[pri_idx];
      *sec_strength = secconv_lvl3[sec_idx];
      break;
    case CDEF_FAST_SEARCH_LVL4:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL3);
      *pri_strength = priconv_lvl4[pri_idx];
      *sec_strength = secconv_lvl3[sec_idx];
      break;
    case CDEF_FAST_SEARCH_LVL5:
      assert(pri_idx < REDUCED_PRI_STRENGTHS_LVL4);
      assert(sec_idx < REDUCED_SEC_STRENGTHS_LVL5);
      *pri_strength = priconv_lvl5[pri_idx];
      *sec_strength = secconv_lvl5[sec_idx];
      break;
    default: assert(0 && "Invalid CDEF search method");
  }
}

// Store CDEF filter strength calculated from strength index for given search
// method
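// Note: this macro expects local int variables 'pri_strength' and
// 'sec_strength' to be declared in the calling scope.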
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;        \
  } while (0)

/* Search for the best strength to add as an option, knowing we
   already selected nb_strengths options. */
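/* For each candidate strength, every SB contributes the smaller of its best
   MSE among the already-selected strengths and its MSE for the candidate;
   the candidate with the lowest total over all SBs is appended to lev. */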
static uint64_t search_one(int *lev, int nb_strengths,
                           uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
                           CDEF_PICK_METHOD pick_method) {
  uint64_t tot_mse[TOTAL_STRENGTHS];
  const int total_strengths = nb_cdef_strengths[pick_method];
  int i, j;
  uint64_t best_tot_mse = (uint64_t)1 << 63;
  int best_id = 0;
  memset(tot_mse, 0, sizeof(tot_mse));
  for (i = 0; i < sb_count; i++) {
    int gi;
    uint64_t best_mse = (uint64_t)1 << 63;
    /* Find best mse among already selected options. */
    for (gi = 0; gi < nb_strengths; gi++) {
      if (mse[i][lev[gi]] < best_mse) {
        best_mse = mse[i][lev[gi]];
      }
    }
    /* Find best mse when adding each possible new option. */
    for (j = 0; j < total_strengths; j++) {
      uint64_t best = best_mse;
      if (mse[i][j] < best) best = mse[i][j];
      tot_mse[j] += best;
    }
  }
  for (j = 0; j < total_strengths; j++) {
    if (tot_mse[j] < best_tot_mse) {
      best_tot_mse = tot_mse[j];
      best_id = j;
    }
  }
  lev[nb_strengths] = best_id;
  return best_tot_mse;
}

/* Search for the best luma+chroma strength to add as an option, knowing we
   already selected nb_strengths options. */
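/* Luma and chroma strengths are searched jointly because a single index
   signaled per superblock selects one entry from each list. */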
static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
                                uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
                                CDEF_PICK_METHOD pick_method) {
  uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
  int i, j;
  uint64_t best_tot_mse = (uint64_t)1 << 63;
  int best_id0 = 0;
  int best_id1 = 0;
  const int total_strengths = nb_cdef_strengths[pick_method];
  memset(tot_mse, 0, sizeof(tot_mse));
  for (i = 0; i < sb_count; i++) {
    int gi;
    uint64_t best_mse = (uint64_t)1 << 63;
    /* Find best mse among already selected options. */
    for (gi = 0; gi < nb_strengths; gi++) {
      uint64_t curr = mse[0][i][lev0[gi]];
      curr += mse[1][i][lev1[gi]];
      if (curr < best_mse) {
        best_mse = curr;
      }
    }
    /* Find best mse when adding each possible new option. */
    for (j = 0; j < total_strengths; j++) {
      int k;
      for (k = 0; k < total_strengths; k++) {
        uint64_t best = best_mse;
        uint64_t curr = mse[0][i][j];
        curr += mse[1][i][k];
        if (curr < best) best = curr;
        tot_mse[j][k] += best;
      }
    }
  }
  for (j = 0; j < total_strengths; j++) {
    int k;
    for (k = 0; k < total_strengths; k++) {
      if (tot_mse[j][k] < best_tot_mse) {
        best_tot_mse = tot_mse[j][k];
        best_id0 = j;
        best_id1 = k;
      }
    }
  }
  lev0[nb_strengths] = best_id0;
  lev1[nb_strengths] = best_id1;
  return best_tot_mse;
}

/* Search for the set of strengths that minimizes mse. */
static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
                                      uint64_t mse[][TOTAL_STRENGTHS],
                                      int sb_count,
                                      CDEF_PICK_METHOD pick_method) {
  uint64_t best_tot_mse;
  int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
              pick_method <= CDEF_FAST_SEARCH_LVL5);
  int i;
  best_tot_mse = (uint64_t)1 << 63;
  /* Greedy search: add one strength option at a time. */
  for (i = 0; i < nb_strengths; i++) {
    best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
  }
  /* Trying to refine the greedy search by reconsidering each
     already-selected option. */
  if (!fast) {
    for (i = 0; i < 4 * nb_strengths; i++) {
      int j;
      for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
      best_tot_mse =
          search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
    }
  }
  return best_tot_mse;
}

/* Search for the set of luma+chroma strengths that minimizes mse. */
static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
                                           int nb_strengths,
                                           uint64_t (**mse)[TOTAL_STRENGTHS],
                                           int sb_count,
                                           CDEF_PICK_METHOD pick_method) {
  uint64_t best_tot_mse;
  int i;
  best_tot_mse = (uint64_t)1 << 63;
  /* Greedy search: add one strength option at a time. */
  for (i = 0; i < nb_strengths; i++) {
    best_tot_mse =
        search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
  }
  /* Trying to refine the greedy search by reconsidering each
     already-selected option. */
  for (i = 0; i < 4 * nb_strengths; i++) {
    int j;
    for (j = 0; j < nb_strengths - 1; j++) {
      best_lev0[j] = best_lev0[j + 1];
      best_lev1[j] = best_lev1[j + 1];
    }
    best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
                                   sb_count, pick_method);
  }
  return best_tot_mse;
}

static INLINE void init_src_params(int *src_stride, int *width, int *height,
                                   int *width_log2, int *height_log2,
                                   BLOCK_SIZE bsize) {
  *src_stride = block_size_wide[bsize];
  *width = block_size_wide[bsize];
  *height = block_size_high[bsize];
  *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
  *height_log2 = MI_SIZE_LOG2 + mi_size_high_log2[bsize];
}
#if CONFIG_AV1_HIGHBITDEPTH
/* Compute MSE only on the blocks we filtered. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  uint64_t sum = 0;
  int bi, bx, by;
  uint16_t *dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
  uint16_t *dst_buff = &dst16[row * dstride + col];
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);
  for (bi = 0; bi < cdef_count; bi++) {
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    sum += aom_mse_wxh_16bit_highbd(
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
        &src[bi << (height_log2 + width_log2)], src_stride, width, height);
  }
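  // Normalize the accumulated squared error back to the 8-bit domain
  // (coeff_shift is bit_depth - 8, so the scale factor is 2 * coeff_shift
  // bits).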
  return sum >> 2 * coeff_shift;
}
#endif

// Checks whether dual or quad block processing is applicable for block widths
// 8 and 4 respectively.
static INLINE int is_dual_or_quad_applicable(cdef_list *dlist, int width,
                                             int cdef_count, int bi, int iter) {
  assert(width == 8 || width == 4);
  const int blk_offset = (width == 8) ? 1 : 3;
  if ((iter + blk_offset) >= cdef_count) return 0;

  if (dlist[bi].by == dlist[bi + blk_offset].by &&
      dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
    return 1;

  return 0;
}

static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  uint64_t sum = 0;
  int bi, bx, by;
  int iter = 0;
  int inc = 1;
  uint8_t *dst8 = (uint8_t *)dst;
  uint8_t *dst_buff = &dst8[row * dstride + col];
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  const int num_blks = 16 / width;
  for (bi = 0; bi < cdef_count; bi += inc) {
    by = dlist[bi].by;
    bx = dlist[bi].bx;
    uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
    uint8_t *dst_tmp =
        &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];

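    // When horizontally adjacent blocks form a full 16-pixel-wide run, their
    // MSE is computed with a single 16-wide call instead of per-block calls.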
    if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
      sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
      iter += num_blks;
      inc = num_blks;
    } else {
      sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
                               height);
      iter += 1;
      inc = 1;
    }
  }

  return sum >> 2 * coeff_shift;
}

// Fill the boundary regions of the block with CDEF_VERY_LARGE, but only if
// the region lies outside the frame boundary.
static INLINE void fill_borders_for_fbs_on_frame_boundary(
    uint16_t *inbuf, int hfilt_size, int vfilt_size,
    bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
    bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
  if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
      !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
    return;
  if (is_fb_on_frm_bottom_boundary) {
    // Fill bottom region of the block
    const int buf_offset =
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
    const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
    // Fill bottom-left region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
    const int buf_offset =
        (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
    // Fill bottom-right region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary) {
    // Fill top region of the block
    fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
    // Fill top-left region of the block
    fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
    const int buf_offset = hfilt_size + CDEF_HBORDER;
    // Fill top-right region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_left_boundary) {
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    // Fill left region of the block
    fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
              CDEF_VERY_LARGE);
  }
  if (is_fb_on_frm_right_boundary) {
    const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
    // Fill right region of the block
    fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
              vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
  }
}

// Calculate the number of 8x8/4x4 filter units for which SSE can be
// calculated after CDEF filtering in a single function call.
static AOM_FORCE_INLINE int get_error_calc_width_in_filt_units(
    cdef_list *dlist, int cdef_count, int bi, int subsampling_x,
    int subsampling_y) {
  // TODO(Ranjit): Extend the optimization for 422
  if (subsampling_x != subsampling_y) return 1;

  // Combining more blocks seems to increase encode time due to increase in
  // control code
  if (bi + 3 < cdef_count && dlist[bi].by == dlist[bi + 3].by &&
      dlist[bi].bx + 3 == dlist[bi + 3].bx) {
    /* Calculate error for four 8x8/4x4 blocks using 32x8/16x4 block specific
     * logic if y co-ordinates match and x co-ordinates are
     * separated by 3 for first and fourth 8x8/4x4 blocks in dlist[]. */
    return 4;
  }
  if (bi + 1 < cdef_count && dlist[bi].by == dlist[bi + 1].by &&
      dlist[bi].bx + 1 == dlist[bi + 1].bx) {
    /* Calculate error for two 8x8/4x4 blocks using 16x8/8x4 block specific
     * logic if their y co-ordinates match and x co-ordinates are
     * separated by 1 for first and second 8x8/4x4 blocks in dlist[]. */
    return 2;
  }
  return 1;
}

// Returns the block error after CDEF filtering for a given strength
static INLINE uint64_t get_filt_error(
    const CdefSearchCtx *cdef_search_ctx, const struct macroblockd_plane *pd,
    cdef_list *dlist, int dir[CDEF_NBLOCKS][CDEF_NBLOCKS], int *dirinit,
    int var[CDEF_NBLOCKS][CDEF_NBLOCKS], uint16_t *in, uint8_t *ref_buffer,
    int ref_stride, int row, int col, int pri_strength, int sec_strength,
    int cdef_count, int pli, int coeff_shift, BLOCK_SIZE bs) {
  uint64_t curr_sse = 0;
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bs, pd->subsampling_x, pd->subsampling_y);
  const int bw_log2 = 3 - pd->subsampling_x;
  const int bh_log2 = 3 - pd->subsampling_y;

  // TODO(Ranjit): Extend this optimization for HBD
  if (!cdef_search_ctx->use_highbitdepth) {
    // If all 8x8/4x4 blocks in CDEF block need to be filtered, calculate the
    // error at CDEF block level
    const int tot_blk_count =
        (block_size_wide[plane_bsize] * block_size_high[plane_bsize]) >>
        (bw_log2 + bh_log2);
    if (cdef_count == tot_blk_count) {
      // Calculate the offset in the buffer based on block position
      const FULLPEL_MV this_mv = { row, col };
      const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
      if (pri_strength == 0 && sec_strength == 0) {
        // When CDEF strength is zero, filtering is not applied. Hence
        // error is calculated between source and unfiltered pixels
        curr_sse =
            aom_sse(&ref_buffer[buf_offset], ref_stride,
                    get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
                    block_size_wide[plane_bsize], block_size_high[plane_bsize]);
      } else {
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);

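        // Note: a secondary strength index of 3 corresponds to an actual
        // filter strength of 4, hence the (sec_strength == 3) adjustment
        // passed to av1_cdef_filter_fb() below.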
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
                           cdef_search_ctx->xdec[pli],
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
                           dlist, cdef_count, pri_strength,
                           sec_strength + (sec_strength == 3),
                           cdef_search_ctx->damping, coeff_shift);
        curr_sse =
            aom_sse(&ref_buffer[buf_offset], ref_stride, tmp_dst8,
                    (1 << MAX_SB_SIZE_LOG2), block_size_wide[plane_bsize],
                    block_size_high[plane_bsize]);
      }
    } else {
      // If few 8x8/4x4 blocks in CDEF block need to be filtered, filtering
      // functions produce 8-bit output and the error is calculated in 8-bit
      // domain
      if (pri_strength == 0 && sec_strength == 0) {
        int num_error_calc_filt_units = 1;
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
          const uint8_t by = dlist[bi].by;
          const uint8_t bx = dlist[bi].bx;
          const int16_t by_pos = (by << bh_log2);
          const int16_t bx_pos = (bx << bw_log2);
          // Calculate the offset in the buffer based on block position
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
          curr_sse += aom_sse(
              &ref_buffer[buf_offset], ref_stride,
              get_buf_from_fullmv(&pd->dst, &this_mv), pd->dst.stride,
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
        }
      } else {
        DECLARE_ALIGNED(32, uint8_t, tmp_dst8[1 << (MAX_SB_SIZE_LOG2 * 2)]);
        av1_cdef_filter_fb(tmp_dst8, NULL, (1 << MAX_SB_SIZE_LOG2), in,
                           cdef_search_ctx->xdec[pli],
                           cdef_search_ctx->ydec[pli], dir, dirinit, var, pli,
                           dlist, cdef_count, pri_strength,
                           sec_strength + (sec_strength == 3),
                           cdef_search_ctx->damping, coeff_shift);
        int num_error_calc_filt_units = 1;
        for (int bi = 0; bi < cdef_count; bi = bi + num_error_calc_filt_units) {
          const uint8_t by = dlist[bi].by;
          const uint8_t bx = dlist[bi].bx;
          const int16_t by_pos = (by << bh_log2);
          const int16_t bx_pos = (bx << bw_log2);
          // Calculate the offset in the buffer based on block position
          const FULLPEL_MV this_mv = { row + by_pos, col + bx_pos };
          const FULLPEL_MV tmp_buf_pos = { by_pos, bx_pos };
          const int buf_offset = get_offset_from_fullmv(&this_mv, ref_stride);
          const int tmp_buf_offset =
              get_offset_from_fullmv(&tmp_buf_pos, (1 << MAX_SB_SIZE_LOG2));
          num_error_calc_filt_units = get_error_calc_width_in_filt_units(
              dlist, cdef_count, bi, pd->subsampling_x, pd->subsampling_y);
          curr_sse += aom_sse(
              &ref_buffer[buf_offset], ref_stride, &tmp_dst8[tmp_buf_offset],
              (1 << MAX_SB_SIZE_LOG2),
              num_error_calc_filt_units * (1 << bw_log2), (1 << bh_log2));
        }
      }
    }
  } else {
    DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);

    av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
                       cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
                       dir, dirinit, var, pli, dlist, cdef_count, pri_strength,
                       sec_strength + (sec_strength == 3),
                       cdef_search_ctx->damping, coeff_shift);
    curr_sse = cdef_search_ctx->compute_cdef_dist_fn(
        ref_buffer, ref_stride, tmp_dst, dlist, cdef_count,
        cdef_search_ctx->bsize[pli], coeff_shift, row, col);
  }
  return curr_sse;
}

// Calculates MSE at block level.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   fbr: Row index in units of 64x64 block
//   fbc: Column index in units of 64x64 block
//   sb_count: Index of this superblock in the CDEF search context arrays
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
                             int sb_count) {
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
  const int coeff_shift = cdef_search_ctx->coeff_shift;
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;

  // Declare and initialize the temporary buffers.
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
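  // 'in' points to the interior of the padded source buffer, past the top and
  // left CDEF borders.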
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
  int hb_step = 1, vb_step = 1;
  BLOCK_SIZE bs;

  const MB_MODE_INFO *const mbmi =
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
                              MI_SIZE_64X64 * fbc];

  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
                                        ref->v_buffer };
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
                                   ref->uv_stride };

  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
      mbmi->bsize == BLOCK_64X128) {
    bs = mbmi->bsize;
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
      hb_step = 2;
    }
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
      vb_step = 2;
    }
  } else {
    bs = BLOCK_64X64;
  }
  // Get the number of non-skip 8x8 blocks; CDEF processing is applied only to
  // 8x8 blocks that are not skipped.
  const int cdef_count = av1_cdef_compute_sb_list(
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
  const bool is_fb_on_frm_left_boundary = (fbc == 0);
  const bool is_fb_on_frm_right_boundary =
      (fbc + hb_step == cdef_search_ctx->nhfb);
  const bool is_fb_on_frm_top_boundary = (fbr == 0);
  const bool is_fb_on_frm_bottom_boundary =
      (fbr + vb_step == cdef_search_ctx->nvfb);
  const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
  const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
  int dirinit = 0;
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
    /* We avoid filtering the pixels for which some of the pixels to
       average are outside the frame. We could change the filter instead,
       but it would add special cases for any future vectorization. */
    const int hfilt_size = (nhb << mi_wide_l2[pli]);
    const int vfilt_size = (nvb << mi_high_l2[pli]);
    const int ysize =
        vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
    const int xsize =
        hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
                             ysize, xsize);
    fill_borders_for_fbs_on_frame_boundary(
        inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
        is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
        is_fb_on_frm_bottom_boundary);
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
      int pri_strength, sec_strength;
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
                                &sec_strength, gi);
      const uint64_t curr_mse = get_filt_error(
          cdef_search_ctx, &pd, dlist, dir, &dirinit, var, in, ref_buffer[pli],
          ref_stride[pli], row, col, pri_strength, sec_strength, cdef_count,
          pli, coeff_shift, bs);
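      // The two chroma planes share one MSE buffer: plane 1 (U) initializes
      // mse[1] and plane 2 (V) is accumulated into the same entries.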
      if (pli < 2)
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
      else
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
    }
  }
  cdef_search_ctx->sb_index[sb_count] =
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
}

// MSE calculation at frame level.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
  // Loop over each sb.
  for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
    for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
      // Checks if cdef processing can be skipped for particular sb.
      if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
      // Calculate mse for each sb and store the relevant sb index.
      av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
                              cdef_search_ctx->sb_count);
      cdef_search_ctx->sb_count++;
    }
  }
}

// Allocates memory for members of CdefSearchCtx.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters
//   related to CDEF search context.
// Returns:
//   true on success, false on allocation failure. On failure, any partially
//   allocated buffers are freed before returning.
static AOM_INLINE bool cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
  const int nvfb = cdef_search_ctx->nvfb;
  const int nhfb = cdef_search_ctx->nhfb;
  cdef_search_ctx->sb_index =
      aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index));
  cdef_search_ctx->sb_count = 0;
  cdef_search_ctx->mse[0] =
      aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
  cdef_search_ctx->mse[1] =
      aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
  if (!(cdef_search_ctx->sb_index && cdef_search_ctx->mse[0] &&
        cdef_search_ctx->mse[1])) {
    aom_free(cdef_search_ctx->sb_index);
    aom_free(cdef_search_ctx->mse[0]);
    aom_free(cdef_search_ctx->mse[1]);
    return false;
  }
  return true;
}

// Deallocates the memory allocated for members of CdefSearchCtx.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters
//   related to CDEF search context.
// Returns:
//   Nothing will be returned.
static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
  aom_free(cdef_search_ctx->mse[0]);
  aom_free(cdef_search_ctx->mse[1]);
  aom_free(cdef_search_ctx->sb_index);
}

// Initialize the parameters related to CDEF search context.
// Inputs:
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   pick_method: Search method used to select CDEF parameters
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
                                        const YV12_BUFFER_CONFIG *ref,
                                        AV1_COMMON *cm, MACROBLOCKD *xd,
                                        CdefSearchCtx *cdef_search_ctx,
                                        CDEF_PICK_METHOD pick_method) {
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int num_planes = av1_num_planes(cm);
  cdef_search_ctx->mi_params = &cm->mi_params;
  cdef_search_ctx->ref = ref;
  cdef_search_ctx->nvfb =
      (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  cdef_search_ctx->nhfb =
      (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
  cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
  cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
  cdef_search_ctx->num_planes = num_planes;
  cdef_search_ctx->pick_method = pick_method;
  cdef_search_ctx->sb_count = 0;
  cdef_search_ctx->use_highbitdepth = cm->seq_params->use_highbitdepth;
  av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
                       num_planes);
  // Initialize plane wise information.
  for (int pli = 0; pli < num_planes; pli++) {
    cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
    cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
    cdef_search_ctx->bsize[pli] =
        cdef_search_ctx->ydec[pli]
            ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
            : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
    cdef_search_ctx->mi_wide_l2[pli] =
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    cdef_search_ctx->mi_high_l2[pli] =
        MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
    cdef_search_ctx->plane[pli] = xd->plane[pli];
  }
  // Function pointer initialization.
#if CONFIG_AV1_HIGHBITDEPTH
  if (cm->seq_params->use_highbitdepth) {
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_highbd;
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
  } else {
    cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
    cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
  }
#else
  cdef_search_ctx->copy_fn = av1_cdef_copy_sb8_16_lowbd;
  cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
#endif
}

static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                              int is_screen_content) {
  const int bd = cm->seq_params->bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;
  // Check the speed feature to avoid extra signaling.
  if (skip_cdef) {
    cdef_info->cdef_bits = 1;
    cdef_info->nb_cdef_strengths = 2;
  } else {
    cdef_info->cdef_bits = 0;
    cdef_info->nb_cdef_strengths = 1;
  }
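  // Damping grows with the base quantizer index: 3 at the lowest qindex
  // values, up to 6 at the highest.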
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

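  // The strengths below are predicted from the quantizer q using fitted
  // quadratic models (separate fits for screen content, inter frames and
  // intra-only frames), then clamped to the valid strength ranges.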
  int predicted_y_f1 = 0;
  int predicted_y_f2 = 0;
  int predicted_uv_f1 = 0;
  int predicted_uv_f2 = 0;
  if (is_screen_content) {
    predicted_y_f1 =
        (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
    predicted_y_f2 =
        (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
    predicted_uv_f1 =
        (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
    predicted_uv_f2 =
        (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+00);
    predicted_y_f1 = clamp(predicted_y_f1, 0, 15);
    predicted_y_f2 = clamp(predicted_y_f2, 0, 3);
    predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15);
    predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3);
  } else {
    if (!frame_is_intra_only(cm)) {
      predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
                                         q * 0.0068615186f + 0.02709886f),
                             0, 15);
      predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
                                         q * 0.0013993345f + 0.03831067f),
                             0, 3);
      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
                                          q * 0.0034628846f + 0.00887099f),
                              0, 15);
      predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
                                          q * 0.00028223585f + 0.05576307f),
                              0, 3);
    } else {
      predicted_y_f1 = clamp(
          (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
          0, 15);
      predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f +
                                         q * 0.0027798624f + 0.0079405f),
                             0, 3);
      predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f +
                                          q * 0.012892405f - 0.00748388f),
                              0, 15);
      predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
                                          q * 0.00035520183f + 0.00228092f),
                              0, 3);
    }
  }
  cdef_info->cdef_strengths[0] =
      predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
  cdef_info->cdef_uv_strengths[0] =
      predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;

  // mbmi->cdef_strength is already set in the encoding stage. We don't need to
  // set it again here.
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
    return;
  }

  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  MB_MODE_INFO **mbmi = mi_params->mi_grid_base;
  for (int r = 0; r < nvfb; ++r) {
    for (int c = 0; c < nhfb; ++c) {
      MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
      current_mbmi->cdef_strength = 0;
    }
    mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
  }
}

void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
                     const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                     MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
                     int skip_cdef_feature, CDEF_CONTROL cdef_control,
                     const int is_screen_content, int non_reference_frame) {
  assert(cdef_control != CDEF_NONE);
  if (cdef_control == CDEF_REFERENCE && non_reference_frame) {
    CdefInfo *const cdef_info = &cm->cdef_info;
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }

  if (pick_method == CDEF_PICK_FROM_Q) {
    pick_cdef_from_qp(cm, skip_cdef_feature, is_screen_content);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL5);
  const int num_planes = av1_num_planes(cm);
  CdefSearchCtx cdef_search_ctx;
  // Initialize parameters related to CDEF search context.
  cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
  // Allocate CDEF search context buffers.
  if (!cdef_alloc_data(&cdef_search_ctx)) {
    CdefInfo *const cdef_info = &cm->cdef_info;
    // Fall back to a single zero strength, matching the early-return path
    // above (nb_cdef_strengths must equal 1 << cdef_bits).
    cdef_info->nb_cdef_strengths = 1;
    cdef_info->cdef_bits = 0;
    cdef_info->cdef_strengths[0] = 0;
    cdef_info->cdef_uv_strengths[0] = 0;
    return;
  }
  // Frame level mse calculation.
  if (mt_info->num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
  } else {
    cdef_mse_calc_frame(&cdef_search_ctx);
  }

  /* Search for different number of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx.sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx.mse[0];
  mse[1] = cdef_search_ctx.mse[1];
  /* Calculate the maximum number of bits required to signal CDEF strengths at
   * block level */
  const int total_strengths = nb_cdef_strengths[pick_method];
  const int joint_strengths =
      num_planes > 1 ? total_strengths * total_strengths : total_strengths;
  const int max_signaling_bits =
      joint_strengths == 1 ? 0 : get_msb(joint_strengths - 1) + 1;
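  // Evaluate signaling 0 to 3 bits per superblock (1, 2, 4 or 8 strength
  // pairs), capped by the number of distinct joint strengths available.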
  for (int i = 0; i <= 3; i++) {
    if (i > max_signaling_bits) break;
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
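    // Distortion is the accumulated MSE scaled by 16, which is assumed here to
    // match the distortion precision expected by RDCOST.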
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
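  // For each filtered superblock, pick the strength index (among the signaled
  // set) that minimizes its MSE and store it in the SB's mode info.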
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength =
        best_gi;
  }
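  // For fast search methods, remap the selected reduced-search indices back
  // to actual CDEF strength values via the conversion tables.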
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  cdef_dealloc_data(&cdef_search_ctx);
}
