1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13 #include <string.h>
14
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/aom_scale_rtcd.h"
17
18 #include "aom/aom_integer.h"
19 #include "av1/common/av1_common_int.h"
20 #include "av1/common/reconinter.h"
21 #include "av1/encoder/encoder.h"
22 #include "av1/encoder/ethread.h"
23 #include "av1/encoder/pickcdef.h"
24
25 // Get primary and secondary filter strength for the given strength index and
26 // search method
get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,int * pri_strength,int * sec_strength,int strength_idx)27 static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
28 int *pri_strength,
29 int *sec_strength,
30 int strength_idx) {
31 const int tot_sec_filter = (pick_method >= CDEF_FAST_SEARCH_LVL3)
32 ? REDUCED_SEC_STRENGTHS_LVL3
33 : CDEF_SEC_STRENGTHS;
34 const int pri_idx = strength_idx / tot_sec_filter;
35 const int sec_idx = strength_idx % tot_sec_filter;
36 *pri_strength = pri_idx;
37 *sec_strength = sec_idx;
38 if (pick_method == CDEF_FULL_SEARCH) return;
39
40 switch (pick_method) {
41 case CDEF_FAST_SEARCH_LVL1: *pri_strength = priconv_lvl1[pri_idx]; break;
42 case CDEF_FAST_SEARCH_LVL2: *pri_strength = priconv_lvl2[pri_idx]; break;
43 case CDEF_FAST_SEARCH_LVL3:
44 *pri_strength = priconv_lvl2[pri_idx];
45 *sec_strength = secconv_lvl3[sec_idx];
46 break;
47 case CDEF_FAST_SEARCH_LVL4:
48 *pri_strength = priconv_lvl4[pri_idx];
49 *sec_strength = secconv_lvl3[sec_idx];
50 break;
51 default: assert(0 && "Invalid CDEF search method");
52 }
53 }
54
// Store CDEF filter strength calculated from strength index for given search
// method.
// The strength index is converted with get_cdef_filter_strengths() and the
// result is stored in cdef_strength as
// pri_strength * CDEF_SEC_STRENGTHS + sec_strength.
// The expansion writes to int variables named pri_strength and sec_strength,
// which must be in scope where the macro is used. The body is wrapped in
// do { } while (0) so the macro behaves as a single statement (e.g. under an
// unbraced if/else); invoke it with a trailing semicolon.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
61
62 /* Search for the best strength to add as an option, knowing we
63 already selected nb_strengths options. */
search_one(int * lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)64 static uint64_t search_one(int *lev, int nb_strengths,
65 uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
66 CDEF_PICK_METHOD pick_method) {
67 uint64_t tot_mse[TOTAL_STRENGTHS];
68 const int total_strengths = nb_cdef_strengths[pick_method];
69 int i, j;
70 uint64_t best_tot_mse = (uint64_t)1 << 63;
71 int best_id = 0;
72 memset(tot_mse, 0, sizeof(tot_mse));
73 for (i = 0; i < sb_count; i++) {
74 int gi;
75 uint64_t best_mse = (uint64_t)1 << 63;
76 /* Find best mse among already selected options. */
77 for (gi = 0; gi < nb_strengths; gi++) {
78 if (mse[i][lev[gi]] < best_mse) {
79 best_mse = mse[i][lev[gi]];
80 }
81 }
82 /* Find best mse when adding each possible new option. */
83 for (j = 0; j < total_strengths; j++) {
84 uint64_t best = best_mse;
85 if (mse[i][j] < best) best = mse[i][j];
86 tot_mse[j] += best;
87 }
88 }
89 for (j = 0; j < total_strengths; j++) {
90 if (tot_mse[j] < best_tot_mse) {
91 best_tot_mse = tot_mse[j];
92 best_id = j;
93 }
94 }
95 lev[nb_strengths] = best_id;
96 return best_tot_mse;
97 }
98
99 /* Search for the best luma+chroma strength to add as an option, knowing we
100 already selected nb_strengths options. */
search_one_dual(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)101 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
102 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
103 CDEF_PICK_METHOD pick_method) {
104 uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
105 int i, j;
106 uint64_t best_tot_mse = (uint64_t)1 << 63;
107 int best_id0 = 0;
108 int best_id1 = 0;
109 const int total_strengths = nb_cdef_strengths[pick_method];
110 memset(tot_mse, 0, sizeof(tot_mse));
111 for (i = 0; i < sb_count; i++) {
112 int gi;
113 uint64_t best_mse = (uint64_t)1 << 63;
114 /* Find best mse among already selected options. */
115 for (gi = 0; gi < nb_strengths; gi++) {
116 uint64_t curr = mse[0][i][lev0[gi]];
117 curr += mse[1][i][lev1[gi]];
118 if (curr < best_mse) {
119 best_mse = curr;
120 }
121 }
122 /* Find best mse when adding each possible new option. */
123 for (j = 0; j < total_strengths; j++) {
124 int k;
125 for (k = 0; k < total_strengths; k++) {
126 uint64_t best = best_mse;
127 uint64_t curr = mse[0][i][j];
128 curr += mse[1][i][k];
129 if (curr < best) best = curr;
130 tot_mse[j][k] += best;
131 }
132 }
133 }
134 for (j = 0; j < total_strengths; j++) {
135 int k;
136 for (k = 0; k < total_strengths; k++) {
137 if (tot_mse[j][k] < best_tot_mse) {
138 best_tot_mse = tot_mse[j][k];
139 best_id0 = j;
140 best_id1 = k;
141 }
142 }
143 }
144 lev0[nb_strengths] = best_id0;
145 lev1[nb_strengths] = best_id1;
146 return best_tot_mse;
147 }
148
149 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int * best_lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)150 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
151 uint64_t mse[][TOTAL_STRENGTHS],
152 int sb_count,
153 CDEF_PICK_METHOD pick_method) {
154 uint64_t best_tot_mse;
155 int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
156 pick_method <= CDEF_FAST_SEARCH_LVL4);
157 int i;
158 best_tot_mse = (uint64_t)1 << 63;
159 /* Greedy search: add one strength options at a time. */
160 for (i = 0; i < nb_strengths; i++) {
161 best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
162 }
163 /* Trying to refine the greedy search by reconsidering each
164 already-selected option. */
165 if (!fast) {
166 for (i = 0; i < 4 * nb_strengths; i++) {
167 int j;
168 for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
169 best_tot_mse =
170 search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
171 }
172 }
173 return best_tot_mse;
174 }
175
176 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int * best_lev0,int * best_lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)177 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
178 int nb_strengths,
179 uint64_t (**mse)[TOTAL_STRENGTHS],
180 int sb_count,
181 CDEF_PICK_METHOD pick_method) {
182 uint64_t best_tot_mse;
183 int i;
184 best_tot_mse = (uint64_t)1 << 63;
185 /* Greedy search: add one strength options at a time. */
186 for (i = 0; i < nb_strengths; i++) {
187 best_tot_mse =
188 search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
189 }
190 /* Trying to refine the greedy search by reconsidering each
191 already-selected option. */
192 for (i = 0; i < 4 * nb_strengths; i++) {
193 int j;
194 for (j = 0; j < nb_strengths - 1; j++) {
195 best_lev0[j] = best_lev0[j + 1];
196 best_lev1[j] = best_lev1[j + 1];
197 }
198 best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
199 sb_count, pick_method);
200 }
201 return best_tot_mse;
202 }
203
#if CONFIG_AV1_HIGHBITDEPTH
// Copies a window of vsize rows by hsize 16-bit samples into dst.
// src is converted with CONVERT_TO_SHORTPTR; the window starts src_voffset
// rows and src_hoffset samples into it. sstride and dstride are the row
// strides of source and destination, in samples.
static void copy_sb16_16_highbd(uint16_t *dst, int dstride, const void *src,
                                int src_voffset, int src_hoffset, int sstride,
                                int vsize, int hsize) {
  const uint16_t *const src16 = CONVERT_TO_SHORTPTR((uint8_t *)src);
  const uint16_t *const first = src16 + (src_voffset * sstride + src_hoffset);
  for (int row = 0; row < vsize; ++row) {
    const uint16_t *const from = first + row * sstride;
    uint16_t *const to = dst + row * dstride;
    memcpy(to, from, hsize * sizeof(*from));
  }
}
#endif
215
// Copies a window of vsize rows by hsize 8-bit samples into dst, widening
// each sample to 16 bits.
// The window starts src_voffset rows and src_hoffset samples into src.
// sstride and dstride are the row strides of source and destination, in
// samples.
static void copy_sb16_16(uint16_t *dst, int dstride, const void *src,
                         int src_voffset, int src_hoffset, int sstride,
                         int vsize, int hsize) {
  const uint8_t *const src8 = (const uint8_t *)src;
  const uint8_t *const first = src8 + (src_voffset * sstride + src_hoffset);
  for (int row = 0; row < vsize; ++row) {
    const uint8_t *const from = first + row * sstride;
    uint16_t *const to = dst + row * dstride;
    for (int col = 0; col < hsize; ++col) {
      to[col] = (uint16_t)from[col];
    }
  }
}
226
init_src_params(int * src_stride,int * width,int * height,int * width_log2,int * height_log2,BLOCK_SIZE bsize)227 static INLINE void init_src_params(int *src_stride, int *width, int *height,
228 int *width_log2, int *height_log2,
229 BLOCK_SIZE bsize) {
230 *src_stride = block_size_wide[bsize];
231 *width = block_size_wide[bsize];
232 *height = block_size_high[bsize];
233 *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
234 *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
235 }
#if CONFIG_AV1_HIGHBITDEPTH
/* Compute MSE only on the blocks we filtered (high bit depth variant).
   dst is converted with CONVERT_TO_SHORTPTR and offset to (row, col); each
   entry of dlist selects one block inside it by its (by, bx) position. src
   holds the blocks to compare against, packed one after another in dlist
   order. The accumulated value is scaled down by 2 * coeff_shift bits before
   it is returned. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint16_t *const dst16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
  uint16_t *const origin = dst16 + (row * dstride + col);
  const int block_area_log2 = height_log2 + width_log2;

  uint64_t sum = 0;
  for (int bi = 0; bi < cdef_count; bi++) {
    const int by = dlist[bi].by;
    const int bx = dlist[bi].bx;
    uint16_t *const dst_block =
        origin + ((by << height_log2) * dstride + (bx << width_log2));
    uint16_t *const src_block = src + (bi << block_area_log2);
    sum += aom_mse_wxh_16bit_highbd(dst_block, dstride, src_block, src_stride,
                                    width, height);
  }
  return sum >> (2 * coeff_shift);
}
#endif
/* Compute MSE only on the blocks we filtered (8-bit variant).
   dst is treated as 8-bit samples and offset to (row, col); each entry of
   dlist selects one block inside it by its (by, bx) position. src holds the
   blocks to compare against, packed one after another in dlist order. The
   accumulated value is scaled down by 2 * coeff_shift bits before it is
   returned. */
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint8_t *const dst8 = (uint8_t *)dst;
  uint8_t *const origin = dst8 + (row * dstride + col);
  const int block_area_log2 = height_log2 + width_log2;

  uint64_t sum = 0;
  for (int bi = 0; bi < cdef_count; bi++) {
    const int by = dlist[bi].by;
    const int bx = dlist[bi].bx;
    uint8_t *const dst_block =
        origin + ((by << height_log2) * dstride + (bx << width_log2));
    uint16_t *const src_block = src + (bi << block_area_log2);
    sum += aom_mse_wxh_16bit(dst_block, dstride, src_block, src_stride, width,
                             height);
  }
  return sum >> (2 * coeff_shift);
}
283
// Calculates MSE at block level.
// For every plane and every candidate strength index of the search method,
// the block is filtered into a temporary buffer and its distortion against
// the reference buffer is stored in cdef_search_ctx->mse.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   fbr: Row index in units of 64x64 block
//   fbc: Column index in units of 64x64 block
//   sb_count: Index of the entry in cdef_search_ctx->mse[] and
//   cdef_search_ctx->sb_index[] that receives the results for this block.
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
                             int sb_count) {
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
  const int coeff_shift = cdef_search_ctx->coeff_shift;
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;

  // Declare and initialize the temporary buffers.
  DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  // `in` points at the first sample inside the border area of inbuf.
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
  // Number of mi units covered horizontally/vertically, clipped to the frame.
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
  int hb_step = 1, vb_step = 1;
  BLOCK_SIZE bs;

  // Mode info at the top-left corner of this 64x64 block.
  const MB_MODE_INFO *const mbmi =
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
                              MI_SIZE_64X64 * fbc];

  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
                                        ref->v_buffer };
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
                                   ref->uv_stride };

  // When the top-left mode info reports a block size that is 128 wide and/or
  // 128 high, the processed area and the step are doubled in that direction.
  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
      mbmi->bsize == BLOCK_64X128) {
    bs = mbmi->bsize;
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
      hb_step = 2;
    }
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
      vb_step = 2;
    }
  } else {
    bs = BLOCK_64X64;
  }
  // Get number of 8x8 blocks which are not skip. Cdef processing happens for
  // 8x8 blocks which are not skip.
  const int cdef_count = av1_cdef_compute_sb_list(
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);

  // A top/left border is only copied when this is not the first row/column.
  const int yoff = CDEF_VBORDER * (fbr != 0);
  const int xoff = CDEF_HBORDER * (fbc != 0);
  int dirinit = 0;
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
    // Reset the whole input buffer before copying this plane's samples in.
    for (int i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
    /* We avoid filtering the pixels for which some of the pixels to
    average are outside the frame. We could change the filter instead,
    but it would add special cases for any future vectorization. */
    // Rows/columns to copy: the block itself, plus a bottom/right border when
    // another block follows, plus the top/left border selected above.
    const int ysize = (nvb << mi_high_l2[pli]) +
                      CDEF_VBORDER * (fbr + vb_step < cdef_search_ctx->nvfb) +
                      yoff;
    const int xsize = (nhb << mi_wide_l2[pli]) +
                      CDEF_HBORDER * (fbc + hb_step < cdef_search_ctx->nhfb) +
                      xoff;
    // Position of the block's top-left sample in this plane.
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
                             ysize, xsize);
    // Try every candidate strength index of the search method.
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
      int pri_strength, sec_strength;
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
                                &sec_strength, gi);
      // A secondary strength of 3 is passed to the filter as 4.
      av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
                         cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
                         dir, &dirinit, var, pli, dlist, cdef_count,
                         pri_strength, sec_strength + (sec_strength == 3),
                         cdef_search_ctx->damping, coeff_shift);
      const uint64_t curr_mse = cdef_search_ctx->compute_cdef_dist_fn(
          ref_buffer[pli], ref_stride[pli], tmp_dst, dlist, cdef_count,
          cdef_search_ctx->bsize[pli], coeff_shift, row, col);
      // Planes 0 and 1 initialize their mse entry; plane 2 is accumulated on
      // top of plane 1 so that mse[1] holds the sum of planes 1 and 2.
      if (pli < 2)
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
      else
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
    }
  }
  // Remember the mi grid offset of this block.
  cdef_search_ctx->sb_index[sb_count] =
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
}
381
382 // MSE calculation at frame level.
383 // Inputs:
384 // cdef_search_ctx: Pointer to the structure containing parameters related to
385 // CDEF search context.
386 // Returns:
387 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_mse_calc_frame(CdefSearchCtx * cdef_search_ctx)388 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
389 // Loop over each sb.
390 for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
391 for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
392 // Checks if cdef processing can be skipped for particular sb.
393 if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
394 // Calculate mse for each sb and store the relevant sb index.
395 av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
396 cdef_search_ctx->sb_count);
397 cdef_search_ctx->sb_count++;
398 }
399 }
400 }
401
402 // Allocates memory for members of CdefSearchCtx.
403 // Inputs:
404 // cdef_search_ctx: Pointer to the structure containing parameters
405 // related to CDEF search context.
406 // Returns:
407 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_alloc_data(CdefSearchCtx * cdef_search_ctx)408 static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
409 const int nvfb = cdef_search_ctx->nvfb;
410 const int nhfb = cdef_search_ctx->nhfb;
411 cdef_search_ctx->sb_index =
412 aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index));
413 cdef_search_ctx->sb_count = 0;
414 cdef_search_ctx->mse[0] =
415 aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
416 cdef_search_ctx->mse[1] =
417 aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
418 }
419
420 // Deallocates the memory allocated for members of CdefSearchCtx.
421 // Inputs:
422 // cdef_search_ctx: Pointer to the structure containing parameters
423 // related to CDEF search context.
424 // Returns:
425 // Nothing will be returned.
cdef_dealloc_data(CdefSearchCtx * cdef_search_ctx)426 static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
427 aom_free(cdef_search_ctx->mse[0]);
428 aom_free(cdef_search_ctx->mse[1]);
429 aom_free(cdef_search_ctx->sb_index);
430 }
431
432 // Initialize the parameters related to CDEF search context.
433 // Inputs:
434 // frame: Pointer to compressed frame buffer
435 // ref: Pointer to the frame buffer holding the source frame
436 // cm: Pointer to top level common structure
437 // xd: Pointer to common current coding block structure
438 // cdef_search_ctx: Pointer to the structure containing parameters related to
439 // CDEF search context.
440 // pick_method: Search method used to select CDEF parameters
441 // Returns:
442 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_params_init(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref,AV1_COMMON * cm,MACROBLOCKD * xd,CdefSearchCtx * cdef_search_ctx,CDEF_PICK_METHOD pick_method)443 static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
444 const YV12_BUFFER_CONFIG *ref,
445 AV1_COMMON *cm, MACROBLOCKD *xd,
446 CdefSearchCtx *cdef_search_ctx,
447 CDEF_PICK_METHOD pick_method) {
448 const CommonModeInfoParams *const mi_params = &cm->mi_params;
449 const int num_planes = av1_num_planes(cm);
450 cdef_search_ctx->mi_params = &cm->mi_params;
451 cdef_search_ctx->ref = ref;
452 cdef_search_ctx->nvfb =
453 (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
454 cdef_search_ctx->nhfb =
455 (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
456 cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
457 cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
458 cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
459 cdef_search_ctx->num_planes = num_planes;
460 cdef_search_ctx->pick_method = pick_method;
461 cdef_search_ctx->sb_count = 0;
462 av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
463 num_planes);
464 // Initialize plane wise information.
465 for (int pli = 0; pli < num_planes; pli++) {
466 cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
467 cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
468 cdef_search_ctx->bsize[pli] =
469 cdef_search_ctx->ydec[pli]
470 ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
471 : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
472 cdef_search_ctx->mi_wide_l2[pli] =
473 MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
474 cdef_search_ctx->mi_high_l2[pli] =
475 MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
476 cdef_search_ctx->plane[pli] = xd->plane[pli];
477 }
478 // Function pointer initialization.
479 #if CONFIG_AV1_HIGHBITDEPTH
480 if (cm->seq_params->use_highbitdepth) {
481 cdef_search_ctx->copy_fn = copy_sb16_16_highbd;
482 cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
483 } else {
484 cdef_search_ctx->copy_fn = copy_sb16_16;
485 cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
486 }
487 #else
488 cdef_search_ctx->copy_fn = copy_sb16_16;
489 cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
490 #endif
491 }
492
// Sets the frame's CDEF parameters from the quantizer, without a search.
// The primary (f1) and secondary (f2) strengths for luma and chroma are
// predicted by quadratic functions of q, with separate coefficients for intra
// only frames and for other frames.
// Inputs:
//   cm: Pointer to top level common structure; cm->cdef_info and the
//   cdef_strength of the mode info at each 64x64 block position are written.
//   skip_cdef: When nonzero, a second strength entry of 0 is signaled and a
//   64x64 block whose skip_cdef_curr_sb flag is set selects it once
//   frames_since_key exceeds 10.
//   frames_since_key: Number of frames since the last key frame.
static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
                              int frames_since_key) {
  const int bd = cm->seq_params->bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;

  // Check the speed feature to avoid extra signaling.
  cdef_info->cdef_bits = skip_cdef ? 1 : 0;
  cdef_info->nb_cdef_strengths = skip_cdef ? 2 : 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  int predicted_y_f1;
  int predicted_y_f2;
  int predicted_uv_f1;
  int predicted_uv_f2;
  if (frame_is_intra_only(cm)) {
    predicted_y_f1 = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    predicted_y_f2 = clamp(
        (int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f),
        0, 3);
    predicted_uv_f1 = clamp(
        (int)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f),
        0, 15);
    predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
                                        q * 0.00035520183f + 0.00228092f),
                            0, 3);
  } else {
    predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
                                       q * 0.0068615186f + 0.02709886f),
                           0, 15);
    predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
                                       q * 0.0013993345f + 0.03831067f),
                           0, 3);
    predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
                                        q * 0.0034628846f + 0.00887099f),
                            0, 15);
    predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
                                        q * 0.00028223585f + 0.05576307f),
                            0, 3);
  }

  // Entry 0 carries the predicted strengths; entry 1, when present, is zero.
  cdef_info->cdef_strengths[0] =
      predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
  cdef_info->cdef_uv_strengths[0] =
      predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;
  if (skip_cdef) {
    cdef_info->cdef_strengths[1] = 0;
    cdef_info->cdef_uv_strengths[1] = 0;
  }

  // Choose the strength entry for every 64x64 block position.
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int mi_stride = mi_params->mi_stride;
  for (int r = 0; r < nvfb; ++r) {
    MB_MODE_INFO **const row_base =
        mi_params->mi_grid_base + MI_SIZE_64X64 * r * mi_stride;
    for (int c = 0; c < nhfb; ++c) {
      MB_MODE_INFO *const current_mbmi = row_base[MI_SIZE_64X64 * c];
      const int use_second_entry = skip_cdef &&
                                   current_mbmi->skip_cdef_curr_sb &&
                                   frames_since_key > 10;
      current_mbmi->cdef_strength = use_second_entry ? 1 : 0;
    }
  }
}
565
// Selects the CDEF parameters of the frame: the list of luma/chroma strengths
// in cm->cdef_info, the number of bits used to index it, the damping, and the
// strength index of every 64x64 block that was not skipped.
// Inputs:
//   mt_info: Multi-thread info; with more than one worker the frame level mse
//   calculation is done by av1_cdef_mse_calc_frame_mt().
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   pick_method: Search method used to select CDEF parameters
//   rdmult: Multiplier passed to RDCOST when weighing rate against distortion
//   skip_cdef_feature, frames_since_key: Only used when pick_method is
//   CDEF_PICK_FROM_Q; forwarded to pick_cdef_from_qp().
// Returns:
//   Nothing will be returned. Contents of cm->cdef_info and of the mode info
//   grid will be modified.
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
                     const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                     MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
                     int skip_cdef_feature, int frames_since_key) {
  // No search at all: derive the strengths from the quantizer.
  if (pick_method == CDEF_PICK_FROM_Q) {
    pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL4);
  const int num_planes = av1_num_planes(cm);
  CdefSearchCtx cdef_search_ctx;
  // Initialize parameters related to CDEF search context.
  cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
  // Allocate CDEF search context buffers.
  // NOTE(review): the buffers are used below without checking that the
  // allocations succeeded.
  cdef_alloc_data(&cdef_search_ctx);
  // Frame level mse calculation.
  if (mt_info->num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
  } else {
    cdef_mse_calc_frame(&cdef_search_ctx);
  }

  /* Search for different number of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx.sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx.mse[0];
  mse[1] = cdef_search_ctx.mse[1];
  // Try strength lists of 1, 2, 4 and 8 entries (i signaling bits per block)
  // and keep the list with the lowest rate-distortion cost.
  for (int i = 0; i <= 3; i++) {
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Rate: i bits for each searched block plus CDEF_STRENGTH_BITS per list
    // entry, doubled when chroma strengths are present.
    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
  // For every searched block, pick the list entry with the lowest mse
  // (luma plus chroma when present) and store its index in the mode info.
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength =
        best_gi;
  }

  // Up to here the lists hold strength indices of the search method. For the
  // fast methods, convert each entry to
  // pri_strength * CDEF_SEC_STRENGTHS + sec_strength.
  // NOTE(review): cdef_uv_strengths[j] is converted even when num_planes is 1,
  // in which case it was not written by the search above.
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  cdef_dealloc_data(&cdef_search_ctx);
}
663