1 /*
2 * Copyright (c) 2020, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "av1/common/common_data.h"
13 #include "av1/common/quant_common.h"
14 #include "av1/common/reconintra.h"
15
16 #include "av1/encoder/encoder.h"
17 #include "av1/encoder/encodeframe_utils.h"
18 #include "av1/encoder/encoder_utils.h"
19 #include "av1/encoder/rdopt.h"
20
av1_set_ssim_rdmult(const AV1_COMP * const cpi,int * errorperbit,const BLOCK_SIZE bsize,const int mi_row,const int mi_col,int * const rdmult)21 void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
22 const BLOCK_SIZE bsize, const int mi_row,
23 const int mi_col, int *const rdmult) {
24 const AV1_COMMON *const cm = &cpi->common;
25
26 const BLOCK_SIZE bsize_base = BLOCK_16X16;
27 const int num_mi_w = mi_size_wide[bsize_base];
28 const int num_mi_h = mi_size_high[bsize_base];
29 const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
30 const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
31 const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w;
32 const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
33 int row, col;
34 double num_of_mi = 0.0;
35 double geom_mean_of_scale = 1.0;
36
37 // To avoid overflow of 'geom_mean_of_scale', bsize_base must be at least
38 // BLOCK_8X8.
39 //
40 // For bsize=BLOCK_128X128 and bsize_base=BLOCK_8X8, the loop below would
41 // iterate 256 times. Considering the maximum value of
42 // cpi->ssim_rdmult_scaling_factors (see av1_set_mb_ssim_rdmult_scaling()),
43 // geom_mean_of_scale can go up to 4.8323^256, which is within DBL_MAX
44 // (maximum value a double data type can hold). If bsize_base is modified to
45 // BLOCK_4X4 (minimum possible block size), geom_mean_of_scale can go up
46 // to 4.8323^1024 and exceed DBL_MAX, resulting in data overflow.
47 assert(bsize_base >= BLOCK_8X8);
48 assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM);
49
50 for (row = mi_row / num_mi_w;
51 row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
52 for (col = mi_col / num_mi_h;
53 col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
54 const int index = row * num_cols + col;
55 assert(cpi->ssim_rdmult_scaling_factors[index] != 0.0);
56 geom_mean_of_scale *= cpi->ssim_rdmult_scaling_factors[index];
57 num_of_mi += 1.0;
58 }
59 }
60 geom_mean_of_scale = pow(geom_mean_of_scale, (1.0 / num_of_mi));
61
62 *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
63 *rdmult = AOMMAX(*rdmult, 0);
64 av1_set_error_per_bit(errorperbit, *rdmult);
65 }
66
67 #if CONFIG_SALIENCY_MAP
av1_set_saliency_map_vmaf_rdmult(const AV1_COMP * const cpi,int * errorperbit,const BLOCK_SIZE bsize,const int mi_row,const int mi_col,int * const rdmult)68 void av1_set_saliency_map_vmaf_rdmult(const AV1_COMP *const cpi,
69 int *errorperbit, const BLOCK_SIZE bsize,
70 const int mi_row, const int mi_col,
71 int *const rdmult) {
72 const AV1_COMMON *const cm = &cpi->common;
73 const int num_mi_w = mi_size_wide[bsize];
74 const int num_mi_h = mi_size_high[bsize];
75 const int num_cols = (cm->mi_params.mi_cols + num_mi_w - 1) / num_mi_w;
76
77 *rdmult =
78 (int)(*rdmult * cpi->sm_scaling_factor[(mi_row / num_mi_h) * num_cols +
79 (mi_col / num_mi_w)]);
80
81 *rdmult = AOMMAX(*rdmult, 0);
82 av1_set_error_per_bit(errorperbit, *rdmult);
83 }
84 #endif
85
// TODO(angiebird): Move these functions to tpl_model.c
87 #if !CONFIG_REALTIME_ONLY
88 // Return the end column for the current superblock, in unit of TPL blocks.
get_superblock_tpl_column_end(const AV1_COMMON * const cm,int mi_col,int num_mi_w)89 static int get_superblock_tpl_column_end(const AV1_COMMON *const cm, int mi_col,
90 int num_mi_w) {
91 // Find the start column of this superblock.
92 const int sb_mi_col_start = (mi_col >> cm->seq_params->mib_size_log2)
93 << cm->seq_params->mib_size_log2;
94 // Same but in superres upscaled dimension.
95 const int sb_mi_col_start_sr =
96 coded_to_superres_mi(sb_mi_col_start, cm->superres_scale_denominator);
97 // Width of this superblock in mi units.
98 const int sb_mi_width = mi_size_wide[cm->seq_params->sb_size];
99 // Same but in superres upscaled dimension.
100 const int sb_mi_width_sr =
101 coded_to_superres_mi(sb_mi_width, cm->superres_scale_denominator);
102 // Superblock end in mi units.
103 const int sb_mi_end = sb_mi_col_start_sr + sb_mi_width_sr;
104 // Superblock end in TPL units.
105 return (sb_mi_end + num_mi_w - 1) / num_mi_w;
106 }
107
// Derives an rdmult for the coding block of size 'bsize' at (mi_row, mi_col)
// from the TPL statistics of the current frame.
//
// The value returned by set_rdmult(cpi, x, -1) is used unchanged when TPL
// stats are not ready, superres is active, any AQ mode is enabled, x->rb is
// zero, or the accumulated srcrf_dist weight of the block is zero. Otherwise
// it is scaled by rk / x->rb, where rk is the exponential of the
// srcrf_dist-weighted mean of log(intra cost) - log(propagated cost), and the
// result is clamped to be at least 1.
int av1_get_cb_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                      const BLOCK_SIZE bsize, const int mi_row,
                      const int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  const int base_rdmult = set_rdmult(cpi, x, -1);

  // Conditions under which no TPL based adjustment is applied.
  const int skip_adjustment =
      !av1_tpl_stats_ready(tpl_data, tpl_idx) ||
      cm->superres_scale_denominator != SCALE_NUMERATOR ||
      cpi->oxcf.q_cfg.aq_mode != NO_AQ || x->rb == 0;
  if (skip_adjustment) return base_rdmult;

  TplDepFrame *const tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  TplDepStats *const tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;

  // Restrict the scan to the part of the block that lies inside the frame.
  const int row_end =
      AOMMIN(mi_row + mi_size_high[bsize], cm->mi_params.mi_rows);
  const int col_end =
      AOMMIN(mi_col + mi_size_wide[bsize], cm->mi_params.mi_cols);

  double weighted_log_intra = 0;
  double weighted_log_mc_dep = 0;
  double weight_sum = 0;

  for (int row = mi_row; row < row_end; row += step) {
    for (int col = mi_col; col < col_end; col += step) {
      const int pos = av1_tpl_ptr_pos(row, col, tpl_stride,
                                      tpl_data->tpl_stats_block_mis_log2);
      TplDepStats *this_stats = &tpl_stats[pos];

      // Each TPL block is weighted by its source-reference distortion.
      double cbcmp = (double)this_stats->srcrf_dist;
      int64_t mc_dep_delta =
          RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
                 this_stats->mc_dep_dist);
      double dist_scaled = (double)(this_stats->recrf_dist << RDDIV_BITS);
      weighted_log_intra += log(dist_scaled) * cbcmp;
      weighted_log_mc_dep += log(3 * dist_scaled + mc_dep_delta) * cbcmp;
      weight_sum += cbcmp;
    }
  }

  if (weight_sum == 0) return base_rdmult;

  const double rk = exp((weighted_log_intra - weighted_log_mc_dep) / weight_sum);
  const int scaled_rdmult = (int)(base_rdmult * (rk / x->rb));

  return AOMMAX(scaled_rdmult, 1);
}
160
// Scales 'orig_rdmult' by the geometric mean of the entries of
// cpi->ppi->tpl_sb_rdmult_scaling_factors that cover the block of size 'bsize'
// at (mi_row, mi_col), stores the matching error-per-bit in x->errorperbit and
// returns the scaled, non-negative rdmult.
//
// The value of set_rdmult(cpi, x, -1) is returned unchanged when TPL stats are
// not ready, the frame is not TPL eligible, or any AQ mode is enabled.
//
// The scaling factors are read as a row-major grid with one entry per
// 'bsize_base' (16x16) unit; columns are addressed in the superres upscaled
// domain.
int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, const int mi_row,
                            const int mi_col, int orig_rdmult) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  const int deltaq_rdmult = set_rdmult(cpi, x, -1);
  if (!av1_tpl_stats_ready(&cpi->ppi->tpl_data, tpl_idx)) return deltaq_rdmult;
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index))
    return deltaq_rdmult;
  if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return deltaq_rdmult;

  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int block_mi_width_sr =
      coded_to_superres_mi(mi_size_wide[bsize], cm->superres_scale_denominator);

  const BLOCK_SIZE bsize_base = BLOCK_16X16;
  const int num_mi_w = mi_size_wide[bsize_base];
  const int num_mi_h = mi_size_high[bsize_base];
  const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w;
  const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h;
  const int num_bcols = (block_mi_width_sr + num_mi_w - 1) / num_mi_w;
  const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h;
  // This is required because the end col of superblock may be off by 1 in case
  // of superres.
  const int sb_bcol_end = get_superblock_tpl_column_end(cm, mi_col, num_mi_w);
  // Rows are counted in units of the base block height and columns in units
  // of the base block width. The two are equal for the square bsize_base used
  // here, but each index is tied to the matching dimension so the code stays
  // correct if bsize_base ever becomes rectangular.
  const int row_start = mi_row / num_mi_h;
  const int col_start = mi_col_sr / num_mi_w;
  double base_block_count = 0.0;
  double geom_mean_of_scale = 0.0;
  // Sum the logs of the factors; the geometric mean is exp(mean of logs).
  for (int row = row_start; row < num_rows && row < row_start + num_brows;
       ++row) {
    for (int col = col_start;
         col < num_cols && col < col_start + num_bcols && col < sb_bcol_end;
         ++col) {
      const int index = row * num_cols + col;
      geom_mean_of_scale += log(cpi->ppi->tpl_sb_rdmult_scaling_factors[index]);
      base_block_count += 1.0;
    }
  }
  geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count);
  // Round to nearest and never let the multiplier go negative.
  int rdmult = (int)((double)orig_rdmult * geom_mean_of_scale + 0.5);
  rdmult = AOMMAX(rdmult, 0);
  av1_set_error_per_bit(&x->errorperbit, rdmult);
#if !CONFIG_RD_COMMAND
  // For a whole superblock the result is expected to match set_rdmult().
  if (bsize == cm->seq_params->sb_size) {
    const int rdmult_sb = set_rdmult(cpi, x, -1);
    assert(rdmult_sb == rdmult);
    (void)rdmult_sb;
  }
#endif  // !CONFIG_RD_COMMAND
  return rdmult;
}
218 #endif // !CONFIG_REALTIME_ONLY
219
update_filter_type_count(FRAME_COUNTS * counts,const MACROBLOCKD * xd,const MB_MODE_INFO * mbmi)220 static AOM_INLINE void update_filter_type_count(FRAME_COUNTS *counts,
221 const MACROBLOCKD *xd,
222 const MB_MODE_INFO *mbmi) {
223 int dir;
224 for (dir = 0; dir < 2; ++dir) {
225 const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
226 InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir);
227
228 // Only allow the 3 valid SWITCHABLE_FILTERS.
229 assert(filter < SWITCHABLE_FILTERS);
230 ++counts->switchable_interp[ctx][filter];
231 }
232 }
233
234 // This function will copy the best reference mode information from
235 // MB_MODE_INFO_EXT_FRAME to MB_MODE_INFO_EXT.
copy_mbmi_ext_frame_to_mbmi_ext(MB_MODE_INFO_EXT * mbmi_ext,const MB_MODE_INFO_EXT_FRAME * const mbmi_ext_best,uint8_t ref_frame_type)236 static INLINE void copy_mbmi_ext_frame_to_mbmi_ext(
237 MB_MODE_INFO_EXT *mbmi_ext,
238 const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) {
239 memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
240 sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
241 memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
242 sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
243 mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
244 mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
245 memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
246 sizeof(mbmi_ext->global_mvs));
247 }
248
// Commits the mode decision stored in 'ctx' for the block of size 'bsize' at
// (mi_row, mi_col) into the encoder state of thread 'td'.
//
// In every run this copies the chosen mode info into xd->mi[0], restores the
// extended mode info, transform skip flags, tx type map and per-plane
// coefficient buffers from 'ctx', updates segmentation related state, and
// points every in-frame mi grid entry of the block at the committed mode info.
//
// When 'dry_run' is zero it additionally copies the tx type map into the frame
// level buffer, accumulates segment-id coding costs and the zero-motion
// counter, updates mode/filter statistics and stores the motion vectors for
// reference-frame MV projection.
void av1_update_state(const AV1_COMP *const cpi, ThreadData *td,
                      const PICK_MODE_CONTEXT *const ctx, int mi_row,
                      int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
  const AV1_COMMON *const cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const struct segmentation *const seg = &cm->seg;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *const txfm_info = &x->txfm_search_info;
  const MB_MODE_INFO *const best_mi = &ctx->mic;
  MB_MODE_INFO *const mi_addr = xd->mi[0];

  assert(bsize < BLOCK_SIZES_ALL);
  assert(best_mi->bsize == bsize);

  const int bw = mi_size_wide[best_mi->bsize];
  const int bh = mi_size_high[best_mi->bsize];
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];
  const int grid_stride = mi_params->mi_stride;
  const int aq_mode = cpi->oxcf.q_cfg.aq_mode;

  // Install the winning mode info and its extended reference MV data.
  *mi_addr = *best_mi;
  copy_mbmi_ext_frame_to_mbmi_ext(&x->mbmi_ext, &ctx->mbmi_ext_best,
                                  av1_ref_frame_type(ctx->mic.ref_frame));

  memcpy(txfm_info->blk_skip, ctx->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  txfm_info->skip_txfm = ctx->rd_stats.skip_txfm;

  xd->tx_type_map = ctx->tx_type_map;
  xd->tx_type_map_stride = mi_width;
  // In a real (non dry) run the transform types are copied into the frame
  // level buffer, from which the encoder fetches them when writing the
  // bitstream; xd is then redirected to that buffer.
  if (!dry_run) {
    uint8_t *const frame_tx_type_map =
        mi_params->tx_type_map + get_mi_grid_idx(mi_params, mi_row, mi_col);
    for (int r = 0; r < bh; ++r) {
      av1_copy_array(frame_tx_type_map + r * grid_stride,
                     xd->tx_type_map + r * xd->tx_type_map_stride, bw);
    }
    xd->tx_type_map = frame_tx_type_map;
    xd->tx_type_map_stride = grid_stride;
  }

  if (seg->enabled) {
    // Complexity AQ: take the segment id from the segment map.
    if (aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const seg_map =
          seg->update_map ? cpi->enc_seg.map : cm->last_frame_seg_map;
      mi_addr->segment_id =
          seg_map ? get_segment_id(mi_params, seg_map, bsize, mi_row, mi_col)
                  : 0;
    }
    // Cyclic refresh: update the segment map, set the segment id and then
    // update the quantizer.
    if (aq_mode == CYCLIC_REFRESH_AQ &&
        mi_addr->segment_id != AM_SEGMENT_ID_INACTIVE &&
        !cpi->rc.rtc_external_ratectrl) {
      av1_cyclic_refresh_update_segment(cpi, x, mi_row, mi_col, bsize,
                                        ctx->rd_stats.rate, ctx->rd_stats.dist,
                                        txfm_info->skip_txfm, dry_run);
    }
    // Fall back to DC prediction for chroma when CfL is not allowed here.
    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
      mi_addr->uv_mode = UV_DC_PRED;

    // Accumulate the cost of coding the segment id spatially (index 0) and
    // with temporal prediction (index 1).
    if (!dry_run && !mi_addr->skip_txfm) {
      int cdf_num;
      const uint8_t spatial_pred = av1_get_spatial_seg_pred(
          cm, xd, &cdf_num, cpi->cyclic_refresh->skip_over4x4);
      const uint8_t coded_id = av1_neg_interleave(
          mi_addr->segment_id, spatial_pred, seg->last_active_segid + 1);
      const int64_t spatial_cost =
          x->mode_costs.spatial_pred_cost[cdf_num][coded_id];
      td->rd_counts.seg_tmp_pred_cost[0] += spatial_cost;

      const int pred_segment_id =
          cm->last_frame_seg_map
              ? get_segment_id(mi_params, cm->last_frame_seg_map, bsize, mi_row,
                               mi_col)
              : 0;
      const int use_tmp_pred = pred_segment_id == mi_addr->segment_id;
      const uint8_t tmp_pred_ctx = av1_get_pred_context_seg_id(xd);
      td->rd_counts.seg_tmp_pred_cost[1] +=
          x->mode_costs.tmp_pred_cost[tmp_pred_ctx][use_tmp_pred];
      // A failed temporal prediction also pays for the spatial coding.
      if (!use_tmp_pred) {
        td->rd_counts.seg_tmp_pred_cost[1] += spatial_cost;
      }
    }
  }

  // Count (near) zero motion vectors that reference LAST_FRAME.
  if (!dry_run && !frame_is_intra_only(cm)) {
    const MV mv = best_mi->mv[0].as_mv;
    if (is_inter_block(best_mi) && best_mi->ref_frame[0] == LAST_FRAME &&
        abs(mv.row) < 8 && abs(mv.col) < 8) {
      const int rows_in_frame = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
      // Accumulate low_content_frame.
      for (int r = 0; r < rows_in_frame; r += 2) x->cnt_zeromv += bw << 1;
    }
  }

  // Point the per-plane coefficient buffers at the ones owned by ctx.
  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    p->coeff = ctx->coeff[plane];
    p->qcoeff = ctx->qcoeff[plane];
    p->dqcoeff = ctx->dqcoeff[plane];
    p->eobs = ctx->eobs[plane];
    p->txb_entropy_ctx = ctx->txb_entropy_ctx[plane];
  }
  for (int plane = 0; plane < 2; ++plane)
    xd->plane[plane].color_index_map = ctx->color_index_map[plane];

  // Restore the coding context of the MB to that which was in place when the
  // mode was picked for it: every mi grid entry of the block that lies inside
  // the frame points at the committed mode info.
  const int cols =
      AOMMIN((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width, mi_width);
  const int rows = AOMMIN(
      (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height, mi_height);
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < cols; c++) xd->mi[c + r * grid_stride] = mi_addr;
  }

  if (aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id, 0);

  // Everything below only applies to the final encode of the block.
  if (dry_run) return;

#if CONFIG_INTERNAL_STATS
  {
    unsigned int *const mode_chosen_counts =
        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
    if (frame_is_intra_only(cm)) {
      static const int kf_mode_index[] = {
        THR_DC /*DC_PRED*/,
        THR_V_PRED /*V_PRED*/,
        THR_H_PRED /*H_PRED*/,
        THR_D45_PRED /*D45_PRED*/,
        THR_D135_PRED /*D135_PRED*/,
        THR_D113_PRED /*D113_PRED*/,
        THR_D157_PRED /*D157_PRED*/,
        THR_D203_PRED /*D203_PRED*/,
        THR_D67_PRED /*D67_PRED*/,
        THR_SMOOTH,   /*SMOOTH_PRED*/
        THR_SMOOTH_V, /*SMOOTH_V_PRED*/
        THR_SMOOTH_H, /*SMOOTH_H_PRED*/
        THR_PAETH /*PAETH_PRED*/,
      };
      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
    } else {
      // Note how often each mode chosen as best
      ++mode_chosen_counts[ctx->best_mode_index];
    }
  }
#endif
  // When the frame interp filter is SWITCHABLE, several cases that always
  // use the default type (EIGHTTAP_REGULAR) are described in
  // av1_is_interp_needed(). Here, we should keep the counts for all
  // applicable blocks, so the frame filter resetting decision in
  // fix_interp_filter() is made correctly.
  if (!frame_is_intra_only(cm) && is_inter_block(best_mi) &&
      cm->features.interp_filter == SWITCHABLE) {
    update_filter_type_count(td->counts, xd, mi_addr);
  }

  // Store the motion vectors of the in-frame part of the block.
  if (cm->seq_params->order_hint_info.enable_ref_frame_mvs) {
    const int x_mis = AOMMIN(bw, mi_params->mi_cols - mi_col);
    const int y_mis = AOMMIN(bh, mi_params->mi_rows - mi_row);
    av1_copy_frame_mvs(cm, best_mi, mi_row, mi_col, x_mis, y_mis);
  }
}
421
// Updates the inter mode CDFs in 'fc' (and, with CONFIG_ENTROPY_STATS, the
// matching counters in 'counts') for the inter prediction mode 'mode'.
//
// The mode is signalled as a cascade of binary decisions, each using its own
// context extracted from 'mode_context': NEWMV or not, then GLOBALMV or not,
// then NEARESTMV or not. The cascade stops at the first decision that
// identifies the mode.
void av1_update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
                                 PREDICTION_MODE mode, int16_t mode_context) {
  (void)counts;

  // Stage 1: NEWMV (symbol 0) versus everything else (symbol 1).
  const int not_newmv = (mode != NEWMV);
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
  ++counts->newmv_mode[newmv_ctx][not_newmv];
#endif
  update_cdf(fc->newmv_cdf[newmv_ctx], not_newmv, 2);
  if (!not_newmv) return;

  // Stage 2: GLOBALMV (symbol 0) versus the remaining modes (symbol 1).
  const int not_globalmv = (mode != GLOBALMV);
  const int16_t globalmv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
  ++counts->zeromv_mode[globalmv_ctx][not_globalmv];
#endif
  update_cdf(fc->zeromv_cdf[globalmv_ctx], not_globalmv, 2);
  if (!not_globalmv) return;

  // Stage 3: NEARESTMV (symbol 0) versus the rest (symbol 1).
  const int not_nearestmv = (mode != NEARESTMV);
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
  ++counts->refmv_mode[refmv_ctx][not_nearestmv];
#endif
  update_cdf(fc->refmv_cdf[refmv_ctx], not_nearestmv, 2);
}
460
// Updates the palette related CDFs of xd->tile_ctx (and, with
// CONFIG_ENTROPY_STATS, the matching counters in 'counts') for block 'mbmi'.
//
// The luma palette flag and size are only updated when the luma mode is
// DC_PRED, and the chroma ones only when the chroma mode is UV_DC_PRED. A
// palette size is only updated when the corresponding palette is in use
// (size > 0).
static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                               FRAME_COUNTS *counts) {
  FRAME_CONTEXT *const fc = xd->tile_ctx;
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int bsize_ctx = av1_get_palette_bsize_ctx(mbmi->bsize);

  (void)counts;

  // Luma palette.
  if (mbmi->mode == DC_PRED) {
    const int y_size = pmi->palette_size[0];
    const int has_y_palette = y_size > 0;
    const int y_mode_ctx = av1_get_palette_mode_ctx(xd);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_y_mode[bsize_ctx][y_mode_ctx][has_y_palette];
#endif
    update_cdf(fc->palette_y_mode_cdf[bsize_ctx][y_mode_ctx], has_y_palette,
               2);
    if (has_y_palette) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_y_size[bsize_ctx][y_size - PALETTE_MIN_SIZE];
#endif
      update_cdf(fc->palette_y_size_cdf[bsize_ctx], y_size - PALETTE_MIN_SIZE,
                 PALETTE_SIZES);
    }
  }

  // Chroma palette; its flag context is whether a luma palette is in use.
  if (mbmi->uv_mode == UV_DC_PRED) {
    const int uv_size = pmi->palette_size[1];
    const int has_uv_palette = uv_size > 0;
    const int uv_mode_ctx = (pmi->palette_size[0] > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_uv_mode[uv_mode_ctx][has_uv_palette];
#endif
    update_cdf(fc->palette_uv_mode_cdf[uv_mode_ctx], has_uv_palette, 2);

    if (has_uv_palette) {
#if CONFIG_ENTROPY_STATS
      ++counts->palette_uv_size[bsize_ctx][uv_size - PALETTE_MIN_SIZE];
#endif
      update_cdf(fc->palette_uv_size_cdf[bsize_ctx],
                 uv_size - PALETTE_MIN_SIZE, PALETTE_SIZES);
    }
  }
}
506
// Updates the intra mode related CDFs of xd->tile_ctx (and, with
// CONFIG_ENTROPY_STATS, the matching counters in 'counts') for the intra
// block 'mbmi'.
//
// In order, this covers: the luma mode (using the above/left neighbours as
// context when 'intraonly' is nonzero, the block size group otherwise),
// filter intra, and the luma angle delta. If the block is a chroma reference
// it continues with the chroma mode, the CfL signs and alphas, the chroma
// angle delta and finally the palette syntax.
void av1_sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
                         MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                         const MB_MODE_INFO *above_mi,
                         const MB_MODE_INFO *left_mi, const int intraonly) {
  FRAME_CONTEXT *const fc = xd->tile_ctx;
  const BLOCK_SIZE bsize = mbmi->bsize;
  const PREDICTION_MODE y_mode = mbmi->mode;
  const int num_angle_symbols = 2 * MAX_ANGLE_DELTA + 1;
  (void)counts;

  // Luma prediction mode.
  if (!intraonly) {
#if CONFIG_ENTROPY_STATS
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES);
  } else {
#if CONFIG_ENTROPY_STATS
    const PREDICTION_MODE above = av1_above_block_mode(above_mi);
    const PREDICTION_MODE left = av1_left_block_mode(left_mi);
    const int above_ctx = intra_mode_context[above];
    const int left_ctx = intra_mode_context[left];
    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES);
  }

  // Filter intra flag and, when set, the filter intra mode.
  if (av1_filter_intra_allowed(cm, mbmi)) {
    const FILTER_INTRA_MODE_INFO *const fi_info = &mbmi->filter_intra_mode_info;
    const int use_filter_intra_mode = fi_info->use_filter_intra;
#if CONFIG_ENTROPY_STATS
    ++counts->filter_intra[bsize][use_filter_intra_mode];
    if (use_filter_intra_mode) {
      ++counts->filter_intra_mode[fi_info->filter_intra_mode];
    }
#endif  // CONFIG_ENTROPY_STATS
    update_cdf(fc->filter_intra_cdfs[bsize], use_filter_intra_mode, 2);
    if (use_filter_intra_mode) {
      update_cdf(fc->filter_intra_mode_cdf, fi_info->filter_intra_mode,
                 FILTER_INTRA_MODES);
    }
  }

  // Luma angle delta, offset so that the symbol is non-negative.
  if (av1_is_directional_mode(y_mode) && av1_use_angle_delta(bsize)) {
    const int y_angle_symbol = mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA;
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[y_mode - V_PRED][y_angle_symbol];
#endif
    update_cdf(fc->angle_delta_cdf[y_mode - V_PRED], y_angle_symbol,
               num_angle_symbols);
  }

  // Nothing chroma related is coded for blocks that are not chroma references.
  if (!xd->is_chroma_ref) return;

  // Chroma prediction mode; the alphabet loses one symbol when CfL is not
  // allowed.
  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
  const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
#if CONFIG_ENTROPY_STATS
  ++counts->uv_mode[cfl_allowed][y_mode][uv_mode];
#endif  // CONFIG_ENTROPY_STATS
  update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
             UV_INTRA_MODES - !cfl_allowed);

  // CfL joint sign, then one alpha per plane whose sign is not zero.
  if (uv_mode == UV_CFL_PRED) {
    const int8_t joint_sign = mbmi->cfl_alpha_signs;
    const uint8_t alpha_idx = mbmi->cfl_alpha_idx;

#if CONFIG_ENTROPY_STATS
    ++counts->cfl_sign[joint_sign];
#endif
    update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS);

    if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *const cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(alpha_idx)];
#endif
      update_cdf(cdf_u, CFL_IDX_U(alpha_idx), CFL_ALPHABET_SIZE);
    }
    if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
      aom_cdf_prob *const cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(alpha_idx)];
#endif
      update_cdf(cdf_v, CFL_IDX_V(alpha_idx), CFL_ALPHABET_SIZE);
    }
  }

  // Chroma angle delta.
  const PREDICTION_MODE uv_intra_mode = get_uv_mode(uv_mode);
  if (av1_is_directional_mode(uv_intra_mode) && av1_use_angle_delta(bsize)) {
    const int uv_angle_symbol =
        mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA;
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[uv_intra_mode - V_PRED][uv_angle_symbol];
#endif
    update_cdf(fc->angle_delta_cdf[uv_intra_mode - V_PRED], uv_angle_symbol,
               num_angle_symbols);
  }

  // Palette syntax.
  if (av1_allow_palette(cm->features.allow_screen_content_tools, bsize)) {
    update_palette_cdf(xd, mbmi, counts);
  }
}
607
// Restores the above/left entropy, partition and transform contexts of the
// block of size 'bsize' at (mi_row, mi_col) from the snapshot 'ctx' (see
// av1_save_context()).
//
// The transform context pointers of xd are first reset to the ones recorded
// in 'ctx'; the saved transform context values are then written through them.
void av1_restore_context(MACROBLOCK *x, const RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         const int num_planes) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  // Left contexts are addressed relative to the enclosing superblock.
  const int sb_mi_row = mi_row & MAX_MIB_MASK;

  // Entropy contexts: plane p lives at offset p * block dimension inside the
  // snapshot; positions and lengths are scaled by the plane's subsampling.
  for (int plane = 0; plane < num_planes; plane++) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;
    memcpy(xd->above_entropy_context[plane] + (mi_col >> ss_x),
           ctx->a + mi_wide * plane,
           (sizeof(ENTROPY_CONTEXT) * mi_wide) >> ss_x);
    memcpy(xd->left_entropy_context[plane] + (sb_mi_row >> ss_y),
           ctx->l + mi_high * plane,
           (sizeof(ENTROPY_CONTEXT) * mi_high) >> ss_y);
  }

  // Partition contexts.
  memcpy(xd->above_partition_context + mi_col, ctx->sa,
         sizeof(*xd->above_partition_context) * mi_wide);
  memcpy(xd->left_partition_context + sb_mi_row, ctx->sl,
         sizeof(xd->left_partition_context[0]) * mi_high);

  // Transform contexts: pointers first, then the values behind them.
  xd->above_txfm_context = ctx->p_ta;
  xd->left_txfm_context = ctx->p_tl;
  memcpy(xd->above_txfm_context, ctx->ta,
         sizeof(*xd->above_txfm_context) * mi_wide);
  memcpy(xd->left_txfm_context, ctx->tl,
         sizeof(*xd->left_txfm_context) * mi_high);
}
641
// Takes a snapshot in 'ctx' of the above/left entropy, partition and
// transform contexts of the block of size 'bsize' at (mi_row, mi_col), so
// that av1_restore_context() can put them back later.
//
// The current transform context pointers of xd are recorded alongside the
// values they point to.
void av1_save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
                      int mi_row, int mi_col, BLOCK_SIZE bsize,
                      const int num_planes) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  // Left contexts are addressed relative to the enclosing superblock.
  const int sb_mi_row = mi_row & MAX_MIB_MASK;

  // Entropy contexts: plane p is stored at offset p * block dimension inside
  // the snapshot; positions and lengths are scaled by the plane's subsampling.
  for (int plane = 0; plane < num_planes; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;
    memcpy(ctx->a + mi_wide * plane,
           xd->above_entropy_context[plane] + (mi_col >> ss_x),
           (sizeof(ENTROPY_CONTEXT) * mi_wide) >> ss_x);
    memcpy(ctx->l + mi_high * plane,
           xd->left_entropy_context[plane] + (sb_mi_row >> ss_y),
           (sizeof(ENTROPY_CONTEXT) * mi_high) >> ss_y);
  }

  // Partition contexts.
  memcpy(ctx->sa, xd->above_partition_context + mi_col,
         sizeof(*xd->above_partition_context) * mi_wide);
  memcpy(ctx->sl, xd->left_partition_context + sb_mi_row,
         sizeof(xd->left_partition_context[0]) * mi_high);

  // Transform contexts: values first, then the pointers they were read from.
  memcpy(ctx->ta, xd->above_txfm_context,
         sizeof(*xd->above_txfm_context) * mi_wide);
  memcpy(ctx->tl, xd->left_txfm_context,
         sizeof(*xd->left_txfm_context) * mi_high);
  ctx->p_ta = xd->above_txfm_context;
  ctx->p_tl = xd->left_txfm_context;
}
673
// Assigns block sizes inside a superblock that is only partially inside the
// tile. Starting from steps of bh_in x bw_in mi units, each visited position
// gets its mode info pointer set in 'mib' and its block size chosen by
// find_partition_size() from the rows/columns that remain. That call also
// updates the row and column steps used to reach the next position; the
// column step is reset to bw_in at the start of every row.
static void set_partial_sb_partition(const AV1_COMMON *const cm,
                                     MB_MODE_INFO *mi, int bh_in, int bw_in,
                                     int mi_rows_remaining,
                                     int mi_cols_remaining, BLOCK_SIZE bsize,
                                     MB_MODE_INFO **mib) {
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  int row_step = bh_in;

  for (int blk_row = 0; blk_row < cm->seq_params->mib_size;
       blk_row += row_step) {
    int col_step = bw_in;
    for (int blk_col = 0; blk_col < cm->seq_params->mib_size;
         blk_col += col_step) {
      const int grid_index = get_mi_grid_idx(mi_params, blk_row, blk_col);
      const int mi_index = get_alloc_mi_idx(mi_params, blk_row, blk_col);
      MB_MODE_INFO *const blk_mi = mi + mi_index;

      mib[grid_index] = blk_mi;
      blk_mi->bsize = find_partition_size(bsize, mi_rows_remaining - blk_row,
                                          mi_cols_remaining - blk_col,
                                          &row_step, &col_step);
    }
  }
}
692
693 // This function attempts to set all mode info entries in a given superblock
694 // to the same block partition size.
695 // However, at the bottom and right borders of the image the requested size
696 // may not be allowed in which case this code attempts to choose the largest
697 // allowable partition.
av1_set_fixed_partitioning(AV1_COMP * cpi,const TileInfo * const tile,MB_MODE_INFO ** mib,int mi_row,int mi_col,BLOCK_SIZE bsize)698 void av1_set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
699 MB_MODE_INFO **mib, int mi_row, int mi_col,
700 BLOCK_SIZE bsize) {
701 AV1_COMMON *const cm = &cpi->common;
702 const CommonModeInfoParams *const mi_params = &cm->mi_params;
703 const int mi_rows_remaining = tile->mi_row_end - mi_row;
704 const int mi_cols_remaining = tile->mi_col_end - mi_col;
705 MB_MODE_INFO *const mi_upper_left =
706 mi_params->mi_alloc + get_alloc_mi_idx(mi_params, mi_row, mi_col);
707 int bh = mi_size_high[bsize];
708 int bw = mi_size_wide[bsize];
709
710 assert(bsize >= mi_params->mi_alloc_bsize &&
711 "Attempted to use bsize < mi_params->mi_alloc_bsize");
712 assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
713
714 // Apply the requested partition size to the SB if it is all "in image"
715 if ((mi_cols_remaining >= cm->seq_params->mib_size) &&
716 (mi_rows_remaining >= cm->seq_params->mib_size)) {
717 for (int block_row = 0; block_row < cm->seq_params->mib_size;
718 block_row += bh) {
719 for (int block_col = 0; block_col < cm->seq_params->mib_size;
720 block_col += bw) {
721 const int grid_index = get_mi_grid_idx(mi_params, block_row, block_col);
722 const int mi_index = get_alloc_mi_idx(mi_params, block_row, block_col);
723 mib[grid_index] = mi_upper_left + mi_index;
724 mib[grid_index]->bsize = bsize;
725 }
726 }
727 } else {
728 // Else this is a partial SB.
729 set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
730 mi_cols_remaining, bsize, mib);
731 }
732 }
733
// Returns 1 when splitting the block of size 'bsize' at (mi_row, mi_col) into
// four quadrants yields only leaves, 0 otherwise.
//
// A quadrant disqualifies the block when its top-left corner lies outside the
// frame, or when it is itself partitioned further (anything other than
// PARTITION_NONE) and the quadrant size is not BLOCK_8X8.
int av1_is_leaf_split_partition(AV1_COMMON *cm, int mi_row, int mi_col,
                                BLOCK_SIZE bsize) {
  assert(bsize >= BLOCK_8X8);
  const int half_mi = mi_size_wide[bsize] / 2;
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);

  // Quadrants are visited in raster order: bit 0 selects the column, bit 1
  // the row.
  for (int quad = 0; quad < 4; quad++) {
    const int quad_row = mi_row + (quad >> 1) * half_mi;
    const int quad_col = mi_col + (quad & 1) * half_mi;

    if (quad_row >= cm->mi_params.mi_rows || quad_col >= cm->mi_params.mi_cols)
      return 0;

    const int is_partitioned =
        get_partition(cm, quad_row, quad_col, subsize) != PARTITION_NONE;
    if (is_partitioned && subsize != BLOCK_8X8) return 0;
  }
  return 1;
}
754
755 #if !CONFIG_REALTIME_ONLY
// Derives a block-level rdmult from the TPL statistics that cover the block of
// size bsize at (mi_row, mi_col).
//
// The per-unit recrf_dist and motion-compensated dependency costs are summed
// over the block. With rk = intra_cost / mc_dep_cost, the scaling factor is
// beta = cpi->rd.r0 / rk (beta stays 1.0 when either sum is not positive), and
// the rdmult comes from av1_get_adaptive_rdmult(cpi, beta).
//
// Returns:
//   - orig_rdmult unchanged when av1_tpl_stats_ready() or
//     is_frame_tpl_eligible() reports that TPL data should not be used;
//   - otherwise the adaptive rdmult clamped to
//     [orig_rdmult / 2, orig_rdmult * 3 / 2] and never below 1.
int av1_get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                         int mi_col, int orig_rdmult) {
  AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;

  // Gate on TPL availability *before* touching tpl_data->tpl_frame[tpl_idx],
  // the same order av1_get_tpl_stats_sb() uses. Presumably
  // av1_tpl_stats_ready() also rejects an out-of-range frame index, in which
  // case reading the frame entry first would be an out-of-bounds access.
  if (!av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index)) {
    return orig_rdmult;
  }
  if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) {
    return orig_rdmult;
  }

  const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
  int64_t intra_cost = 0;
  int64_t mc_dep_cost = 0;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];

  TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  int tpl_stride = tpl_frame->stride;

#ifndef NDEBUG
  int mi_count = 0;
#endif
  // Columns are walked in superres (upscaled) mi units; rows are not scaled.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int step = 1 << block_mis_log2;
  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < mi_row + mi_high; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
      // Skip units that fall outside the frame (partial blocks at the edge).
      if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
      TplDepStats *this_stats =
          &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
      int64_t mc_dep_delta =
          RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
                 this_stats->mc_dep_dist);
      intra_cost += this_stats->recrf_dist << RDDIV_BITS;
      mc_dep_cost += (this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
#ifndef NDEBUG
      mi_count++;
#endif
    }
  }
  assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);

  double beta = 1.0;
  if (mc_dep_cost > 0 && intra_cost > 0) {
    const double r0 = cpi->rd.r0;
    const double rk = (double)intra_cost / mc_dep_cost;
    beta = (r0 / rk);
  }

  int rdmult = av1_get_adaptive_rdmult(cpi, beta);

  // Keep the adjusted value within +/-50% of the incoming rdmult.
  rdmult = AOMMIN(rdmult, orig_rdmult * 3 / 2);
  rdmult = AOMMAX(rdmult, orig_rdmult * 1 / 2);

  rdmult = AOMMAX(1, rdmult);

  return rdmult;
}
826
827 // Checks to see if a super block is on a horizontal image edge.
828 // In most cases this is the "real" edge unless there are formatting
829 // bars embedded in the stream.
av1_active_h_edge(const AV1_COMP * cpi,int mi_row,int mi_step)830 int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
831 int top_edge = 0;
832 int bottom_edge = cpi->common.mi_params.mi_rows;
833 int is_active_h_edge = 0;
834
835 // For two pass account for any formatting bars detected.
836 if (is_stat_consumption_stage_twopass(cpi)) {
837 const AV1_COMMON *const cm = &cpi->common;
838 const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
839 &cpi->ppi->twopass, cm->current_frame.display_order_hint);
840 if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
841
842 // The inactive region is specified in MBs not mi units.
843 // The image edge is in the following MB row.
844 top_edge += (int)(this_frame_stats->inactive_zone_rows * 4);
845
846 bottom_edge -= (int)(this_frame_stats->inactive_zone_rows * 4);
847 bottom_edge = AOMMAX(top_edge, bottom_edge);
848 }
849
850 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
851 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
852 is_active_h_edge = 1;
853 }
854 return is_active_h_edge;
855 }
856
857 // Checks to see if a super block is on a vertical image edge.
858 // In most cases this is the "real" edge unless there are formatting
859 // bars embedded in the stream.
av1_active_v_edge(const AV1_COMP * cpi,int mi_col,int mi_step)860 int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
861 int left_edge = 0;
862 int right_edge = cpi->common.mi_params.mi_cols;
863 int is_active_v_edge = 0;
864
865 // For two pass account for any formatting bars detected.
866 if (is_stat_consumption_stage_twopass(cpi)) {
867 const AV1_COMMON *const cm = &cpi->common;
868 const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
869 &cpi->ppi->twopass, cm->current_frame.display_order_hint);
870 if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
871
872 // The inactive region is specified in MBs not mi units.
873 // The image edge is in the following MB row.
874 left_edge += (int)(this_frame_stats->inactive_zone_cols * 4);
875
876 right_edge -= (int)(this_frame_stats->inactive_zone_cols * 4);
877 right_edge = AOMMAX(left_edge, right_edge);
878 }
879
880 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
881 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
882 is_active_v_edge = 1;
883 }
884 return is_active_v_edge;
885 }
886
// Copies the TPL inter/intra costs and motion vectors covering the block of
// size bsize at (mi_row, mi_col) into sb_enc.
//
// sb_enc->tpl_data_count is reset to 0 first and stays 0 when the TPL model is
// disabled, the frame is a key frame, the frame is an (internal) overlay, or
// the TPL stats for the current GF-group index are not ready. Otherwise one
// entry is written per TPL motion-estimation unit in raster order; units that
// fall outside the frame get INT64_MAX costs and INVALID_MV vectors so no
// stale values are read later. tpl_data_count ends up as the number of
// in-frame units and tpl_stride as the number of units per row.
void av1_get_tpl_stats_sb(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                          int mi_col, SuperBlockEnc *sb_enc) {
  sb_enc->tpl_data_count = 0;

  if (!cpi->oxcf.algo_cfg.enable_tpl_model) return;
  if (cpi->common.current_frame.frame_type == KEY_FRAME) return;
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  switch (update_type) {
    case INTNL_OVERLAY_UPDATE:
    case OVERLAY_UPDATE: return;
    default: break;
  }
  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));

  AV1_COMMON *const cm = &cpi->common;
  const int gf_group_index = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, gf_group_index)) return;

  TplDepFrame *const tpl_frame = &tpl_data->tpl_frame[gf_group_index];
  TplDepStats *const tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;

  // Column bounds are expressed in superres (upscaled) mi units.
  const int sr_denom = cm->superres_scale_denominator;
  const int mi_col_sr = coded_to_superres_mi(mi_col, sr_denom);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_size_wide[bsize], sr_denom);
  // mi_cols_sr is mi_cols at superres case.
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
  const int mi_row_end = mi_row + mi_size_high[bsize];

  // TPL store unit size is not the same as the motion estimation unit size.
  // Here always use motion estimation size to avoid getting repetitive inter/
  // intra cost.
  const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
  assert(mi_size_wide[tpl_bsize] == mi_size_high[tpl_bsize]);
  const int row_step = mi_size_high[tpl_bsize];
  const int col_step_sr =
      coded_to_superres_mi(mi_size_wide[tpl_bsize], sr_denom);

  // Stride is only based on SB size, and we fill in values for every 16x16
  // block in a SB.
  sb_enc->tpl_stride = (mi_col_end_sr - mi_col_sr) / col_step_sr;

  int slot = 0;          // Next output entry to write.
  int valid_blocks = 0;  // Entries backed by real (in-frame) TPL stats.
  for (int row = mi_row; row < mi_row_end; row += row_step) {
    for (int col = mi_col_sr; col < mi_col_end_sr;
         col += col_step_sr, ++slot) {
      assert(slot < MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);

      if (row < cm->mi_params.mi_rows && col < mi_cols_sr) {
        const TplDepStats *const unit_stats = &tpl_stats[av1_tpl_ptr_pos(
            row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
        sb_enc->tpl_inter_cost[slot] = unit_stats->inter_cost
                                       << TPL_DEP_COST_SCALE_LOG2;
        sb_enc->tpl_intra_cost[slot] = unit_stats->intra_cost
                                       << TPL_DEP_COST_SCALE_LOG2;
        memcpy(sb_enc->tpl_mv[slot], unit_stats->mv, sizeof(unit_stats->mv));
        ++valid_blocks;
      } else {
        // Handle partial SB, so that no invalid values are used later.
        sb_enc->tpl_inter_cost[slot] = INT64_MAX;
        sb_enc->tpl_intra_cost[slot] = INT64_MAX;
        for (int ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
          sb_enc->tpl_mv[slot][ref].as_int = INVALID_MV;
        }
      }
    }
  }

  assert(valid_blocks <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
  sb_enc->tpl_data_count = valid_blocks;
}
962
963 // analysis_type 0: Use mc_dep_cost and intra_cost
964 // analysis_type 1: Use count of best inter predictor chosen
965 // analysis_type 2: Use cost reduction from intra to inter for best inter
966 // predictor chosen
av1_get_q_for_deltaq_objective(AV1_COMP * const cpi,ThreadData * td,int64_t * delta_dist,BLOCK_SIZE bsize,int mi_row,int mi_col)967 int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, ThreadData *td,
968 int64_t *delta_dist, BLOCK_SIZE bsize,
969 int mi_row, int mi_col) {
970 AV1_COMMON *const cm = &cpi->common;
971 assert(IMPLIES(cpi->ppi->gf_group.size > 0,
972 cpi->gf_frame_index < cpi->ppi->gf_group.size));
973 const int tpl_idx = cpi->gf_frame_index;
974 TplParams *const tpl_data = &cpi->ppi->tpl_data;
975 const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
976 double intra_cost = 0;
977 double mc_dep_reg = 0;
978 double mc_dep_cost = 0;
979 double cbcmp_base = 1;
980 double srcrf_dist = 0;
981 double srcrf_sse = 0;
982 double srcrf_rate = 0;
983 const int mi_wide = mi_size_wide[bsize];
984 const int mi_high = mi_size_high[bsize];
985 const int base_qindex = cm->quant_params.base_qindex;
986
987 if (tpl_idx >= MAX_TPL_FRAME_IDX) return base_qindex;
988
989 TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
990 TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
991 int tpl_stride = tpl_frame->stride;
992 if (!tpl_frame->is_valid) return base_qindex;
993
994 #ifndef NDEBUG
995 int mi_count = 0;
996 #endif
997 const int mi_col_sr =
998 coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
999 const int mi_col_end_sr =
1000 coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
1001 const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
1002 const int step = 1 << block_mis_log2;
1003 const int row_step = step;
1004 const int col_step_sr =
1005 coded_to_superres_mi(step, cm->superres_scale_denominator);
1006 for (int row = mi_row; row < mi_row + mi_high; row += row_step) {
1007 for (int col = mi_col_sr; col < mi_col_end_sr; col += col_step_sr) {
1008 if (row >= cm->mi_params.mi_rows || col >= mi_cols_sr) continue;
1009 TplDepStats *this_stats =
1010 &tpl_stats[av1_tpl_ptr_pos(row, col, tpl_stride, block_mis_log2)];
1011 double cbcmp = (double)this_stats->srcrf_dist;
1012 int64_t mc_dep_delta =
1013 RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate,
1014 this_stats->mc_dep_dist);
1015 double dist_scaled = (double)(this_stats->recrf_dist << RDDIV_BITS);
1016 intra_cost += log(dist_scaled) * cbcmp;
1017 mc_dep_cost += log(dist_scaled + mc_dep_delta) * cbcmp;
1018 mc_dep_reg += log(3 * dist_scaled + mc_dep_delta) * cbcmp;
1019 srcrf_dist += (double)(this_stats->srcrf_dist << RDDIV_BITS);
1020 srcrf_sse += (double)(this_stats->srcrf_sse << RDDIV_BITS);
1021 srcrf_rate += (double)(this_stats->srcrf_rate << TPL_DEP_COST_SCALE_LOG2);
1022 #ifndef NDEBUG
1023 mi_count++;
1024 #endif
1025 cbcmp_base += cbcmp;
1026 }
1027 }
1028 assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
1029
1030 int offset = 0;
1031 double beta = 1.0;
1032 double rk;
1033 if (mc_dep_cost > 0 && intra_cost > 0) {
1034 const double r0 = cpi->rd.r0;
1035 rk = exp((intra_cost - mc_dep_cost) / cbcmp_base);
1036 td->mb.rb = exp((intra_cost - mc_dep_reg) / cbcmp_base);
1037 beta = (r0 / rk);
1038 assert(beta > 0.0);
1039 } else {
1040 return base_qindex;
1041 }
1042 offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
1043
1044 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1045 offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
1046 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1);
1047 int qindex = cm->quant_params.base_qindex + offset;
1048 qindex = AOMMIN(qindex, MAXQ);
1049 qindex = AOMMAX(qindex, MINQ);
1050
1051 int frm_qstep = av1_dc_quant_QTX(base_qindex, 0, cm->seq_params->bit_depth);
1052 int sbs_qstep =
1053 av1_dc_quant_QTX(base_qindex, offset, cm->seq_params->bit_depth);
1054
1055 if (delta_dist) {
1056 double sbs_dist = srcrf_dist * pow((double)sbs_qstep / frm_qstep, 2.0);
1057 double sbs_rate = srcrf_rate * ((double)frm_qstep / sbs_qstep);
1058 sbs_dist = AOMMIN(sbs_dist, srcrf_sse);
1059 *delta_dist = (int64_t)((sbs_dist - srcrf_dist) / rk);
1060 *delta_dist += RDCOST(tpl_frame->base_rdmult, 4 * 256, 0);
1061 *delta_dist += RDCOST(tpl_frame->base_rdmult, sbs_rate - srcrf_rate, 0);
1062 }
1063 return qindex;
1064 }
1065
1066 #if !DISABLE_HDR_LUMA_DELTAQ
1067 // offset table defined in Table3 of T-REC-H.Sup15 document.
1068 static const int hdr_thres[HDR_QP_LEVELS + 1] = { 0, 301, 367, 434, 501, 567,
1069 634, 701, 767, 834, 1024 };
1070
1071 static const int hdr10_qp_offset[HDR_QP_LEVELS] = { 3, 2, 1, 0, -1,
1072 -2, -3, -4, -5, -6 };
1073 #endif
1074
av1_get_q_for_hdr(AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,int mi_row,int mi_col)1075 int av1_get_q_for_hdr(AV1_COMP *const cpi, MACROBLOCK *const x,
1076 BLOCK_SIZE bsize, int mi_row, int mi_col) {
1077 AV1_COMMON *const cm = &cpi->common;
1078 assert(cm->seq_params->bit_depth == AOM_BITS_10);
1079
1080 #if DISABLE_HDR_LUMA_DELTAQ
1081 (void)x;
1082 (void)bsize;
1083 (void)mi_row;
1084 (void)mi_col;
1085 return cm->quant_params.base_qindex;
1086 #else
1087 // calculate pixel average
1088 const int block_luma_avg = av1_log_block_avg(cpi, x, bsize, mi_row, mi_col);
1089 // adjust offset based on average of the pixel block
1090 int offset = 0;
1091 for (int i = 0; i < HDR_QP_LEVELS; i++) {
1092 if (block_luma_avg >= hdr_thres[i] && block_luma_avg < hdr_thres[i + 1]) {
1093 offset = (int)(hdr10_qp_offset[i] * QP_SCALE_FACTOR);
1094 break;
1095 }
1096 }
1097
1098 const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
1099 offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
1100 offset = AOMMAX(offset, -delta_q_info->delta_q_res * 9 + 1);
1101 int qindex = cm->quant_params.base_qindex + offset;
1102 qindex = AOMMIN(qindex, MAXQ);
1103 qindex = AOMMAX(qindex, MINQ);
1104
1105 return qindex;
1106 #endif
1107 }
1108 #endif // !CONFIG_REALTIME_ONLY
1109
// Recursively resets the partitioning decision of a simple-motion-search tree
// to PARTITION_NONE. A NULL node is ignored. Children are visited only while
// the node's block size is at least BLOCK_8X8, each with the PARTITION_SPLIT
// sub-size of its parent.
void av1_reset_simple_motion_tree_partition(SIMPLE_MOTION_DATA_TREE *sms_tree,
                                            BLOCK_SIZE bsize) {
  if (sms_tree == NULL) return;
  sms_tree->partitioning = PARTITION_NONE;

  // Blocks below 8x8 are not descended into.
  if (bsize < BLOCK_8X8) return;

  const BLOCK_SIZE child_bsize =
      get_partition_subsize(bsize, PARTITION_SPLIT);
  for (int child = 0; child < 4; ++child) {
    av1_reset_simple_motion_tree_partition(sms_tree->split[child],
                                           child_bsize);
  }
}
1121
1122 // Record the ref frames that have been selected by square partition blocks.
av1_update_picked_ref_frames_mask(MACROBLOCK * const x,int ref_type,BLOCK_SIZE bsize,int mib_size,int mi_row,int mi_col)1123 void av1_update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type,
1124 BLOCK_SIZE bsize, int mib_size,
1125 int mi_row, int mi_col) {
1126 assert(mi_size_wide[bsize] == mi_size_high[bsize]);
1127 const int sb_size_mask = mib_size - 1;
1128 const int mi_row_in_sb = mi_row & sb_size_mask;
1129 const int mi_col_in_sb = mi_col & sb_size_mask;
1130 const int mi_size = mi_size_wide[bsize];
1131 for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) {
1132 for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) {
1133 x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type;
1134 }
1135 }
1136 }
1137
// Replaces each entry of the "left" CDF tables with the rounded weighted
// average of the left and top-right entries:
//   left = (left * wt_left + tr * wt_tr + (wt_left + wt_tr) / 2)
//          / (wt_left + wt_tr)
//
// num_cdfs tables are processed, laid out cdf_stride entries apart. Within a
// table, entries 0..nsymbs inclusive are averaged (nsymbs + 1 values).
// cdf_ptr_tr is only read.
static void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left, aom_cdf_prob *cdf_ptr_tr,
                           int num_cdfs, int cdf_stride, int nsymbs,
                           int wt_left, int wt_tr) {
  const int wt_total = wt_left + wt_tr;
  const int rounding = wt_total / 2;

  for (int table = 0; table < num_cdfs; table++) {
    const int base = table * cdf_stride;
    for (int sym = 0; sym <= nsymbs; sym++) {
      const int idx = base + sym;
      const int weighted_sum = (int)cdf_ptr_left[idx] * wt_left +
                               (int)cdf_ptr_tr[idx] * wt_tr + rounding;
      cdf_ptr_left[idx] = (aom_cdf_prob)(weighted_sum / wt_total);
      assert(cdf_ptr_left[idx] >= 0 && cdf_ptr_left[idx] < CDF_PROB_TOP);
    }
  }
}
1153
// Averages every CDF table stored in the array `cname_left` with the matching
// table in `cname_tr`, writing the result into `cname_left`.
//
// AVERAGE_CDF: tables are packed with the natural stride CDF_SIZE(nsymbs).
// AVG_CDF_STRIDE: tables hold `nsymbs` symbols but are stored `cdf_stride`
// entries apart (used when the array is dimensioned for a larger alphabet).
//
// Requirements at the expansion site:
//   - `cname_left` must be an actual array (not a pointer), because the number
//     of tables is derived from sizeof(cname_left);
//   - `wt_left` and `wt_tr` must be in scope; they are picked up by name.
//
// Macro parameters are parenthesized wherever they appear in an expression so
// that arguments such as `a + 1` expand with the intended precedence.
#define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \
  AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs))

#define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride)          \
  do {                                                                    \
    aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)(cname_left);            \
    aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)(cname_tr);                \
    int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob);      \
    int num_cdfs = array_size / (cdf_stride);                             \
    avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, (cdf_stride),      \
                   (nsymbs), wt_left, wt_tr);                             \
  } while (0)
1166
// Averages all motion-vector CDFs of nmv_left with those of nmv_tr, using the
// weights wt_left / wt_tr. The result is stored in nmv_left; nmv_tr is only
// read. Covers the joint CDF and, for each of the two MV components, the
// class, fractional, sign, high-precision, class0 and bits CDFs.
static void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr, int wt_left,
                    int wt_tr) {
  AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);

  for (int comp = 0; comp < 2; comp++) {
    // Magnitude class.
    AVERAGE_CDF(nmv_left->comps[comp].classes_cdf,
                nmv_tr->comps[comp].classes_cdf, MV_CLASSES);
    // Fractional parts.
    AVERAGE_CDF(nmv_left->comps[comp].class0_fp_cdf,
                nmv_tr->comps[comp].class0_fp_cdf, MV_FP_SIZE);
    AVERAGE_CDF(nmv_left->comps[comp].fp_cdf, nmv_tr->comps[comp].fp_cdf,
                MV_FP_SIZE);
    // Sign.
    AVERAGE_CDF(nmv_left->comps[comp].sign_cdf, nmv_tr->comps[comp].sign_cdf,
                2);
    // High-precision parts.
    AVERAGE_CDF(nmv_left->comps[comp].class0_hp_cdf,
                nmv_tr->comps[comp].class0_hp_cdf, 2);
    AVERAGE_CDF(nmv_left->comps[comp].hp_cdf, nmv_tr->comps[comp].hp_cdf, 2);
    // Class-0 value and integer bits.
    AVERAGE_CDF(nmv_left->comps[comp].class0_cdf,
                nmv_tr->comps[comp].class0_cdf, CLASS0_SIZE);
    AVERAGE_CDF(nmv_left->comps[comp].bits_cdf, nmv_tr->comps[comp].bits_cdf,
                2);
  }
}
1185
1186 // In case of row-based multi-threading of encoder, since we always
1187 // keep a top - right sync, we can average the top - right SB's CDFs and
1188 // the left SB's CDFs and use the same for current SB's encoding to
1189 // improve the performance. This function facilitates the averaging
1190 // of CDF and used only when row-mt is enabled in encoder.
av1_avg_cdf_symbols(FRAME_CONTEXT * ctx_left,FRAME_CONTEXT * ctx_tr,int wt_left,int wt_tr)1191 void av1_avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
1192 int wt_left, int wt_tr) {
1193 AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2);
1194 AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2);
1195 AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2);
1196 AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5);
1197 AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6);
1198 AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7);
1199 AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8);
1200 AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9);
1201 AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10);
1202 AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11);
1203 AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3);
1204 AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4);
1205 AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE);
1206 AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2);
1207 AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2);
1208 AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2);
1209 AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2);
1210 AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
1211 ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
1212 AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf,
1213 MASKED_COMPOUND_TYPES);
1214 AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16);
1215 AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2);
1216 AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2);
1217 AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf,
1218 INTERINTRA_MODES);
1219 AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES);
1220 AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2);
1221 AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf,
1222 PALETTE_SIZES);
1223 AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf,
1224 PALETTE_SIZES);
1225 for (int j = 0; j < PALETTE_SIZES; j++) {
1226 int nsymbs = j + PALETTE_MIN_SIZE;
1227 AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j],
1228 ctx_tr->palette_y_color_index_cdf[j], nsymbs,
1229 CDF_SIZE(PALETTE_COLORS));
1230 AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j],
1231 ctx_tr->palette_uv_color_index_cdf[j], nsymbs,
1232 CDF_SIZE(PALETTE_COLORS));
1233 }
1234 AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2);
1235 AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2);
1236 AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2);
1237 AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2);
1238 AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2);
1239 AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2);
1240 AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2);
1241 AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2);
1242 AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2);
1243 AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2);
1244 AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2);
1245 AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2);
1246 AVERAGE_CDF(ctx_left->skip_txfm_cdfs, ctx_tr->skip_txfm_cdfs, 2);
1247 AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2);
1248 avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr);
1249 avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr);
1250 AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2);
1251 AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2);
1252 AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf,
1253 ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
1254 AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2);
1255 AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf,
1256 FILTER_INTRA_MODES);
1257 AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf,
1258 RESTORE_SWITCHABLE_TYPES);
1259 AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2);
1260 AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2);
1261 AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES);
1262 AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0],
1263 UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES));
1264 AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES);
1265 for (int i = 0; i < PARTITION_CONTEXTS; i++) {
1266 if (i < 4) {
1267 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4,
1268 CDF_SIZE(10));
1269 } else if (i < 16) {
1270 AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10);
1271 } else {
1272 AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8,
1273 CDF_SIZE(10));
1274 }
1275 }
1276 AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf,
1277 SWITCHABLE_FILTERS);
1278 AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES);
1279 AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf,
1280 2 * MAX_ANGLE_DELTA + 1);
1281 AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH,
1282 CDF_SIZE(MAX_TX_DEPTH + 1));
1283 AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1],
1284 MAX_TX_DEPTH + 1);
1285 AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2],
1286 MAX_TX_DEPTH + 1);
1287 AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3],
1288 MAX_TX_DEPTH + 1);
1289 AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1);
1290 AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1);
1291 for (int i = 0; i < FRAME_LF_COUNT; i++) {
1292 AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i],
1293 DELTA_LF_PROBS + 1);
1294 }
1295 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7,
1296 CDF_SIZE(TX_TYPES));
1297 AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5,
1298 CDF_SIZE(TX_TYPES));
1299 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16,
1300 CDF_SIZE(TX_TYPES));
1301 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12,
1302 CDF_SIZE(TX_TYPES));
1303 AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2,
1304 CDF_SIZE(TX_TYPES));
1305 AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS);
1306 AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf,
1307 CFL_ALPHABET_SIZE);
1308 }
1309
1310 // Check neighbor blocks' motion information.
check_neighbor_blocks(MB_MODE_INFO ** mi,int mi_stride,const TileInfo * const tile_info,int mi_row,int mi_col)1311 static int check_neighbor_blocks(MB_MODE_INFO **mi, int mi_stride,
1312 const TileInfo *const tile_info, int mi_row,
1313 int mi_col) {
1314 int is_above_low_motion = 1;
1315 int is_left_low_motion = 1;
1316 const int thr = 24;
1317
1318 // Check above block.
1319 if (mi_row > tile_info->mi_row_start) {
1320 const MB_MODE_INFO *above_mbmi = mi[-mi_stride];
1321 const int_mv above_mv = above_mbmi->mv[0];
1322 if (above_mbmi->mode >= INTRA_MODE_END &&
1323 (abs(above_mv.as_mv.row) > thr || abs(above_mv.as_mv.col) > thr))
1324 is_above_low_motion = 0;
1325 }
1326
1327 // Check left block.
1328 if (mi_col > tile_info->mi_col_start) {
1329 const MB_MODE_INFO *left_mbmi = mi[-1];
1330 const int_mv left_mv = left_mbmi->mv[0];
1331 if (left_mbmi->mode >= INTRA_MODE_END &&
1332 (abs(left_mv.as_mv.row) > thr || abs(left_mv.as_mv.col) > thr))
1333 is_left_low_motion = 0;
1334 }
1335
1336 return (is_above_low_motion && is_left_low_motion);
1337 }
1338
1339 // Check this block's motion in a fast way.
fast_detect_non_zero_motion(AV1_COMP * cpi,const uint8_t * src_y,int src_ystride,const uint8_t * last_src_y,int last_src_ystride,int mi_row,int mi_col)1340 static int fast_detect_non_zero_motion(AV1_COMP *cpi, const uint8_t *src_y,
1341 int src_ystride,
1342 const uint8_t *last_src_y,
1343 int last_src_ystride, int mi_row,
1344 int mi_col) {
1345 AV1_COMMON *const cm = &cpi->common;
1346 const BLOCK_SIZE bsize = cm->seq_params->sb_size;
1347 unsigned int blk_sad = INT_MAX;
1348 if (cpi->src_sad_blk_64x64 != NULL) {
1349 const int sb_size_by_mb = (bsize == BLOCK_128X128)
1350 ? (cm->seq_params->mib_size >> 1)
1351 : cm->seq_params->mib_size;
1352 const int sb_cols =
1353 (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
1354 const int sbi_col = mi_col / sb_size_by_mb;
1355 const int sbi_row = mi_row / sb_size_by_mb;
1356 blk_sad = (unsigned int)cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
1357 } else {
1358 blk_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
1359 last_src_ystride);
1360 }
1361
1362 // Search 4 1-away points.
1363 const uint8_t *const search_pos[4] = {
1364 last_src_y - last_src_ystride,
1365 last_src_y - 1,
1366 last_src_y + 1,
1367 last_src_y + last_src_ystride,
1368 };
1369 unsigned int sad_arr[4];
1370 cpi->ppi->fn_ptr[bsize].sdx4df(src_y, src_ystride, search_pos,
1371 last_src_ystride, sad_arr);
1372
1373 blk_sad = (blk_sad * 5) >> 3;
1374 return (blk_sad < sad_arr[0] && blk_sad < sad_arr[1] &&
1375 blk_sad < sad_arr[2] && blk_sad < sad_arr[3]);
1376 }
1377
1378 // Grade the temporal variation of the source by comparing the current sb and
1379 // its collocated block in the last frame.
av1_source_content_sb(AV1_COMP * cpi,MACROBLOCK * x,TileDataEnc * tile_data,int mi_row,int mi_col)1380 void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
1381 int mi_row, int mi_col) {
1382 if (cpi->last_source->y_width != cpi->source->y_width ||
1383 cpi->last_source->y_height != cpi->source->y_height)
1384 return;
1385 #if CONFIG_AV1_HIGHBITDEPTH
1386 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) return;
1387 #endif
1388
1389 unsigned int tmp_sse;
1390 unsigned int tmp_variance;
1391 const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
1392 uint8_t *src_y = cpi->source->y_buffer;
1393 const int src_ystride = cpi->source->y_stride;
1394 const int src_offset = src_ystride * (mi_row << 2) + (mi_col << 2);
1395 uint8_t *last_src_y = cpi->last_source->y_buffer;
1396 const int last_src_ystride = cpi->last_source->y_stride;
1397 const int last_src_offset = last_src_ystride * (mi_row << 2) + (mi_col << 2);
1398 uint64_t avg_source_sse_threshold_verylow = 10000; // ~1.5*1.5*(64*64)
1399 uint64_t avg_source_sse_threshold_low[2] = { 100000, // ~5*5*(64*64)
1400 36000 }; // ~3*3*(64*64)
1401
1402 uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64)
1403 if (cpi->sf.rt_sf.increase_source_sad_thresh) {
1404 avg_source_sse_threshold_high = avg_source_sse_threshold_high << 1;
1405 avg_source_sse_threshold_low[0] = avg_source_sse_threshold_low[0] << 1;
1406 avg_source_sse_threshold_verylow = avg_source_sse_threshold_verylow << 1;
1407 }
1408 uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5
1409 src_y += src_offset;
1410 last_src_y += last_src_offset;
1411 tmp_variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
1412 last_src_ystride, &tmp_sse);
1413 // rd thresholds
1414 if (tmp_sse < avg_source_sse_threshold_low[1])
1415 x->content_state_sb.source_sad_rd = kLowSad;
1416
1417 // nonrd thresholds
1418 if (tmp_sse == 0) {
1419 x->content_state_sb.source_sad_nonrd = kZeroSad;
1420 return;
1421 }
1422 if (tmp_sse < avg_source_sse_threshold_verylow)
1423 x->content_state_sb.source_sad_nonrd = kVeryLowSad;
1424 else if (tmp_sse < avg_source_sse_threshold_low[0])
1425 x->content_state_sb.source_sad_nonrd = kLowSad;
1426 else if (tmp_sse > avg_source_sse_threshold_high)
1427 x->content_state_sb.source_sad_nonrd = kHighSad;
1428
1429 // Detect large lighting change.
1430 // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
1431 if (tmp_variance < (tmp_sse >> 1) && (tmp_sse - tmp_variance) > sum_sq_thresh)
1432 x->content_state_sb.lighting_change = 1;
1433 if ((tmp_sse - tmp_variance) < (sum_sq_thresh >> 1))
1434 x->content_state_sb.low_sumdiff = 1;
1435
1436 if (tmp_sse > ((avg_source_sse_threshold_high * 7) >> 3) &&
1437 !x->content_state_sb.lighting_change && !x->content_state_sb.low_sumdiff)
1438 x->sb_force_fixed_part = 0;
1439
1440 if (!cpi->sf.rt_sf.use_rtc_tf || cpi->rc.high_source_sad ||
1441 cpi->rc.frame_source_sad > 20000 || cpi->svc.number_spatial_layers > 1)
1442 return;
1443
1444 // In-place temporal filter. If psnr calculation is enabled, we store the
1445 // source for that.
1446 AV1_COMMON *const cm = &cpi->common;
1447 // Calculate n*mean^2
1448 const unsigned int nmean2 = tmp_sse - tmp_variance;
1449 const int ac_q_step = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
1450 cm->seq_params->bit_depth);
1451 const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
1452 const int avg_q_step = av1_ac_quant_QTX(p_rc->avg_frame_qindex[INTER_FRAME],
1453 0, cm->seq_params->bit_depth);
1454
1455 const unsigned int threshold =
1456 (cpi->sf.rt_sf.use_rtc_tf == 1)
1457 ? (clamp(avg_q_step, 250, 1000)) * ac_q_step
1458 : 250 * ac_q_step;
1459
1460 // TODO(yunqing): use a weighted sum instead of averaging in filtering.
1461 if (tmp_variance <= threshold && nmean2 <= 15) {
1462 // Check neighbor blocks. If neighbor blocks aren't low-motion blocks,
1463 // skip temporal filtering for this block.
1464 MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
1465 get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
1466 const TileInfo *const tile_info = &tile_data->tile_info;
1467 const int is_neighbor_blocks_low_motion = check_neighbor_blocks(
1468 mi, cm->mi_params.mi_stride, tile_info, mi_row, mi_col);
1469 if (!is_neighbor_blocks_low_motion) return;
1470
1471 // Only consider 64x64 SB for now. Need to extend to 128x128 for large SB
1472 // size.
1473 // Test several nearby points. If non-zero mv exists, don't do temporal
1474 // filtering.
1475 const int is_this_blk_low_motion = fast_detect_non_zero_motion(
1476 cpi, src_y, src_ystride, last_src_y, last_src_ystride, mi_row, mi_col);
1477
1478 if (!is_this_blk_low_motion) return;
1479
1480 const int shift_x[2] = { 0, cpi->source->subsampling_x };
1481 const int shift_y[2] = { 0, cpi->source->subsampling_y };
1482 const uint8_t h = block_size_high[bsize];
1483 const uint8_t w = block_size_wide[bsize];
1484
1485 for (int plane = 0; plane < av1_num_planes(cm); ++plane) {
1486 uint8_t *src = cpi->source->buffers[plane];
1487 const int src_stride = cpi->source->strides[plane != 0];
1488 uint8_t *last_src = cpi->last_source->buffers[plane];
1489 const int last_src_stride = cpi->last_source->strides[plane != 0];
1490 src += src_stride * (mi_row << (2 - shift_y[plane != 0])) +
1491 (mi_col << (2 - shift_x[plane != 0]));
1492 last_src += last_src_stride * (mi_row << (2 - shift_y[plane != 0])) +
1493 (mi_col << (2 - shift_x[plane != 0]));
1494
1495 for (int i = 0; i < (h >> shift_y[plane != 0]); ++i) {
1496 for (int j = 0; j < (w >> shift_x[plane != 0]); ++j) {
1497 src[j] = (last_src[j] + src[j]) >> 1;
1498 }
1499 src += src_stride;
1500 last_src += last_src_stride;
1501 }
1502 }
1503 }
1504 }
1505
1506 // Memset the mbmis at the current superblock to 0
av1_reset_mbmi(CommonModeInfoParams * const mi_params,BLOCK_SIZE sb_size,int mi_row,int mi_col)1507 void av1_reset_mbmi(CommonModeInfoParams *const mi_params, BLOCK_SIZE sb_size,
1508 int mi_row, int mi_col) {
1509 // size of sb in unit of mi (BLOCK_4X4)
1510 const int sb_size_mi = mi_size_wide[sb_size];
1511 const int mi_alloc_size_1d = mi_size_wide[mi_params->mi_alloc_bsize];
1512 // size of sb in unit of allocated mi size
1513 const int sb_size_alloc_mi = mi_size_wide[sb_size] / mi_alloc_size_1d;
1514 assert(mi_params->mi_alloc_stride % sb_size_alloc_mi == 0 &&
1515 "mi is not allocated as a multiple of sb!");
1516 assert(mi_params->mi_stride % sb_size_mi == 0 &&
1517 "mi_grid_base is not allocated as a multiple of sb!");
1518
1519 const int mi_rows = mi_size_high[sb_size];
1520 for (int cur_mi_row = 0; cur_mi_row < mi_rows; cur_mi_row++) {
1521 assert(get_mi_grid_idx(mi_params, 0, mi_col + mi_alloc_size_1d) <
1522 mi_params->mi_stride);
1523 const int mi_grid_idx =
1524 get_mi_grid_idx(mi_params, mi_row + cur_mi_row, mi_col);
1525 const int alloc_mi_idx =
1526 get_alloc_mi_idx(mi_params, mi_row + cur_mi_row, mi_col);
1527 memset(&mi_params->mi_grid_base[mi_grid_idx], 0,
1528 sb_size_mi * sizeof(*mi_params->mi_grid_base));
1529 memset(&mi_params->tx_type_map[mi_grid_idx], 0,
1530 sb_size_mi * sizeof(*mi_params->tx_type_map));
1531 if (cur_mi_row % mi_alloc_size_1d == 0) {
1532 memset(&mi_params->mi_alloc[alloc_mi_idx], 0,
1533 sb_size_alloc_mi * sizeof(*mi_params->mi_alloc));
1534 }
1535 }
1536 }
1537
// Saves the encoder state tied to the superblock at (mi_row, mi_col) into
// sb_fp_stats. av1_restore_sb_state() copies the same fields back.
void av1_backup_sb_state(SB_FIRST_PASS_STATS *sb_fp_stats, const AV1_COMP *cpi,
                         ThreadData *td, const TileDataEnc *tile_data,
                         int mi_row, int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;

  // Position the above/left transform context pointers for this superblock
  // before the context is saved.
  xd->above_txfm_context =
      cm->above_contexts.txfm[tile_data->tile_info.tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
  av1_save_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes);

  // Counters and frame counts are copied by value.
  sb_fp_stats->fc = *td->counts;
  sb_fp_stats->rd_count = td->rd_counts;
  sb_fp_stats->split_count = x->txfm_search_info.txb_split_count;

  // The rd models are only copied when inter_mode_rd_model_estimation is 1.
  // Per the original note, the copy must not happen in the row_mt case
  // (data race), and skipping it there does not change behavior.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
    memcpy(sb_fp_stats->inter_mode_rd_models, tile_data->inter_mode_rd_models,
           sizeof(sb_fp_stats->inter_mode_rd_models));
  }

  memcpy(sb_fp_stats->thresh_freq_fact, x->thresh_freq_fact,
         sizeof(sb_fp_stats->thresh_freq_fact));

  // Remember the qindex stored in the allocated mi entry of this position.
  sb_fp_stats->current_qindex =
      cm->mi_params.mi_alloc[get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col)]
          .current_qindex;

#if CONFIG_INTERNAL_STATS
  memcpy(sb_fp_stats->mode_chosen_counts, cpi->mode_chosen_counts,
         sizeof(sb_fp_stats->mode_chosen_counts));
#endif  // CONFIG_INTERNAL_STATS
}
1579
// Writes the state held in sb_fp_stats back into the encoder for the
// superblock at (mi_row, mi_col). Restores the fields that
// av1_backup_sb_state() saved.
void av1_restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats, AV1_COMP *cpi,
                          ThreadData *td, TileDataEnc *tile_data, int mi_row,
                          int mi_col) {
  const AV1_COMMON *cm = &cpi->common;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *x = &td->mb;

  av1_restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size,
                      num_planes);

  // Counters and frame counts are copied back by value.
  *td->counts = sb_fp_stats->fc;
  td->rd_counts = sb_fp_stats->rd_count;
  x->txfm_search_info.txb_split_count = sb_fp_stats->split_count;

  // The rd models are only restored when inter_mode_rd_model_estimation is 1,
  // the same guard used when they were backed up.
  if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
    memcpy(tile_data->inter_mode_rd_models, sb_fp_stats->inter_mode_rd_models,
           sizeof(sb_fp_stats->inter_mode_rd_models));
  }

  memcpy(x->thresh_freq_fact, sb_fp_stats->thresh_freq_fact,
         sizeof(sb_fp_stats->thresh_freq_fact));

  // Put the saved qindex back into the allocated mi entry of this position.
  cm->mi_params.mi_alloc[get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col)]
      .current_qindex = sb_fp_stats->current_qindex;

#if CONFIG_INTERNAL_STATS
  memcpy(cpi->mode_chosen_counts, sb_fp_stats->mode_chosen_counts,
         sizeof(sb_fp_stats->mode_chosen_counts));
#endif  // CONFIG_INTERNAL_STATS
}
1614
1615 /*! Checks whether to skip updating the entropy cost based on tile info.
1616 *
1617 * This function contains the common code used to skip the cost update of coeff,
1618 * mode, mv and dv symbols.
1619 */
skip_cost_update(const SequenceHeader * seq_params,const TileInfo * const tile_info,const int mi_row,const int mi_col,INTERNAL_COST_UPDATE_TYPE upd_level)1620 static int skip_cost_update(const SequenceHeader *seq_params,
1621 const TileInfo *const tile_info, const int mi_row,
1622 const int mi_col,
1623 INTERNAL_COST_UPDATE_TYPE upd_level) {
1624 if (upd_level == INTERNAL_COST_UPD_SB) return 0;
1625 if (upd_level == INTERNAL_COST_UPD_OFF) return 1;
1626
1627 // upd_level is at most as frequent as each sb_row in a tile.
1628 if (mi_col != tile_info->mi_col_start) return 1;
1629
1630 if (upd_level == INTERNAL_COST_UPD_SBROW_SET) {
1631 const int mib_size_log2 = seq_params->mib_size_log2;
1632 const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
1633 const int sb_size = seq_params->mib_size * MI_SIZE;
1634 const int tile_height =
1635 (tile_info->mi_row_end - tile_info->mi_row_start) * MI_SIZE;
1636 // When upd_level = INTERNAL_COST_UPD_SBROW_SET, the cost update happens
1637 // once for 2, 4 sb rows for sb size 128, sb size 64 respectively. However,
1638 // as the update will not be equally spaced in smaller resolutions making
1639 // it equally spaced by calculating (mv_num_rows_cost_update) the number of
1640 // rows after which the cost update should happen.
1641 const int sb_size_update_freq_map[2] = { 2, 4 };
1642 const int update_freq_sb_rows =
1643 sb_size_update_freq_map[sb_size != MAX_SB_SIZE];
1644 const int update_freq_num_rows = sb_size * update_freq_sb_rows;
1645 // Round-up the division result to next integer.
1646 const int num_updates_per_tile =
1647 (tile_height + update_freq_num_rows - 1) / update_freq_num_rows;
1648 const int num_rows_update_per_tile = num_updates_per_tile * sb_size;
1649 // Round-up the division result to next integer.
1650 const int num_sb_rows_per_update =
1651 (tile_height + num_rows_update_per_tile - 1) / num_rows_update_per_tile;
1652 if ((sb_row % num_sb_rows_per_update) != 0) return 1;
1653 }
1654 return 0;
1655 }
1656
1657 // Checks for skip status of mv cost update.
skip_mv_cost_update(AV1_COMP * cpi,const TileInfo * const tile_info,const int mi_row,const int mi_col)1658 static int skip_mv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info,
1659 const int mi_row, const int mi_col) {
1660 const AV1_COMMON *cm = &cpi->common;
1661 // For intra frames, mv cdfs are not updated during the encode. Hence, the mv
1662 // cost calculation is skipped in this case.
1663 if (frame_is_intra_only(cm)) return 1;
1664
1665 return skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col,
1666 cpi->sf.inter_sf.mv_cost_upd_level);
1667 }
1668
1669 // Checks for skip status of dv cost update.
skip_dv_cost_update(AV1_COMP * cpi,const TileInfo * const tile_info,const int mi_row,const int mi_col)1670 static int skip_dv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info,
1671 const int mi_row, const int mi_col) {
1672 const AV1_COMMON *cm = &cpi->common;
1673 // Intrabc is only applicable to intra frames. So skip if intrabc is not
1674 // allowed.
1675 if (!av1_allow_intrabc(cm) || is_stat_generation_stage(cpi)) {
1676 return 1;
1677 }
1678
1679 return skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col,
1680 cpi->sf.intra_sf.dv_cost_upd_level);
1681 }
1682
1683 // Update the rate costs of some symbols according to the frequency directed
1684 // by speed features
av1_set_cost_upd_freq(AV1_COMP * cpi,ThreadData * td,const TileInfo * const tile_info,const int mi_row,const int mi_col)1685 void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
1686 const TileInfo *const tile_info, const int mi_row,
1687 const int mi_col) {
1688 AV1_COMMON *const cm = &cpi->common;
1689 const int num_planes = av1_num_planes(cm);
1690 MACROBLOCK *const x = &td->mb;
1691 MACROBLOCKD *const xd = &x->e_mbd;
1692
1693 if (cm->features.disable_cdf_update) {
1694 return;
1695 }
1696
1697 switch (cpi->sf.inter_sf.coeff_cost_upd_level) {
1698 case INTERNAL_COST_UPD_OFF:
1699 case INTERNAL_COST_UPD_TILE: // Tile level
1700 break;
1701 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile
1702 case INTERNAL_COST_UPD_SBROW: // SB row level in tile
1703 case INTERNAL_COST_UPD_SB: // SB level
1704 if (skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col,
1705 cpi->sf.inter_sf.coeff_cost_upd_level))
1706 break;
1707 av1_fill_coeff_costs(&x->coeff_costs, xd->tile_ctx, num_planes);
1708 break;
1709 default: assert(0);
1710 }
1711
1712 switch (cpi->sf.inter_sf.mode_cost_upd_level) {
1713 case INTERNAL_COST_UPD_OFF:
1714 case INTERNAL_COST_UPD_TILE: // Tile level
1715 break;
1716 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile
1717 case INTERNAL_COST_UPD_SBROW: // SB row level in tile
1718 case INTERNAL_COST_UPD_SB: // SB level
1719 if (skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col,
1720 cpi->sf.inter_sf.mode_cost_upd_level))
1721 break;
1722 av1_fill_mode_rates(cm, &x->mode_costs, xd->tile_ctx);
1723 break;
1724 default: assert(0);
1725 }
1726
1727 switch (cpi->sf.inter_sf.mv_cost_upd_level) {
1728 case INTERNAL_COST_UPD_OFF:
1729 case INTERNAL_COST_UPD_TILE: // Tile level
1730 break;
1731 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile
1732 case INTERNAL_COST_UPD_SBROW: // SB row level in tile
1733 case INTERNAL_COST_UPD_SB: // SB level
1734 // Checks for skip status of mv cost update.
1735 if (skip_mv_cost_update(cpi, tile_info, mi_row, mi_col)) break;
1736 av1_fill_mv_costs(&xd->tile_ctx->nmvc,
1737 cm->features.cur_frame_force_integer_mv,
1738 cm->features.allow_high_precision_mv, x->mv_costs);
1739 break;
1740 default: assert(0);
1741 }
1742
1743 switch (cpi->sf.intra_sf.dv_cost_upd_level) {
1744 case INTERNAL_COST_UPD_OFF:
1745 case INTERNAL_COST_UPD_TILE: // Tile level
1746 break;
1747 case INTERNAL_COST_UPD_SBROW_SET: // SB row set level in tile
1748 case INTERNAL_COST_UPD_SBROW: // SB row level in tile
1749 case INTERNAL_COST_UPD_SB: // SB level
1750 // Checks for skip status of dv cost update.
1751 if (skip_dv_cost_update(cpi, tile_info, mi_row, mi_col)) break;
1752 av1_fill_dv_costs(&xd->tile_ctx->ndvc, x->dv_costs);
1753 break;
1754 default: assert(0);
1755 }
1756 }
1757
av1_dealloc_src_diff_buf(struct macroblock * mb,int num_planes)1758 void av1_dealloc_src_diff_buf(struct macroblock *mb, int num_planes) {
1759 for (int plane = 0; plane < num_planes; ++plane) {
1760 aom_free(mb->plane[plane].src_diff);
1761 mb->plane[plane].src_diff = NULL;
1762 }
1763 }
1764
av1_alloc_src_diff_buf(const struct AV1Common * cm,struct macroblock * mb)1765 void av1_alloc_src_diff_buf(const struct AV1Common *cm, struct macroblock *mb) {
1766 const int num_planes = av1_num_planes(cm);
1767 #ifndef NDEBUG
1768 for (int plane = 0; plane < num_planes; ++plane) {
1769 assert(!mb->plane[plane].src_diff);
1770 }
1771 #endif
1772 for (int plane = 0; plane < num_planes; ++plane) {
1773 const int subsampling_xy =
1774 plane ? cm->seq_params->subsampling_x + cm->seq_params->subsampling_y
1775 : 0;
1776 const int sb_size = MAX_SB_SQUARE >> subsampling_xy;
1777 CHECK_MEM_ERROR(cm, mb->plane[plane].src_diff,
1778 (int16_t *)aom_memalign(
1779 32, sizeof(*mb->plane[plane].src_diff) * sb_size));
1780 }
1781 }
1782