/*
 * Copyright (c) 2023 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>

#include "./vpx_dsp_rtcd.h"
#if CONFIG_NON_GREEDY_MV
#include "vp9/common/vp9_mvref_common.h"
#endif
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_tpl_model.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vpx_codec.h"

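// Sets up the GF_PICTURE list for the current group of pictures: the golden
// frame, the base layer ARF, and the intermediate frames with their reference
// slots, plus up to two look-ahead frames borrowed from the next GOP.
// Returns the number of borrowed (extended) frames.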
static int init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                           const GF_GROUP *gf_group, int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int gld_index = -1;
  int alt_index = -1;
  int lst_index = -1;
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize the Golden reference frame.
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  // Initialize the base layer ARF frame.
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames.
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of the group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from the last GOP, plus the last overlay frame
    // in the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }

  return extend_frame_count;
}

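// Clears the per-block TPL stats of every frame in the ARF group and marks
// each frame invalid until mc_flow_dispenser() fills it in.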
static void init_tpl_stats(VP9_COMP *cpi) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    memset(tpl_frame->tpl_stats_ptr, 0,
           tpl_frame->height * tpl_frame->width *
               sizeof(*tpl_frame->tpl_stats_ptr));
    tpl_frame->is_valid = 0;
  }
}

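// Frees the per-frame block stats and the frame stats list itself; callers
// are expected to reassign frame_stats_list before it is used again.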
static void free_tpl_frame_stats_list(VpxTplGopStats *tpl_gop_stats) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < tpl_gop_stats->size; ++frame_idx) {
    vpx_free(tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list);
  }
  vpx_free(tpl_gop_stats->frame_stats_list);
}

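// (Re)allocates tpl_gop_stats sized to the current GOP so that the raw
// per-block stats can be recorded before backward propagation runs.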
static void init_tpl_stats_before_propagation(
    struct vpx_internal_error_info *error_info, VpxTplGopStats *tpl_gop_stats,
    TplDepFrame *tpl_stats, int tpl_gop_frames, int frame_width,
    int frame_height) {
  int frame_idx;
  free_tpl_frame_stats_list(tpl_gop_stats);
  CHECK_MEM_ERROR(
      error_info, tpl_gop_stats->frame_stats_list,
      vpx_calloc(tpl_gop_frames, sizeof(*tpl_gop_stats->frame_stats_list)));
  tpl_gop_stats->size = tpl_gop_frames;
  for (frame_idx = 0; frame_idx < tpl_gop_frames; ++frame_idx) {
    const int mi_rows = tpl_stats[frame_idx].height;
    const int mi_cols = tpl_stats[frame_idx].width;
    CHECK_MEM_ERROR(
        error_info, tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list,
        vpx_calloc(
            mi_rows * mi_cols,
            sizeof(
                *tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list)));
    tpl_gop_stats->frame_stats_list[frame_idx].num_blocks = mi_rows * mi_cols;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_width = frame_width;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_height = frame_height;
  }
}

#if CONFIG_NON_GREEDY_MV
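// Full-pixel motion search for the non-greedy MV path. The diamond search is
// biased toward the neighboring full-pel MVs (weighted by lambda) to keep the
// motion field consistent. Note that bestsme is returned unmodified
// (UINT_MAX); the caller ignores the return value.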
static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                         MotionField *motion_field,
                                         int frame_idx, uint8_t *cur_frame_buf,
                                         uint8_t *ref_frame_buf, int stride,
                                         BLOCK_SIZE bsize, int mi_row,
                                         int mi_col, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  int step_param;
  uint32_t bestsme = UINT_MAX;
  const MvLimits tmp_mv_limits = x->mv_limits;
  // lambda is used to adjust the importance of motion vector consistency.
  // TODO(angiebird): Figure out lambda's proper value.
  const int lambda = cpi->tpl_stats[frame_idx].lambda;
  int_mv nb_full_mvs[NB_MVS_NUM];
  int nb_full_mv_num;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Set up frame pointers.
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  nb_full_mv_num =
      vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
  vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
                             lambda, 1, nb_full_mvs, nb_full_mv_num, mv);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  return bestsme;
}

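// Sub-pixel refinement of the MV found by full_pixel_motion_search(), using a
// 2-tap interpolation filter and no MV rate cost.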
static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                        uint8_t *cur_frame_buf,
                                        uint8_t *ref_frame_buf, int stride,
                                        BLOCK_SIZE bsize, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];

  MV best_ref_mv1 = { 0, 0 };

  // Set up frame pointers.
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array.
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}

#else  // CONFIG_NON_GREEDY_MV
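// Combined full-pixel and sub-pixel motion search, used when
// CONFIG_NON_GREEDY_MV is disabled.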
static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
                                              uint8_t *cur_frame_buf,
                                              uint8_t *ref_frame_buf,
                                              int stride, BLOCK_SIZE bsize,
                                              MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS search_method = NSTEP;
  int step_param;
  int sadpb = x->sadperbit16;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];
  const MvLimits tmp_mv_limits = x->mv_limits;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Set up frame pointers.
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
                        search_method, sadpb, cond_cost_list(cpi, cost_list),
                        &best_ref_mv1, mv, 0, 0);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array.
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}
#endif  // CONFIG_NON_GREEDY_MV

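// Returns the overlap area, in pixels, between the motion compensated
// reference block at (ref_pos_row, ref_pos_col) and one of the four
// grid-aligned blocks (selected by "block", in raster order) it may straddle.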
static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
                            int ref_pos_col, int block, BLOCK_SIZE bsize) {
  int width = 0, height = 0;
  int bw = 4 << b_width_log2_lookup[bsize];
  int bh = 4 << b_height_log2_lookup[bsize];

  switch (block) {
    case 0:
      width = grid_pos_col + bw - ref_pos_col;
      height = grid_pos_row + bh - ref_pos_row;
      break;
    case 1:
      width = ref_pos_col + bw - grid_pos_col;
      height = grid_pos_row + bh - ref_pos_row;
      break;
    case 2:
      width = grid_pos_col + bw - ref_pos_col;
      height = ref_pos_row + bh - grid_pos_row;
      break;
    case 3:
      width = ref_pos_col + bw - grid_pos_col;
      height = ref_pos_row + bh - grid_pos_row;
      break;
    default: assert(0);
  }

  return width * height;
}

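// Integer division rounding toward negative infinity, so negative reference
// positions map to the correct (lower) block index.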
static int round_floor(int ref_pos, int bsize_pix) {
  int round;
  if (ref_pos < 0)
    round = -(1 + (-ref_pos - 1) / bsize_pix);
  else
    round = ref_pos / bsize_pix;

  return round;
}

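// Copies the stats computed for a block onto every 8x8 unit it covers, while
// preserving each unit's accumulated mc_flow / mc_ref_cost values.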
static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
                            BLOCK_SIZE bsize, int stride) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
      const int64_t mc_flow = tpl_ptr->mc_flow;
      const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
      *tpl_ptr = *src_stats;
      tpl_ptr->mc_flow = mc_flow;
      tpl_ptr->mc_ref_cost = mc_ref_cost;
      tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
    }
  }
}

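// Records the raw (pre-propagation) stats of a block into the
// VpxTplBlockStats list that is later passed to external rate control.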
static void tpl_store_before_propagation(VpxTplBlockStats *tpl_block_stats,
                                         TplDepStats *tpl_stats, int mi_row,
                                         int mi_col, BLOCK_SIZE bsize,
                                         int stride, int64_t recon_error,
                                         int64_t rate_cost, int ref_frame_idx) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      VpxTplBlockStats *tpl_block_stats_ptr =
          &tpl_block_stats[(mi_row + idy) * stride + mi_col + idx];
      tpl_block_stats_ptr->row = mi_row * 8;
      tpl_block_stats_ptr->col = mi_col * 8;
      tpl_block_stats_ptr->inter_cost = src_stats->inter_cost;
      tpl_block_stats_ptr->intra_cost = src_stats->intra_cost;
      tpl_block_stats_ptr->recrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->mv_r = src_stats->mv.as_mv.row;
      tpl_block_stats_ptr->mv_c = src_stats->mv.as_mv.col;
      tpl_block_stats_ptr->ref_frame_index = ref_frame_idx;
    }
  }
}

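// Back-propagates the dependency cost of an 8x8 block into its reference
// frame. The amount propagated is
//   mc_flow = mc_dep_cost * (1 - inter_cost / intra_cost)
// split across the (up to four) grid-aligned reference blocks overlapped by
// the motion compensated prediction, in proportion to the overlap area.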
static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                               int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
  TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
  MV mv = tpl_stats->mv.as_mv;
  int mv_row = mv.row >> 3;
  int mv_col = mv.col >> 3;

  int ref_pos_row = mi_row * MI_SIZE + mv_row;
  int ref_pos_col = mi_col * MI_SIZE + mv_col;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pix_num = bw * bh;

  // Top-left grid block location, in pixels.
  int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
  int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
  int block;

  for (block = 0; block < 4; ++block) {
    int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
    int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);

    if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
        grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
      int overlap_area = get_overlap_area(
          grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
      int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
      int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;

      int64_t mc_flow = tpl_stats->mc_dep_cost -
                        (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
                            tpl_stats->intra_cost;

      int idx, idy;

      for (idy = 0; idy < mi_height; ++idy) {
        for (idx = 0; idx < mi_width; ++idx) {
          TplDepStats *des_stats =
              &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
                         (ref_mi_col + idx)];

          des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
          des_stats->mc_ref_cost +=
              ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
              pix_num;
          assert(overlap_area >= 0);
        }
      }
    }
  }
}

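// Applies tpl_model_update_b() to every 8x8 unit covered by the block.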
static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                             int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  int idx, idy;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      TplDepStats *tpl_ptr =
          &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
      tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
                         BLOCK_8X8);
    }
  }
}

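// Forward-quantizes and dequantizes the given coefficients, returning the
// resulting reconstruction error and sse (both clamped to at least 1).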
static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
                               TX_SIZE tx_size, int64_t *recon_error,
                               int64_t *sse, uint16_t *eob) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
  int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
  const int shift = tx_size == TX_32X32 ? 0 : 2;

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff,
                                 pd->dequant, eob, scan_order);
  } else {
    vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                          scan_order);
  }
#else
  vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                        scan_order);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
  *recon_error = VPXMAX(*recon_error, 1);

  *sse = (*sse) >> shift;
  *sse = VPXMAX(*sse, 1);
}

#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                             TX_SIZE tx_size) {
  // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
  switch (tx_size) {
    case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
    case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
    case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
    default: assert(0);
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

void vp9_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                      TX_SIZE tx_size) {
  switch (tx_size) {
    case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
    case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
    case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
    default: assert(0);
  }
}

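// Limits the MV search range so that motion compensation, including the
// interpolation filter extension, stays within the frame border.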
static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                          int mi_col) {
  x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
  x->mv_limits.row_max =
      (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
  x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
  x->mv_limits.col_max =
      ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
}

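// Crude rate estimate for a quantized block: each scanned coefficient
// contributes about log2(|level| + 1) + 1 bits plus a sign bit when nonzero,
// and the total is scaled into the probability cost domain.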
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
  const ScanOrder *const scan_order = &vp9_scan_orders[tx_size][DCT_DCT];
  int rate_cost = 1;
  int idx;
  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
  for (idx = 0; idx < eob; ++idx) {
    unsigned int abs_level = abs(qcoeff[scan_order->scan[idx]]);
    rate_cost += get_msb(abs_level + 1) + 1 + (abs_level > 0);
  }

  return (rate_cost << VP9_PROB_COST_SHIFT);
}

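// For one block, evaluates all intra prediction modes and all available
// reference frames using SATD of the Hadamard-transformed residue, then
// stores the best MV, reference index, and normalized intra/inter costs in
// tpl_stats.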
static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                            struct scale_factors *sf, GF_PICTURE *gf_picture,
                            int frame_idx, TplDepFrame *tpl_frame,
                            int16_t *src_diff, tran_low_t *coeff,
                            tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
                            YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
                            int64_t *recon_error, int64_t *rate_cost,
                            int64_t *sse, int *ref_frame_idx) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int pix_num = bw * bh;
  int best_rf_idx = -1;
  int_mv best_mv;
  int64_t best_inter_cost = INT64_MAX;
  int64_t inter_cost;
  int rf_idx;
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];

  int64_t best_intra_cost = INT64_MAX;
  int64_t intra_cost;
  PREDICTION_MODE mode;
  int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  MODE_INFO mi_above, mi_left;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  TplDepStats *tpl_stats =
      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
  xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
  xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
  xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
  xd->left_mi = (mi_col > 0) ? &mi_left : NULL;

  // Intra prediction search
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    uint8_t *src, *dst;
    int src_stride, dst_stride;

    src = xd->cur_buf->y_buffer + mb_y_offset;
    src_stride = xd->cur_buf->y_stride;

    dst = &predictor[0];
    dst_stride = bw;

    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;

    vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
                            src_stride, dst, dst_stride, 0, 0, 0);

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                dst_stride, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                         dst_stride);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_satd(coeff, pix_num);
    }
#else
    vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    intra_cost = vpx_satd(coeff, pix_num);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
  }

  // Motion compensated prediction
  best_mv.as_int = 0;

  set_mv_limits(cm, x, mi_row, mi_col);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    int_mv mv;
#if CONFIG_NON_GREEDY_MV
    MotionField *motion_field;
#endif
    if (ref_frame[rf_idx] == NULL) continue;

#if CONFIG_NON_GREEDY_MV
    (void)td;
    motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
#else
    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
                                  xd->cur_buf->y_stride, bsize, &mv.as_mv);
#endif

#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
          ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
          &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd->bd);
      vpx_highbd_subtract_block(
          bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
          xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vp9_build_inter_predictor(
          ref_frame[rf_idx]->y_buffer + mb_y_offset,
          ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
          0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
      vpx_subtract_block(bh, bw, src_diff, bw,
                         xd->cur_buf->y_buffer + mb_y_offset,
                         xd->cur_buf->y_stride, &predictor[0], bw);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_satd(coeff, pix_num);
    }
#else
    vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
                              ref_frame[rf_idx]->y_stride, &predictor[0], bw,
                              &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
    vpx_subtract_block(bh, bw, src_diff, bw,
                       xd->cur_buf->y_buffer + mb_y_offset,
                       xd->cur_buf->y_stride, &predictor[0], bw);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    inter_cost = vpx_satd(coeff, pix_num);
#endif

    if (inter_cost < best_inter_cost) {
      uint16_t eob = 0;
      best_rf_idx = rf_idx;
      best_inter_cost = inter_cost;
      best_mv.as_int = mv.as_int;
      // Since best_inter_cost is initialized as INT64_MAX, recon_error and
      // rate_cost will be calculated with the best reference frame.
      get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
                         sse, &eob);
      *rate_cost = rate_estimator(qcoeff, eob, tx_size);
    }
  }
  best_intra_cost = VPXMAX(best_intra_cost, 1);
  best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
  tpl_stats->inter_cost = VPXMAX(
      1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->intra_cost = VPXMAX(
      1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
  tpl_stats->mv.as_int = best_mv.as_int;
  *ref_frame_idx = best_rf_idx;
}

#if CONFIG_NON_GREEDY_MV
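// Points src/pre at the co-located block in the current and reference
// frames. Returns 0 only when the reference slot is invalid, which is
// treated as a programming error.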
static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
                                  int frame_idx, int rf_idx, int mi_row,
                                  int mi_col, struct buf_2d *src,
                                  struct buf_2d *pre) {
  const int mb_y_offset =
      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  YV12_BUFFER_CONFIG *ref_frame = NULL;
  int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
  if (ref_frame_idx != -1) {
    ref_frame = gf_picture[ref_frame_idx].frame;
    src->buf = xd->cur_buf->y_buffer + mb_y_offset;
    src->stride = xd->cur_buf->y_stride;
    pre->buf = ref_frame->y_buffer + mb_y_offset;
    pre->stride = ref_frame->y_stride;
    assert(src->stride == pre->stride);
    return 1;
  } else {
    printf("invalid ref_frame_idx");
    assert(ref_frame_idx != -1);
    return 0;
  }
}

#define kMvPreCheckLines 5
#define kMvPreCheckSize 15

#define MV_REF_POS_NUM 3
POSITION mv_ref_pos[MV_REF_POS_NUM] = {
  { -1, 0 },
  { 0, -1 },
  { -1, -1 },
};

static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
                             int mi_col) {
  return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
}

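// Derives NEAREST/NEAR reference MVs from the MVs already selected for the
// above, left, and above-left neighbors (see mv_ref_pos).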
static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
                          BLOCK_SIZE bsize, int mi_row, int mi_col) {
  int i;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int_mv nearest_mv, near_mv, invalid_mv;
  nearest_mv.as_int = INVALID_MV;
  near_mv.as_int = INVALID_MV;
  invalid_mv.as_int = INVALID_MV;
  for (i = 0; i < MV_REF_POS_NUM; ++i) {
    int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
    int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
    assert(mv_ref_pos[i].row <= 0);
    assert(mv_ref_pos[i].col <= 0);
    if (nb_row >= 0 && nb_col >= 0) {
      if (nearest_mv.as_int == INVALID_MV) {
        nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
      } else {
        int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
        if (mv.as_int == nearest_mv.as_int) {
          continue;
        } else {
          near_mv = mv;
          break;
        }
      }
    }
  }
  if (nearest_mv.as_int == INVALID_MV) {
    nearest_mv.as_mv.row = 0;
    nearest_mv.as_mv.col = 0;
  }
  if (near_mv.as_int == INVALID_MV) {
    near_mv.as_mv.row = 0;
    near_mv.as_mv.col = 0;
  }
  if (mv_mode == NEAREST_MV_MODE) {
    return nearest_mv;
  }
  if (mv_mode == NEAR_MV_MODE) {
    return near_mv;
  }
  assert(0);
  return invalid_mv;
}

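// Maps an mv_mode to a concrete MV: zero, the searched MV from the motion
// field (NEW), or a neighbor-derived reference MV (NEAREST/NEAR).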
static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
                                  MotionField *motion_field,
                                  TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
                                  int mi_row, int mi_col) {
  int_mv mv;
  switch (mv_mode) {
    case ZERO_MV_MODE:
      mv.as_mv.row = 0;
      mv.as_mv.col = 0;
      break;
    case NEW_MV_MODE:
      mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
      break;
    case NEAREST_MV_MODE:
      mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
      break;
    case NEAR_MV_MODE:
      mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
      break;
    default:
      mv.as_int = INVALID_MV;
      assert(0);
      break;
  }
  return mv;
}

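// Full-pel prediction error (sse scaled by VP9_DIST_SCALE_LOG2) of the MV
// implied by mv_mode against the given reference frame.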
static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
                          GF_PICTURE *gf_picture, MotionField *motion_field,
                          int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                          BLOCK_SIZE bsize, int mi_row, int mi_col,
                          int_mv *mv) {
  uint32_t sse;
  struct buf_2d src;
  struct buf_2d pre;
  MV full_mv;
  *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
                            mi_row, mi_col);
  full_mv = get_full_mv(&mv->as_mv);
  if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
                             &src, &pre)) {
    // TODO(angiebird): Consider subpixel when computing the sse.
    cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
                          pre.stride, &sse);
    return (double)(sse << VP9_DIST_SCALE_LOG2);
  } else {
    assert(0);
    return 0;
  }
}

static int get_mv_mode_cost(int mv_mode) {
  // TODO(angiebird): The probabilities are roughly inferred from
  // default_inter_mode_probs. Check if there is a better way to set the
  // probabilities.
  const int zero_mv_prob = 16;
  const int new_mv_prob = 24 * 1;
  const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
  assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
  switch (mv_mode) {
    case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
    case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
    case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
    case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
    default: assert(0); return -1;
  }
}

static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
  double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
                        log2(1 + abs(new_mv->col - ref_mv->col));
  mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
  return mv_diff_cost;
}

static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
                          TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
                          int mi_col) {
  double mv_cost = get_mv_mode_cost(mv_mode);
  if (mv_mode == NEW_MV_MODE) {
    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
                                    bsize, mi_row, mi_col)
                    .as_mv;
    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
                                        tpl_frame, bsize, mi_row, mi_col)
                        .as_mv;
    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
                                     bsize, mi_row, mi_col)
                     .as_mv;
    double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
    double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
    mv_cost += nearest_cost < near_cost ? nearest_cost : near_cost;
  }
  return mv_cost;
}

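// Rate-distortion style score for an mv_mode: mode and MV-difference cost
// plus a log-scaled distortion term. The multiplier 180 appears to be an
// empirically chosen weight.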
static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
                           GF_PICTURE *gf_picture, MotionField *motion_field,
                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                           BLOCK_SIZE bsize, int mi_row, int mi_col,
                           int_mv *mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  double mv_dist =
      get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
                  tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
  double mv_cost =
      get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
  double mult = 180;

  return mv_cost + mult * log2f(1 + mv_dist);
}

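// Picks the best non-NEW mv_mode (zero/nearest/near) for a block and returns
// its score in *rd and the corresponding MV in *mv.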
static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 GF_PICTURE *gf_picture,
                                 MotionField *motion_field, int frame_idx,
                                 TplDepFrame *tpl_frame, int rf_idx,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 double *rd, int_mv *mv) {
  int best_mv_mode = ZERO_MV_MODE;
  int update = 0;
  int mv_mode;
  *rd = 0;
  for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) {
    double this_rd;
    int_mv this_mv;
    if (mv_mode == NEW_MV_MODE) {
      continue;
    }
    this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx,
                           tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
    if (update == 0) {
      *rd = this_rd;
      *mv = this_mv;
      best_mv_mode = mv_mode;
      update = 1;
    } else {
      if (this_rd < *rd) {
        *rd = this_rd;
        *mv = this_mv;
        best_mv_mode = mv_mode;
      }
    }
  }
  return best_mv_mode;
}

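// Decides between NEW_MV_MODE and the best reference mode for a block by
// comparing the rd accumulated over a small diagonal neighborhood (the
// kMvPreCheckLines wavefront), since changing this block's MV also changes
// the reference MVs available to its neighbors.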
static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            GF_PICTURE *gf_picture, MotionField *motion_field,
                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                            BLOCK_SIZE bsize, int mi_row, int mi_col) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  int tmp_mv_mode_arr[kMvPreCheckSize];
  int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
  double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
  int_mv *select_mv_arr = cpi->select_mv_arr;
  int_mv tmp_select_mv_arr[kMvPreCheckSize];
  int stride = tpl_frame->stride;
  double new_mv_rd = 0;
  double no_new_mv_rd = 0;
  double this_new_mv_rd = 0;
  double this_no_new_mv_rd = 0;
  int idx;
  int tmp_idx;
  assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);

  // no new mv
  // diagonal scan order
  tmp_idx = 0;
  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        if (r == 0 && c == 0) {
          this_no_new_mv_rd = this_rd;
        }
        no_new_mv_rd += this_rd;
        tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
        tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
        ++tmp_idx;
      }
    }
  }

  // new mv
  mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
  this_new_mv_rd = eval_mv_mode(
      NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
      rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
  new_mv_rd = this_new_mv_rd;
  // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
  // beforehand.
  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        new_mv_rd += this_rd;
      }
    }
  }

  // update best_mv_mode
  tmp_idx = 0;
  if (no_new_mv_rd < new_mv_rd) {
    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
      int r;
      for (r = 0; r <= idx; ++r) {
        int c = idx - r;
        int nb_row = mi_row + r * mi_height;
        int nb_col = mi_col + c * mi_width;
        if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
          mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
          select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
          ++tmp_idx;
        }
      }
    }
    rd_diff_arr[mi_row * stride + mi_col] = 0;
  } else {
    rd_diff_arr[mi_row * stride + mi_col] =
        (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
  }
}

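// Runs predict_mv_mode() over the frame in diagonal scan order so that a
// block's above/left neighbors are decided before the block itself.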
static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
                                GF_PICTURE *gf_picture,
                                MotionField *motion_field, int frame_idx,
                                TplDepFrame *tpl_frame, int rf_idx,
                                BLOCK_SIZE bsize) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int unit_rows = tpl_frame->mi_rows / mi_height;
  const int unit_cols = tpl_frame->mi_cols / mi_width;
  const int max_diagonal_lines = unit_rows + unit_cols - 1;
  int idx;
  for (idx = 0; idx < max_diagonal_lines; ++idx) {
    int r;
    for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1);
         ++r) {
      int c = idx - r;
      int mi_row = r * mi_height;
      int mi_col = c * mi_width;
      assert(c >= 0 && c < unit_cols);
      assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
      assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
      predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
                      rf_idx, bsize, mi_row, mi_col);
    }
  }
}

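// Full-pel plus sub-pel motion search for one block against one reference
// frame; the result is written back into the motion field.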
static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
                             MotionField *motion_field, int frame_idx,
                             YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
                             int mi_row, int mi_col) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  const int mb_y_offset =
      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  assert(ref_frame != NULL);
  set_mv_limits(cm, x, mi_row, mi_col);
  {
    int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
    uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
    uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
    const int stride = xd->cur_buf->y_stride;
    full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
                             ref_frame_buf, stride, bsize, mi_row, mi_col,
                             &mv.as_mv);
    sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride,
                            bsize, &mv.as_mv);
    vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
  }
}

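// Builds the motion field of every available reference frame at the given
// block size. lambda is set proportional to the block area.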
static void build_motion_field(
    VP9_COMP *cpi, int frame_idx,
    YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;
  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
  const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
  int mi_row, mi_col;
  int rf_idx;

  tpl_frame->lambda = (pw * ph) >> 2;
  assert(pw * ph == tpl_frame->lambda << 2);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    MotionField *motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    if (ref_frame[rf_idx] == NULL) {
      continue;
    }
    vp9_motion_field_reset_mvs(motion_field);
    for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
        do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
                         bsize, mi_row, mi_col);
      }
    }
  }
}
#endif  // CONFIG_NON_GREEDY_MV

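// Per-frame TPL analysis: sets up the quantizer and scale factors, runs
// mode_estimation() for each block, stores the raw stats, and back-propagates
// dependency costs into the reference frames' stats.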
static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                              int frame_idx, BLOCK_SIZE bsize) {
  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
  VpxTplFrameStats *tpl_frame_stats_before_propagation =
      &cpi->tpl_gop_stats.frame_stats_list[frame_idx];
  YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
  YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };

  VP9_COMMON *cm = &cpi->common;
  struct scale_factors sf;
  int rdmult, idx;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  int mi_row, mi_col;

#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
  uint8_t *predictor;
#else
  DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
#endif
  DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);

  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];

  tpl_frame_stats_before_propagation->frame_width = cm->width;
  tpl_frame_stats_before_propagation->frame_height = cm->height;
  // Set up the scaling factor.
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_setup_scale_factors_for_frame(
      &sf, this_frame->y_crop_width, this_frame->y_crop_height,
      this_frame->y_crop_width, this_frame->y_crop_height,
      cpi->common.use_highbitdepth);

  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    predictor = CONVERT_TO_BYTEPTR(predictor16);
  else
    predictor = predictor8;
#else
  vp9_setup_scale_factors_for_frame(
      &sf, this_frame->y_crop_width, this_frame->y_crop_height,
      this_frame->y_crop_width, this_frame->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Prepare reference frame pointers. If any reference frame slot is
  // unavailable, the pointer will be set to NULL.
  for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
    int rf_idx = gf_picture[frame_idx].ref_frame[idx];
    if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
  }

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;
  xd->cur_buf = this_frame;

  // Get the rd multiplier set up.
  rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
  set_error_per_bit(&cpi->td.mb, rdmult);
  vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);

  tpl_frame->is_valid = 1;

  cm->base_qindex = tpl_frame->base_qindex;
  vp9_frame_init_quantizer(cpi);

#if CONFIG_NON_GREEDY_MV
  {
    int square_block_idx;
    int rf_idx;
    for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
         ++square_block_idx) {
      BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
      build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
    }
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
      if (ref_frame_idx != -1) {
        MotionField *motion_field = vp9_motion_field_info_get_motion_field(
            &cpi->motion_field_info, frame_idx, rf_idx, bsize);
        predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
                            tpl_frame, rf_idx, bsize);
      }
    }
  }
#endif  // CONFIG_NON_GREEDY_MV

  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
      int64_t recon_error = 0;
      int64_t rate_cost = 0;
      int64_t sse = 0;
      // Ref frame index in the ref frame buffer.
      int ref_frame_idx = -1;
      mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
                      src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
                      tx_size, ref_frame, predictor, &recon_error, &rate_cost,
                      &sse, &ref_frame_idx);
      // Motion flow dependency dispenser.
      tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
                      tpl_frame->stride);

      tpl_store_before_propagation(
          tpl_frame_stats_before_propagation->block_stats_list,
          tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, tpl_frame->stride,
          recon_error, rate_cost, ref_frame_idx);

      tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
                       bsize);
    }
  }
}

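// Drops the stats of the extra look-ahead frames appended past the GOP so
// that tpl_gop_stats matches the GOP length expected by its consumers.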
static void trim_tpl_stats(struct vpx_internal_error_info *error_info,
                           VpxTplGopStats *tpl_gop_stats, int extra_frames) {
  int i;
  VpxTplFrameStats *new_frame_stats;
  const int new_size = tpl_gop_stats->size - extra_frames;
  if (tpl_gop_stats->size <= extra_frames)
    vpx_internal_error(
        error_info, VPX_CODEC_ERROR,
        "The number of frames in VpxTplGopStats is fewer than expected.");
  CHECK_MEM_ERROR(error_info, new_frame_stats,
                  vpx_calloc(new_size, sizeof(*new_frame_stats)));
  for (i = 0; i < new_size; i++) {
    VpxTplFrameStats *frame_stats = &tpl_gop_stats->frame_stats_list[i];
    const int num_blocks = frame_stats->num_blocks;
    new_frame_stats[i].num_blocks = num_blocks;
    new_frame_stats[i].frame_width = frame_stats->frame_width;
    new_frame_stats[i].frame_height = frame_stats->frame_height;
    CHECK_MEM_ERROR(
        error_info, new_frame_stats[i].block_stats_list,
        vpx_calloc(num_blocks, sizeof(*new_frame_stats[i].block_stats_list)));
    memcpy(new_frame_stats[i].block_stats_list, frame_stats->block_stats_list,
           num_blocks * sizeof(*new_frame_stats[i].block_stats_list));
  }
  free_tpl_frame_stats_list(tpl_gop_stats);
  tpl_gop_stats->size = new_size;
  tpl_gop_stats->frame_stats_list = new_frame_stats;
}

#if CONFIG_NON_GREEDY_MV
#define DUMP_TPL_STATS 0
#if DUMP_TPL_STATS
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
  int i, j;
  printf("%d %d\n", h, w);
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      printf("%d ", buf[(row + i) * stride + col + j]);
    }
  }
  printf("\n");
}

static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
  dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
           frame_buf->y_width);
  dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
           frame_buf->uv_height, frame_buf->uv_width);
  dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
           frame_buf->uv_height, frame_buf->uv_width);
}

static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
                           const GF_GROUP *gf_group,
                           const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
  int frame_idx;
  const VP9_COMMON *cm = &cpi->common;
  int rf_idx;
  for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
      int mi_row, mi_col;
      int ref_frame_idx;
      const int mi_height = num_8x8_blocks_high_lookup[bsize];
      const int mi_width = num_8x8_blocks_wide_lookup[bsize];
      ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
      if (ref_frame_idx != -1) {
        YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
        const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
        const int ref_gf_frame_offset =
            gf_group->frame_gop_index[ref_frame_idx];
        printf("=\n");
        printf(
            "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
            "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
            frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
            ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
        for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
          for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
            if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
              int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
                                                       frame_idx, rf_idx, bsize,
                                                       mi_row, mi_col);
              printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
                     mv.as_mv.col);
            }
          }
        }
        for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
          for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
            if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
              const TplDepStats *tpl_ptr =
                  &tpl_frame
                       ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
              printf("%f ", tpl_ptr->feature_score);
            }
          }
        }
        printf("\n");

        for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
          for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
            const int mv_mode =
                tpl_frame
                    ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
            printf("%d ", mv_mode);
          }
        }
        printf("\n");

        dump_frame_buf(gf_picture[frame_idx].frame);
        dump_frame_buf(ref_frame_buf);
      }
    }
  }
}
#endif  // DUMP_TPL_STATS
#endif  // CONFIG_NON_GREEDY_MV

void vp9_init_tpl_buffer(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int frame;

  const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_NON_GREEDY_MV
  int rf_idx;

  vpx_free(cpi->select_mv_arr);
  CHECK_MEM_ERROR(
      &cm->error, cpi->select_mv_arr,
      vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
#endif

  // TODO(jingning): Reduce the actual memory use for tpl model build up.
  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
    if (cpi->tpl_stats[frame].width >= mi_cols &&
        cpi->tpl_stats[frame].height >= mi_rows &&
        cpi->tpl_stats[frame].tpl_stats_ptr)
      continue;

#if CONFIG_NON_GREEDY_MV
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
      CHECK_MEM_ERROR(
          &cm->error, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
          vpx_calloc(mi_rows * mi_cols * 4,
                     sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
      CHECK_MEM_ERROR(
          &cm->error, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
          vpx_calloc(mi_rows * mi_cols * 4,
                     sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
    }
#endif
    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
    CHECK_MEM_ERROR(&cm->error, cpi->tpl_stats[frame].tpl_stats_ptr,
                    vpx_calloc(mi_rows * mi_cols,
                               sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
    cpi->tpl_stats[frame].is_valid = 0;
    cpi->tpl_stats[frame].width = mi_cols;
    cpi->tpl_stats[frame].height = mi_rows;
    cpi->tpl_stats[frame].stride = mi_cols;
    cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
    cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
  }

  for (frame = 0; frame < REF_FRAMES; ++frame) {
    cpi->enc_frame_buf[frame].mem_valid = 0;
    cpi->enc_frame_buf[frame].released = 1;
  }
}

void vp9_free_tpl_buffer(VP9_COMP *cpi) {
  int frame;
#if CONFIG_NON_GREEDY_MV
  vp9_free_motion_field_info(&cpi->motion_field_info);
  vpx_free(cpi->select_mv_arr);
#endif
  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
#if CONFIG_NON_GREEDY_MV
    int rf_idx;
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
    }
#endif
    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
    cpi->tpl_stats[frame].is_valid = 0;
  }
  free_tpl_frame_stats_list(&cpi->tpl_gop_stats);
}

#if CONFIG_RATE_CTRL
static void accumulate_frame_tpl_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int show_frame_count = 0;
  int frame_idx;
  // Accumulate tpl stats for each frame in the current group of pictures.
  for (frame_idx = 1; frame_idx < gf_group->gf_group_size; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
    const int tpl_stride = tpl_frame->stride;
    int64_t intra_cost_base = 0;
    int64_t inter_cost_base = 0;
    int64_t mc_dep_cost_base = 0;
    int64_t mc_ref_cost_base = 0;
    int64_t mc_flow_base = 0;
    int row, col;

    if (!tpl_frame->is_valid) continue;

    for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
      for (col = 0; col < cm->mi_cols; ++col) {
        TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
        intra_cost_base += this_stats->intra_cost;
        inter_cost_base += this_stats->inter_cost;
        mc_dep_cost_base += this_stats->mc_dep_cost;
        mc_ref_cost_base += this_stats->mc_ref_cost;
        mc_flow_base += this_stats->mc_flow;
      }
    }

    cpi->tpl_stats_info[show_frame_count].intra_cost = intra_cost_base;
    cpi->tpl_stats_info[show_frame_count].inter_cost = inter_cost_base;
    cpi->tpl_stats_info[show_frame_count].mc_dep_cost = mc_dep_cost_base;
    cpi->tpl_stats_info[show_frame_count].mc_ref_cost = mc_ref_cost_base;
    cpi->tpl_stats_info[show_frame_count].mc_flow = mc_flow_base;

    ++show_frame_count;
  }
}
#endif  // CONFIG_RATE_CTRL

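// Entry point of the TPL model: builds the GOP picture list, then processes
// frames in reverse order so each frame's dependency costs are fully
// propagated to its references before those references are analyzed.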
void vp9_setup_tpl_stats(VP9_COMP *cpi) {
  GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int tpl_group_frames = 0;
  int frame_idx;
  int extended_frame_count;
  cpi->tpl_bsize = BLOCK_32X32;

  extended_frame_count =
      init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);

  init_tpl_stats(cpi);

  init_tpl_stats_before_propagation(&cpi->common.error, &cpi->tpl_gop_stats,
                                    cpi->tpl_stats, tpl_group_frames,
                                    cpi->common.width, cpi->common.height);

  // Backward propagation from tpl_group_frames to 1.
  for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
    if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
    mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
  }

  // The TPL stats include extra frames from the next GOP. Trim those extra
  // frames for Qmode.
  trim_tpl_stats(&cpi->common.error, &cpi->tpl_gop_stats, extended_frame_count);

  if (cpi->ext_ratectrl.ready &&
      cpi->ext_ratectrl.funcs.send_tpl_gop_stats != NULL) {
    const vpx_codec_err_t codec_status =
        vp9_extrc_send_tpl_stats(&cpi->ext_ratectrl, &cpi->tpl_gop_stats);
    if (codec_status != VPX_CODEC_OK) {
      vpx_internal_error(&cpi->common.error, codec_status,
                         "vp9_extrc_send_tpl_stats() failed");
    }
  }

#if CONFIG_NON_GREEDY_MV
  cpi->tpl_ready = 1;
#if DUMP_TPL_STATS
  dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
#endif  // DUMP_TPL_STATS
#endif  // CONFIG_NON_GREEDY_MV

#if CONFIG_RATE_CTRL
  if (cpi->oxcf.use_simple_encode_api) {
    accumulate_frame_tpl_stats(cpi);
  }
#endif  // CONFIG_RATE_CTRL
}