• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2023 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 
13 #include "./vpx_dsp_rtcd.h"
14 #if CONFIG_NON_GREEDY_MV
15 #include "vp9/common/vp9_mvref_common.h"
16 #endif
17 #include "vp9/common/vp9_reconinter.h"
18 #include "vp9/common/vp9_reconintra.h"
19 #include "vp9/common/vp9_scan.h"
20 #include "vp9/encoder/vp9_encoder.h"
21 #include "vp9/encoder/vp9_tpl_model.h"
22 #include "vpx/internal/vpx_codec_internal.h"
23 #include "vpx/vpx_codec.h"
24 
// Builds the ordered frame list (gf_picture) the TPL model will process for
// the current GF group, wiring each entry's three reference indices
// (golden / last / alt-ref) as the group is walked. Also claims spare frame
// buffers for TPL reconstruction and appends up to two frames past the end
// of the group. Returns the number of extended frames (0..2);
// *tpl_group_frames receives the total number of frames set up.
static int init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                           const GF_GROUP *gf_group, int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int gld_index = -1;
  int alt_index = -1;
  int lst_index = -1;
  // Stack of outstanding ARF indices so nested ARF layers can be unwound
  // when their overlay / buffered frame is reached.
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  // Q index taken from the first P frame's stats; reused for the frames
  // extended beyond the group boundary below.
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  // Claim currently-unreferenced frame buffers for TPL reconstruction,
  // (re)allocating each one at the current coding dimensions.
  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
                                   NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      // One buffer per reference slot, plus one extra when auto-ARF is on.
      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize Golden reference frame.
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  // Initialize base layer ARF frame
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    // Track which frame currently occupies each reference role.
    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        // A new ARF layer starts: remember the previous ARF on the stack.
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        // The overlay becomes the new golden frame; pop back to the
        // enclosing ARF layer.
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    // Extended frames reuse the P-frame base Q index captured above.
    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }

  return extend_frame_count;
}
156 
// Resets the per-frame TPL statistics for the whole ARF group: zeroes every
// block-stats grid and marks each frame's stats as not yet valid.
static void init_tpl_stats(VP9_COMP *cpi) {
  int idx;
  for (idx = 0; idx < MAX_ARF_GOP_SIZE; ++idx) {
    TplDepFrame *const frame = &cpi->tpl_stats[idx];
    const int num_blocks = frame->height * frame->width;
    memset(frame->tpl_stats_ptr, 0,
           num_blocks * sizeof(*frame->tpl_stats_ptr));
    frame->is_valid = 0;
  }
}
167 
// Releases every per-frame block-stats array and the frame list itself.
// The container is reset (list pointer NULLed, size zeroed) afterwards so a
// repeated call — or a later free of a partially initialized container — is
// harmless rather than a double free / dangling-pointer hazard.
static void free_tpl_frame_stats_list(VpxTplGopStats *tpl_gop_stats) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < tpl_gop_stats->size; ++frame_idx) {
    vpx_free(tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list);
  }
  vpx_free(tpl_gop_stats->frame_stats_list);
  tpl_gop_stats->frame_stats_list = NULL;
  tpl_gop_stats->size = 0;
}
175 
// (Re)allocates the externally visible TPL GOP stats container for this
// group of pictures: one frame entry per TPL frame, each with a
// zero-initialized block-stats array sized from that frame's stats grid.
// Any lists left over from a previous GOP are released first. Allocation
// failure raises an internal error via CHECK_MEM_ERROR.
static void init_tpl_stats_before_propagation(
    struct vpx_internal_error_info *error_info, VpxTplGopStats *tpl_gop_stats,
    TplDepFrame *tpl_stats, int tpl_gop_frames, int frame_width,
    int frame_height) {
  int frame_idx;
  free_tpl_frame_stats_list(tpl_gop_stats);
  CHECK_MEM_ERROR(
      error_info, tpl_gop_stats->frame_stats_list,
      vpx_calloc(tpl_gop_frames, sizeof(*tpl_gop_stats->frame_stats_list)));
  tpl_gop_stats->size = tpl_gop_frames;
  for (frame_idx = 0; frame_idx < tpl_gop_frames; ++frame_idx) {
    // TplDepFrame's height/width give the stats grid dimensions, used here
    // as row/column block counts.
    const int mi_rows = tpl_stats[frame_idx].height;
    const int mi_cols = tpl_stats[frame_idx].width;
    CHECK_MEM_ERROR(
        error_info, tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list,
        vpx_calloc(
            mi_rows * mi_cols,
            sizeof(
                *tpl_gop_stats->frame_stats_list[frame_idx].block_stats_list)));
    tpl_gop_stats->frame_stats_list[frame_idx].num_blocks = mi_rows * mi_cols;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_width = frame_width;
    tpl_gop_stats->frame_stats_list[frame_idx].frame_height = frame_height;
  }
}
200 
201 #if CONFIG_NON_GREEDY_MV
// Full-pixel diamond motion search for the TPL model (non-greedy MV mode).
// Neighboring full-pel MVs are gathered from the motion field and a lambda
// term rewards MV-field consistency. The resulting MV is written to *mv.
// NOTE(review): bestsme is initialized to UINT_MAX and never updated, so
// this function always returns UINT_MAX; only *mv carries the result.
static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                         MotionField *motion_field,
                                         int frame_idx, uint8_t *cur_frame_buf,
                                         uint8_t *ref_frame_buf, int stride,
                                         BLOCK_SIZE bsize, int mi_row,
                                         int mi_col, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  int step_param;
  uint32_t bestsme = UINT_MAX;
  // Saved so the MV range clamp applied below can be undone on exit.
  const MvLimits tmp_mv_limits = x->mv_limits;
  // lambda is used to adjust the importance of motion vector consistency.
  // TODO(angiebird): Figure out lambda's proper value.
  const int lambda = cpi->tpl_stats[frame_idx].lambda;
  int_mv nb_full_mvs[NB_MVS_NUM];
  int nb_full_mv_num;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  nb_full_mv_num =
      vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs);
  vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
                             lambda, 1, nb_full_mvs, nb_full_mv_num, mv);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  return bestsme;
}
247 
// Refines *mv to sub-pixel precision around the full-pel result using the
// encoder's fractional MV search, starting from a zero reference MV and
// ignoring MV rate cost. Returns the best error reported by the search.
static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                        uint8_t *cur_frame_buf,
                                        uint8_t *ref_frame_buf, int stride,
                                        BLOCK_SIZE bsize, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];

  MV best_ref_mv1 = { 0, 0 };

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}
278 
279 #else  // CONFIG_NON_GREEDY_MV
// Full motion search for the TPL model (greedy-MV build): NSTEP full-pixel
// search from a zero reference MV followed by sub-pixel refinement. The
// chosen MV is written to *mv; the sub-pixel search's error is returned.
static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
                                              uint8_t *cur_frame_buf,
                                              uint8_t *ref_frame_buf,
                                              int stride, BLOCK_SIZE bsize,
                                              MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS search_method = NSTEP;
  int step_param;
  int sadpb = x->sadperbit16;
  uint32_t bestsme = UINT_MAX;
  uint32_t distortion;
  uint32_t sse;
  int cost_list[5];
  // Saved so the MV range clamp applied below can be undone before the
  // sub-pixel step.
  const MvLimits tmp_mv_limits = x->mv_limits;

  MV best_ref_mv1 = { 0, 0 };
  MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  best_ref_mv1_full.col = best_ref_mv1.col >> 3;
  best_ref_mv1_full.row = best_ref_mv1.row >> 3;

  // Setup frame pointers
  x->plane[0].src.buf = cur_frame_buf;
  x->plane[0].src.stride = stride;
  xd->plane[0].pre[0].buf = ref_frame_buf;
  xd->plane[0].pre[0].stride = stride;

  step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);

  vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
                        search_method, sadpb, cond_cost_list(cpi, cost_list),
                        &best_ref_mv1, mv, 0, 0);

  /* restore UMV window */
  x->mv_limits = tmp_mv_limits;

  // TODO(yunqing): may use higher tap interp filter than 2 taps.
  // Ignore mv costing by sending NULL pointer instead of cost array
  bestsme = cpi->find_fractional_mv_step(
      x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
      &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
      cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
      USE_2_TAPS);

  return bestsme;
}
331 #endif
332 
// Computes, in pixels, the overlap area between the prediction block at
// (ref_pos_row, ref_pos_col) and one of the four grid-aligned blocks it
// straddles. 'block' selects the quadrant: bit 0 set means the right
// column, bit 1 set means the bottom row.
static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
                            int ref_pos_col, int block, BLOCK_SIZE bsize) {
  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  int width;
  int height;

  assert(block >= 0 && block < 4);

  if (block & 0x01)
    width = ref_pos_col + bw - grid_pos_col;
  else
    width = grid_pos_col + bw - ref_pos_col;

  if (block & 0x02)
    height = ref_pos_row + bh - grid_pos_row;
  else
    height = grid_pos_row + bh - ref_pos_row;

  return width * height;
}
361 
// Floor division of ref_pos by bsize_pix. C's '/' truncates toward zero,
// so negative positions need an explicit adjustment to round toward
// negative infinity.
static int round_floor(int ref_pos, int bsize_pix) {
  return (ref_pos < 0) ? -(1 + (-ref_pos - 1) / bsize_pix)
                       : ref_pos / bsize_pix;
}
371 
// Replicates the block-level stats at (mi_row, mi_col) into every 8x8 cell
// the block covers, while preserving each cell's already-accumulated
// mc_flow / mc_ref_cost, and refreshes the derived mc_dep_cost.
static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
                            BLOCK_SIZE bsize, int stride) {
  const int rows = num_8x8_blocks_high_lookup[bsize];
  const int cols = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *const src = &tpl_stats[mi_row * stride + mi_col];
  int r, c;

  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
      TplDepStats *const dst = &tpl_stats[(mi_row + r) * stride + mi_col + c];
      // Keep propagated quantities; everything else comes from the block.
      const int64_t saved_mc_flow = dst->mc_flow;
      const int64_t saved_mc_ref_cost = dst->mc_ref_cost;
      *dst = *src;
      dst->mc_flow = saved_mc_flow;
      dst->mc_ref_cost = saved_mc_ref_cost;
      dst->mc_dep_cost = dst->intra_cost + dst->mc_flow;
    }
  }
}
391 
// Copies the block-level TPL stats at (mi_row, mi_col) into the externally
// visible per-8x8 VpxTplBlockStats grid, attaching the reconstruction
// distortion and rate cost computed for the chosen reference frame.
static void tpl_store_before_propagation(VpxTplBlockStats *tpl_block_stats,
                                         TplDepStats *tpl_stats, int mi_row,
                                         int mi_col, BLOCK_SIZE bsize,
                                         int stride, int64_t recon_error,
                                         int64_t rate_cost, int ref_frame_idx) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
  int idx, idy;

  for (idy = 0; idy < mi_height; ++idy) {
    for (idx = 0; idx < mi_width; ++idx) {
      VpxTplBlockStats *tpl_block_stats_ptr =
          &tpl_block_stats[(mi_row + idy) * stride + mi_col + idx];
      // NOTE(review): every 8x8 cell records the parent block's top-left
      // position (mi_row * 8, mi_col * 8), not its own -- confirm intended.
      tpl_block_stats_ptr->row = mi_row * 8;
      tpl_block_stats_ptr->col = mi_col * 8;
      tpl_block_stats_ptr->inter_cost = src_stats->inter_cost;
      tpl_block_stats_ptr->intra_cost = src_stats->intra_cost;
      // Distortion/rate are stored at TPL_DEP_COST_SCALE_LOG2 fixed-point
      // scale, matching the cost fields above.
      tpl_block_stats_ptr->recrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
      tpl_block_stats_ptr->mv_r = src_stats->mv.as_mv.row;
      tpl_block_stats_ptr->mv_c = src_stats->mv.as_mv.col;
      tpl_block_stats_ptr->ref_frame_index = ref_frame_idx;
    }
  }
}
418 
// Back-propagates one block's dependency cost into its reference frame's
// TPL stats. The motion-compensated source region in the reference frame
// generally straddles up to four grid-aligned blocks; each receives a share
// of the flow proportional to its pixel overlap with the region.
static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                               int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
  TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
  MV mv = tpl_stats->mv.as_mv;
  // MV components are in 1/8-pel units (>> 3 converts to full pixels).
  int mv_row = mv.row >> 3;
  int mv_col = mv.col >> 3;

  // Pixel position of the motion-compensated region in the reference frame.
  int ref_pos_row = mi_row * MI_SIZE + mv_row;
  int ref_pos_col = mi_col * MI_SIZE + mv_col;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pix_num = bw * bh;

  // top-left on grid block location in pixel
  int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
  int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
  int block;

  // Visit the (up to) four overlapped grid blocks: bit 0 selects the right
  // column, bit 1 the bottom row.
  for (block = 0; block < 4; ++block) {
    int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
    int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);

    // Skip grid blocks that fall outside the reference frame.
    if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
        grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
      int overlap_area = get_overlap_area(
          grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
      int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
      int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;

      // Portion of this block's dependency cost not explained by inter
      // prediction quality (scaled by inter/intra cost ratio).
      int64_t mc_flow = tpl_stats->mc_dep_cost -
                        (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
                            tpl_stats->intra_cost;

      int idx, idy;

      // Distribute the flow across every 8x8 stats cell of the grid block,
      // weighted by the overlap fraction.
      for (idy = 0; idy < mi_height; ++idy) {
        for (idx = 0; idx < mi_width; ++idx) {
          TplDepStats *des_stats =
              &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
                         (ref_mi_col + idx)];

          des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
          des_stats->mc_ref_cost +=
              ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
              pix_num;
          assert(overlap_area >= 0);
        }
      }
    }
  }
}
474 
// Propagates dependency costs for a block by running the per-8x8 update on
// every 8x8 sub-block it contains.
static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                             int mi_row, int mi_col, const BLOCK_SIZE bsize) {
  const int rows = num_8x8_blocks_high_lookup[bsize];
  const int cols = num_8x8_blocks_wide_lookup[bsize];
  int r, c;

  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
      TplDepStats *const sub_stats =
          &tpl_stats[(mi_row + r) * tpl_frame->stride + (mi_col + c)];
      tpl_model_update_b(tpl_frame, sub_stats, mi_row + r, mi_col + c,
                         BLOCK_8X8);
    }
  }
}
490 
// Quantizes and dequantizes the transform coefficients with the 32x32-style
// fp quantizer, then measures the reconstruction error and SSE of the
// dequantized block. Both outputs are scaled down by 'shift' and clamped to
// at least 1 so downstream cost computations never see zero.
static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
                               tran_low_t *qcoeff, tran_low_t *dqcoeff,
                               TX_SIZE tx_size, int64_t *recon_error,
                               int64_t *sse, uint16_t *eob) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const ScanOrder *const scan_order = &vp9_default_scan_orders[tx_size];
  int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
  // NOTE(review): presumably compensates for the different coefficient
  // scaling of the TX_32X32 transform -- confirm.
  const int shift = tx_size == TX_32X32 ? 0 : 2;

  // skip block condition should be handled before this is called.
  assert(!x->skip_block);

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vp9_highbd_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff,
                                 pd->dequant, eob, scan_order);
  } else {
    vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                          scan_order);
  }
#else
  vp9_quantize_fp_32x32(coeff, pix_num, p, qcoeff, dqcoeff, pd->dequant, eob,
                        scan_order);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
  *recon_error = VPXMAX(*recon_error, 1);

  *sse = (*sse) >> shift;
  *sse = VPXMAX(*sse, 1);
}
524 
525 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth forward Hadamard transform of a residual block; only the
// square sizes 8x8 / 16x16 / 32x32 are supported here.
// TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
void vp9_highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                             TX_SIZE tx_size) {
  if (tx_size == TX_8X8) {
    vpx_highbd_hadamard_8x8(src_diff, bw, coeff);
  } else if (tx_size == TX_16X16) {
    vpx_highbd_hadamard_16x16(src_diff, bw, coeff);
  } else if (tx_size == TX_32X32) {
    vpx_highbd_hadamard_32x32(src_diff, bw, coeff);
  } else {
    assert(0);
  }
}
536 #endif  // CONFIG_VP9_HIGHBITDEPTH
537 
// Forward Hadamard transform of a residual block; only the square sizes
// 8x8 / 16x16 / 32x32 are supported here.
void vp9_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
                      TX_SIZE tx_size) {
  if (tx_size == TX_8X8) {
    vpx_hadamard_8x8(src_diff, bw, coeff);
  } else if (tx_size == TX_16X16) {
    vpx_hadamard_16x16(src_diff, bw, coeff);
  } else if (tx_size == TX_32X32) {
    vpx_hadamard_32x32(src_diff, bw, coeff);
  } else {
    assert(0);
  }
}
547 
// Sets the motion-vector search limits for the block at (mi_row, mi_col) so
// the search window stays within the frame plus a fixed interpolation
// border on every side.
static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
                          int mi_col) {
  const int border = 17 - 2 * VP9_INTERP_EXTEND;
  x->mv_limits.row_min = -(mi_row * MI_SIZE + border);
  x->mv_limits.row_max = (cm->mi_rows - 1 - mi_row) * MI_SIZE + border;
  x->mv_limits.col_min = -(mi_col * MI_SIZE + border);
  x->mv_limits.col_max = (cm->mi_cols - 1 - mi_col) * MI_SIZE + border;
}
557 
// Rough rate estimate (in VP9_PROB_COST_SHIFT fixed-point bits) for the
// quantized coefficients: each nonzero-scan-position coefficient costs its
// magnitude's bit length plus sign/EOB overhead.
static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
  const ScanOrder *const scan_order = &vp9_scan_orders[tx_size][DCT_DCT];
  int estimated_bits = 1;
  int i;

  assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
  for (i = 0; i < eob; ++i) {
    const unsigned int abs_level = abs(qcoeff[scan_order->scan[i]]);
    estimated_bits += get_msb(abs_level + 1) + 1 + (abs_level > 0);
  }

  return estimated_bits << VP9_PROB_COST_SHIFT;
}
570 
// Estimates intra and inter prediction costs for one block: searches all
// intra modes (DC..TM) and all available reference frames, keeps the
// cheapest of each, and records normalized costs, the winning reference and
// MV into the TPL stats grid for later dependency propagation.
// Outputs: *recon_error / *rate_cost / *sse describe the quantized residual
// of the best inter reference; *ref_frame_idx is the winning reference slot.
static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                            struct scale_factors *sf, GF_PICTURE *gf_picture,
                            int frame_idx, TplDepFrame *tpl_frame,
                            int16_t *src_diff, tran_low_t *coeff,
                            tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
                            int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
                            YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
                            int64_t *recon_error, int64_t *rate_cost,
                            int64_t *sse, int *ref_frame_idx) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;

  const int bw = 4 << b_width_log2_lookup[bsize];
  const int bh = 4 << b_height_log2_lookup[bsize];
  const int pix_num = bw * bh;
  int best_rf_idx = -1;
  int_mv best_mv;
  int64_t best_inter_cost = INT64_MAX;
  int64_t inter_cost;
  int rf_idx;
  const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];

  int64_t best_intra_cost = INT64_MAX;
  int64_t intra_cost;
  PREDICTION_MODE mode;
  // Byte offset of this block's top-left luma sample in the frame buffer.
  int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  MODE_INFO mi_above, mi_left;
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  TplDepStats *tpl_stats =
      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

  // Block-to-frame-edge distances (pixel distances scaled by 8), plus
  // above/left neighbor availability, for intra prediction.
  xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
  xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
  xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
  xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
  xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
  xd->left_mi = (mi_col > 0) ? &mi_left : NULL;

  // Intra prediction search
  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    uint8_t *src, *dst;
    int src_stride, dst_stride;

    src = xd->cur_buf->y_buffer + mb_y_offset;
    src_stride = xd->cur_buf->y_stride;

    dst = &predictor[0];
    dst_stride = bw;

    xd->mi[0]->sb_type = bsize;
    xd->mi[0]->ref_frame[0] = INTRA_FRAME;

    vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
                            src_stride, dst, dst_stride, 0, 0, 0);

    // Cost the mode by the SATD of the Hadamard-transformed residual.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                dst_stride, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                         dst_stride);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      intra_cost = vpx_satd(coeff, pix_num);
    }
#else
    vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    intra_cost = vpx_satd(coeff, pix_num);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
  }

  // Motion compensated prediction
  best_mv.as_int = 0;

  set_mv_limits(cm, x, mi_row, mi_col);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    int_mv mv;
#if CONFIG_NON_GREEDY_MV
    MotionField *motion_field;
#endif
    if (ref_frame[rf_idx] == NULL) continue;

    // Obtain the MV either from the precomputed motion field (non-greedy
    // build) or by running a motion search now.
#if CONFIG_NON_GREEDY_MV
    (void)td;
    motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
#else
    motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset,
                                  ref_frame[rf_idx]->y_buffer + mb_y_offset,
                                  xd->cur_buf->y_stride, bsize, &mv.as_mv);
#endif

    // Build the inter predictor and cost it by residual SATD, mirroring the
    // intra path above.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
          ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
          &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd->bd);
      vpx_highbd_subtract_block(
          bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
          xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
      vp9_highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_highbd_satd(coeff, pix_num);
    } else {
      vp9_build_inter_predictor(
          ref_frame[rf_idx]->y_buffer + mb_y_offset,
          ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
          0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
      vpx_subtract_block(bh, bw, src_diff, bw,
                         xd->cur_buf->y_buffer + mb_y_offset,
                         xd->cur_buf->y_stride, &predictor[0], bw);
      vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
      inter_cost = vpx_satd(coeff, pix_num);
    }
#else
    vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
                              ref_frame[rf_idx]->y_stride, &predictor[0], bw,
                              &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
                              mi_col * MI_SIZE, mi_row * MI_SIZE);
    vpx_subtract_block(bh, bw, src_diff, bw,
                       xd->cur_buf->y_buffer + mb_y_offset,
                       xd->cur_buf->y_stride, &predictor[0], bw);
    vp9_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
    inter_cost = vpx_satd(coeff, pix_num);
#endif

    if (inter_cost < best_inter_cost) {
      uint16_t eob = 0;
      best_rf_idx = rf_idx;
      best_inter_cost = inter_cost;
      best_mv.as_int = mv.as_int;
      // Since best_inter_cost is initialized as INT64_MAX, recon_error and
      // rate_cost will be calculated with the best reference frame.
      get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
                         sse, &eob);
      *rate_cost = rate_estimator(qcoeff, eob, tx_size);
    }
  }
  // Clamp and cap: inter cost never exceeds intra cost, neither is zero.
  best_intra_cost = VPXMAX(best_intra_cost, 1);
  best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
  // Normalize to per-8x8-cell cost at TPL_DEP_COST_SCALE_LOG2 precision.
  tpl_stats->inter_cost = VPXMAX(
      1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  tpl_stats->intra_cost = VPXMAX(
      1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
  // NOTE(review): if every entry of ref_frame[] is NULL, best_rf_idx stays
  // -1 and this indexes ref_frame[-1]; callers appear to always supply at
  // least one reference -- confirm.
  tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
  tpl_stats->mv.as_int = best_mv.as_int;
  *ref_frame_idx = best_rf_idx;
}
728 
729 #if CONFIG_NON_GREEDY_MV
// Points |src| at the current frame's luma block at (mi_row, mi_col) and
// |pre| at the co-located block in reference slot |rf_idx| of
// gf_picture[frame_idx]. Returns 1 on success, 0 when that reference slot
// is unused (-1).
static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
                                  int frame_idx, int rf_idx, int mi_row,
                                  int mi_col, struct buf_2d *src,
                                  struct buf_2d *pre) {
  const int mb_y_offset =
      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
  const int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
  YV12_BUFFER_CONFIG *ref_frame;

  if (ref_frame_idx == -1) {
    printf("invalid ref_frame_idx");
    assert(ref_frame_idx != -1);
    return 0;
  }

  ref_frame = gf_picture[ref_frame_idx].frame;
  src->buf = xd->cur_buf->y_buffer + mb_y_offset;
  src->stride = xd->cur_buf->y_stride;
  pre->buf = ref_frame->y_buffer + mb_y_offset;
  pre->stride = ref_frame->y_stride;
  // Both blocks are addressed with the same offset, so strides must match.
  assert(src->stride == pre->stride);
  return 1;
}
752 
// Number of anti-diagonal lines scanned when pre-checking MV modes, and the
// triangular number of blocks they cover: 5 * (5 + 1) / 2 == 15.
#define kMvPreCheckLines 5
#define kMvPreCheckSize 15

#define MV_REF_POS_NUM 3
// Neighbor offsets (in units of the current block size) probed for
// reference MVs: above, left, and above-left.
// NOTE(review): file-scope table without `static`; if it is not referenced
// from other translation units, internal linkage would be cleaner — confirm.
POSITION mv_ref_pos[MV_REF_POS_NUM] = {
  { -1, 0 },
  { 0, -1 },
  { -1, -1 },
};
762 
get_select_mv(VP9_COMP * cpi,TplDepFrame * tpl_frame,int mi_row,int mi_col)763 static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
764                              int mi_col) {
765   return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col];
766 }
767 
find_ref_mv(int mv_mode,VP9_COMP * cpi,TplDepFrame * tpl_frame,BLOCK_SIZE bsize,int mi_row,int mi_col)768 static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
769                           BLOCK_SIZE bsize, int mi_row, int mi_col) {
770   int i;
771   const int mi_height = num_8x8_blocks_high_lookup[bsize];
772   const int mi_width = num_8x8_blocks_wide_lookup[bsize];
773   int_mv nearest_mv, near_mv, invalid_mv;
774   nearest_mv.as_int = INVALID_MV;
775   near_mv.as_int = INVALID_MV;
776   invalid_mv.as_int = INVALID_MV;
777   for (i = 0; i < MV_REF_POS_NUM; ++i) {
778     int nb_row = mi_row + mv_ref_pos[i].row * mi_height;
779     int nb_col = mi_col + mv_ref_pos[i].col * mi_width;
780     assert(mv_ref_pos[i].row <= 0);
781     assert(mv_ref_pos[i].col <= 0);
782     if (nb_row >= 0 && nb_col >= 0) {
783       if (nearest_mv.as_int == INVALID_MV) {
784         nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
785       } else {
786         int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
787         if (mv.as_int == nearest_mv.as_int) {
788           continue;
789         } else {
790           near_mv = mv;
791           break;
792         }
793       }
794     }
795   }
796   if (nearest_mv.as_int == INVALID_MV) {
797     nearest_mv.as_mv.row = 0;
798     nearest_mv.as_mv.col = 0;
799   }
800   if (near_mv.as_int == INVALID_MV) {
801     near_mv.as_mv.row = 0;
802     near_mv.as_mv.col = 0;
803   }
804   if (mv_mode == NEAREST_MV_MODE) {
805     return nearest_mv;
806   }
807   if (mv_mode == NEAR_MV_MODE) {
808     return near_mv;
809   }
810   assert(0);
811   return invalid_mv;
812 }
813 
get_mv_from_mv_mode(int mv_mode,VP9_COMP * cpi,MotionField * motion_field,TplDepFrame * tpl_frame,BLOCK_SIZE bsize,int mi_row,int mi_col)814 static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
815                                   MotionField *motion_field,
816                                   TplDepFrame *tpl_frame, BLOCK_SIZE bsize,
817                                   int mi_row, int mi_col) {
818   int_mv mv;
819   switch (mv_mode) {
820     case ZERO_MV_MODE:
821       mv.as_mv.row = 0;
822       mv.as_mv.col = 0;
823       break;
824     case NEW_MV_MODE:
825       mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
826       break;
827     case NEAREST_MV_MODE:
828       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
829       break;
830     case NEAR_MV_MODE:
831       mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
832       break;
833     default:
834       mv.as_int = INVALID_MV;
835       assert(0);
836       break;
837   }
838   return mv;
839 }
840 
// Evaluates the distortion (scaled SSE) of predicting block
// (mi_row, mi_col) with the MV implied by |mv_mode|. The chosen MV is also
// written out through |mv|. Returns 0 (after asserting) if the reference
// buffers cannot be set up.
static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
                          GF_PICTURE *gf_picture, MotionField *motion_field,
                          int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                          BLOCK_SIZE bsize, int mi_row, int mi_col,
                          int_mv *mv) {
  struct buf_2d src;
  struct buf_2d pre;
  MV full_mv;
  uint32_t sse;

  *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize,
                            mi_row, mi_col);
  full_mv = get_full_mv(&mv->as_mv);
  if (!get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row,
                              mi_col, &src, &pre)) {
    assert(0);
    return 0;
  }
  // TODO(angiebird): Consider subpixel when computing the sse.
  cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv),
                        pre.stride, &sse);
  return (double)(sse << VP9_DIST_SCALE_LOG2);
}
864 
get_mv_mode_cost(int mv_mode)865 static int get_mv_mode_cost(int mv_mode) {
866   // TODO(angiebird): The probabilities are roughly inferred from
867   // default_inter_mode_probs. Check if there is a better way to set the
868   // probabilities.
869   const int zero_mv_prob = 16;
870   const int new_mv_prob = 24 * 1;
871   const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob;
872   assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256);
873   switch (mv_mode) {
874     case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break;
875     case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break;
876     case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
877     case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break;
878     default: assert(0); return -1;
879   }
880 }
881 
get_mv_diff_cost(MV * new_mv,MV * ref_mv)882 static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) {
883   double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) +
884                         log2(1 + abs(new_mv->col - ref_mv->col));
885   mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT);
886   return mv_diff_cost;
887 }
// Total rate cost of |mv_mode| for block (mi_row, mi_col): the mode
// signaling cost plus, for NEW_MV_MODE only, the cheaper of the MV
// differences against the NEAREST and NEAR predictors.
static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field,
                          TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row,
                          int mi_col) {
  double cost = get_mv_mode_cost(mv_mode);
  if (mv_mode == NEW_MV_MODE) {
    MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame,
                                    bsize, mi_row, mi_col)
                    .as_mv;
    MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field,
                                        tpl_frame, bsize, mi_row, mi_col)
                        .as_mv;
    MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame,
                                     bsize, mi_row, mi_col)
                     .as_mv;
    const double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv);
    const double near_cost = get_mv_diff_cost(&new_mv, &near_mv);
    // Charge the cheaper of the two predictors.
    cost += VPXMIN(nearest_cost, near_cost);
  }
  return cost;
}
908 
// Rate-distortion score of coding block (mi_row, mi_col) with |mv_mode|:
// signaling cost plus a weighted log of the prediction distortion. The
// chosen MV is returned through |mv|.
static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
                           GF_PICTURE *gf_picture, MotionField *motion_field,
                           int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                           BLOCK_SIZE bsize, int mi_row, int mi_col,
                           int_mv *mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const double dist_weight = 180;
  const double mv_dist =
      get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx,
                  tpl_frame, rf_idx, bsize, mi_row, mi_col, mv);
  const double mv_cost =
      get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col);
  return mv_cost + dist_weight * log2f(1 + mv_dist);
}
924 
// Picks the best non-NEW MV mode (ZERO/NEAREST/NEAR) for block
// (mi_row, mi_col) by minimizing eval_mv_mode(). The winning rd score and
// MV are returned through |rd| and |mv|; the mode is the return value.
static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                 GF_PICTURE *gf_picture,
                                 MotionField *motion_field, int frame_idx,
                                 TplDepFrame *tpl_frame, int rf_idx,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 double *rd, int_mv *mv) {
  int best_mode = ZERO_MV_MODE;
  int have_best = 0;
  int mode;
  *rd = 0;
  for (mode = 0; mode < MAX_MV_MODE; ++mode) {
    double this_rd;
    int_mv this_mv;
    // NEW_MV_MODE is evaluated separately by the caller.
    if (mode == NEW_MV_MODE) continue;
    this_rd = eval_mv_mode(mode, cpi, x, gf_picture, motion_field, frame_idx,
                           tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv);
    // The first evaluated mode always seeds the best-so-far state.
    if (!have_best || this_rd < *rd) {
      *rd = this_rd;
      *mv = this_mv;
      best_mode = mode;
      have_best = 1;
    }
  }
  return best_mode;
}
958 
// Decides whether block (mi_row, mi_col) should use NEW_MV_MODE or the best
// reference-based mode by comparing the accumulated rd over a triangular
// window of downstream neighbors (kMvPreCheckLines anti-diagonals), since
// this block's selected MV feeds their NEAREST/NEAR predictors.
// Updates tpl_frame->mv_mode_arr[rf_idx] and cpi->select_mv_arr in place,
// and records NEW_MV_MODE's rd advantage in tpl_frame->rd_diff_arr[rf_idx].
static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                            GF_PICTURE *gf_picture, MotionField *motion_field,
                            int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
                            BLOCK_SIZE bsize, int mi_row, int mi_col) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  // Scratch copies of the window's modes/MVs so the "no new mv" outcome can
  // be restored if it wins.
  int tmp_mv_mode_arr[kMvPreCheckSize];
  int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
  double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx];
  int_mv *select_mv_arr = cpi->select_mv_arr;
  int_mv tmp_select_mv_arr[kMvPreCheckSize];
  int stride = tpl_frame->stride;
  double new_mv_rd = 0;
  double no_new_mv_rd = 0;
  double this_new_mv_rd = 0;
  double this_no_new_mv_rd = 0;
  int idx;
  int tmp_idx;
  // kMvPreCheckSize must be the triangular number of kMvPreCheckLines.
  assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1);

  // no new mv
  // diagonal scan order
  tmp_idx = 0;
  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        // (r, c) == (0, 0) is the current block itself.
        if (r == 0 && c == 0) {
          this_no_new_mv_rd = this_rd;
        }
        no_new_mv_rd += this_rd;
        // Snapshot the decision so it can be restored later.
        tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
        tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
        ++tmp_idx;
      }
    }
  }

  // new mv
  mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
  this_new_mv_rd = eval_mv_mode(
      NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
      rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]);
  new_mv_rd = this_new_mv_rd;
  // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
  // beforehand.
  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
    int r;
    for (r = 0; r <= idx; ++r) {
      int c = idx - r;
      int nb_row = mi_row + r * mi_height;
      int nb_col = mi_col + c * mi_width;
      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
        double this_rd;
        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
        mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode(
            cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx,
            bsize, nb_row, nb_col, &this_rd, mv);
        new_mv_rd += this_rd;
      }
    }
  }

  // update best_mv_mode
  tmp_idx = 0;
  if (no_new_mv_rd < new_mv_rd) {
    // The reference-based configuration wins: restore the snapshot taken in
    // the first pass for the whole window.
    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
      int r;
      for (r = 0; r <= idx; ++r) {
        int c = idx - r;
        int nb_row = mi_row + r * mi_height;
        int nb_col = mi_col + c * mi_width;
        if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
          mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
          select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
          ++tmp_idx;
        }
      }
    }
    rd_diff_arr[mi_row * stride + mi_col] = 0;
  } else {
    // NEW_MV_MODE wins; record its rd advantage excluding the current
    // block's own contribution.
    rd_diff_arr[mi_row * stride + mi_col] =
        (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd);
  }
}
1052 
// Runs predict_mv_mode() over every bsize-aligned unit of the frame, in
// anti-diagonal scan order so that each unit's above/left neighbors have
// already been decided.
static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x,
                                GF_PICTURE *gf_picture,
                                MotionField *motion_field, int frame_idx,
                                TplDepFrame *tpl_frame, int rf_idx,
                                BLOCK_SIZE bsize) {
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int unit_rows = tpl_frame->mi_rows / mi_height;
  const int unit_cols = tpl_frame->mi_cols / mi_width;
  const int num_diagonals = unit_rows + unit_cols - 1;
  int diag;
  for (diag = 0; diag < num_diagonals; ++diag) {
    // Clamp the row range so (r, diag - r) stays inside the unit grid.
    const int r_begin = VPXMAX(diag - unit_cols + 1, 0);
    const int r_end = VPXMIN(diag, unit_rows - 1);
    int r;
    for (r = r_begin; r <= r_end; ++r) {
      const int c = diag - r;
      const int mi_row = r * mi_height;
      const int mi_col = c * mi_width;
      assert(c >= 0 && c < unit_cols);
      assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
      assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
      predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame,
                      rf_idx, bsize, mi_row, mi_col);
    }
  }
}
1079 
// Refines the stored MV for block (mi_row, mi_col) against |ref_frame|:
// seeds from the motion field, runs full-pixel then sub-pixel search, and
// writes the result back into |motion_field|.
static void do_motion_search(VP9_COMP *cpi, ThreadData *td,
                             MotionField *motion_field, int frame_idx,
                             YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize,
                             int mi_row, int mi_col) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  const int stride = xd->cur_buf->y_stride;
  const int mb_y_offset = mi_row * MI_SIZE * stride + mi_col * MI_SIZE;
  uint8_t *cur_frame_buf;
  uint8_t *ref_frame_buf;
  int_mv mv;

  assert(ref_frame != NULL);
  set_mv_limits(cm, x, mi_row, mi_col);

  mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col);
  cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset;
  ref_frame_buf = ref_frame->y_buffer + mb_y_offset;
  full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf,
                           ref_frame_buf, stride, bsize, mi_row, mi_col,
                           &mv.as_mv);
  sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride, bsize,
                          &mv.as_mv);
  vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv);
}
1104 
// Builds a dense per-block motion field for frame |frame_idx| against each
// available reference frame, at block size |bsize|.
static void build_motion_field(
    VP9_COMP *cpi, int frame_idx,
    YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;
  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
  const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
  int rf_idx;

  // Lambda is a quarter of the block's pixel count; the assert guards
  // against truncation in the shift.
  tpl_frame->lambda = (pw * ph) >> 2;
  assert(pw * ph == tpl_frame->lambda << 2);

  for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
    MotionField *motion_field = vp9_motion_field_info_get_motion_field(
        &cpi->motion_field_info, frame_idx, rf_idx, bsize);
    int mi_row, mi_col;
    // Skip reference slots that are not populated for this frame.
    if (ref_frame[rf_idx] == NULL) continue;
    vp9_motion_field_reset_mvs(motion_field);
    for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
        do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx],
                         bsize, mi_row, mi_col);
      }
    }
  }
}
1136 #endif  // CONFIG_NON_GREEDY_MV
1137 
// Runs the TPL "flow dispenser" for one GOP frame: sets up scaling,
// quantizer and rd constants, optionally builds motion fields (non-greedy
// MV path), then for every bsize-aligned block runs mode_estimation(),
// stores the per-block TPL stats, and back-propagates dependency info into
// earlier frames' stats via tpl_model_update().
static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                              int frame_idx, BLOCK_SIZE bsize) {
  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
  VpxTplFrameStats *tpl_frame_stats_before_propagation =
      &cpi->tpl_gop_stats.frame_stats_list[frame_idx];
  YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
  YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL };

  VP9_COMMON *cm = &cpi->common;
  struct scale_factors sf;
  int rdmult, idx;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  int mi_row, mi_col;

#if CONFIG_VP9_HIGHBITDEPTH
  // Two predictor buffers: which one is used depends on whether the
  // current frame buffer is high bit depth (selected below).
  DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
  DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
  uint8_t *predictor;
#else
  DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
#endif
  DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);

  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];

  tpl_frame_stats_before_propagation->frame_width = cm->width;
  tpl_frame_stats_before_propagation->frame_height = cm->height;
  // Setup scaling factor
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_setup_scale_factors_for_frame(
      &sf, this_frame->y_crop_width, this_frame->y_crop_height,
      this_frame->y_crop_width, this_frame->y_crop_height,
      cpi->common.use_highbitdepth);

  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    predictor = CONVERT_TO_BYTEPTR(predictor16);
  else
    predictor = predictor8;
#else
  vp9_setup_scale_factors_for_frame(
      &sf, this_frame->y_crop_width, this_frame->y_crop_height,
      this_frame->y_crop_width, this_frame->y_crop_height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Prepare reference frame pointers. If any reference frame slot is
  // unavailable, the pointer will be set to Null.
  for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) {
    int rf_idx = gf_picture[frame_idx].ref_frame[idx];
    if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
  }

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;
  xd->cur_buf = this_frame;

  // Get rd multiplier set up.
  rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
  set_error_per_bit(&cpi->td.mb, rdmult);
  vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);

  tpl_frame->is_valid = 1;

  cm->base_qindex = tpl_frame->base_qindex;
  vp9_frame_init_quantizer(cpi);

#if CONFIG_NON_GREEDY_MV
  {
    int square_block_idx;
    int rf_idx;
    // Build dense motion fields for every square block size, then select
    // per-block MV modes for each available reference frame.
    for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
         ++square_block_idx) {
      BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
      build_motion_field(cpi, frame_idx, ref_frame, square_bsize);
    }
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
      if (ref_frame_idx != -1) {
        MotionField *motion_field = vp9_motion_field_info_get_motion_field(
            &cpi->motion_field_info, frame_idx, rf_idx, bsize);
        predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx,
                            tpl_frame, rf_idx, bsize);
      }
    }
  }
#endif  // CONFIG_NON_GREEDY_MV

  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
      int64_t recon_error = 0;
      int64_t rate_cost = 0;
      int64_t sse = 0;
      // Ref frame index in the ref frame buffer.
      int ref_frame_idx = -1;
      mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
                      src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
                      tx_size, ref_frame, predictor, &recon_error, &rate_cost,
                      &sse, &ref_frame_idx);
      // Motion flow dependency dispenser.
      tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
                      tpl_frame->stride);

      tpl_store_before_propagation(
          tpl_frame_stats_before_propagation->block_stats_list,
          tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, tpl_frame->stride,
          recon_error, rate_cost, ref_frame_idx);

      tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
                       bsize);
    }
  }
}
1256 
// Shrinks |tpl_gop_stats| by dropping the last |extra_frames| frame
// entries. The retained entries are deep-copied (including each frame's
// block_stats_list) into freshly allocated arrays, and the old lists are
// freed. Raises VPX_CODEC_ERROR through |error_info| when there are not
// more than |extra_frames| frames available.
static void trim_tpl_stats(struct vpx_internal_error_info *error_info,
                           VpxTplGopStats *tpl_gop_stats, int extra_frames) {
  int i;
  VpxTplFrameStats *new_frame_stats;
  const int new_size = tpl_gop_stats->size - extra_frames;
  if (tpl_gop_stats->size <= extra_frames)
    vpx_internal_error(
        error_info, VPX_CODEC_ERROR,
        "The number of frames in VpxTplGopStats is fewer than expected.");
  CHECK_MEM_ERROR(error_info, new_frame_stats,
                  vpx_calloc(new_size, sizeof(*new_frame_stats)));
  for (i = 0; i < new_size; i++) {
    VpxTplFrameStats *frame_stats = &tpl_gop_stats->frame_stats_list[i];
    const int num_blocks = frame_stats->num_blocks;
    // Copy the scalar fields once (the original assigned num_blocks twice),
    // then deep-copy the per-block stats array.
    new_frame_stats[i].num_blocks = num_blocks;
    new_frame_stats[i].frame_width = frame_stats->frame_width;
    new_frame_stats[i].frame_height = frame_stats->frame_height;
    CHECK_MEM_ERROR(
        error_info, new_frame_stats[i].block_stats_list,
        vpx_calloc(num_blocks, sizeof(*new_frame_stats[i].block_stats_list)));
    memcpy(new_frame_stats[i].block_stats_list, frame_stats->block_stats_list,
           num_blocks * sizeof(*new_frame_stats[i].block_stats_list));
  }
  free_tpl_frame_stats_list(tpl_gop_stats);
  tpl_gop_stats->size = new_size;
  tpl_gop_stats->frame_stats_list = new_frame_stats;
}
1285 
1286 #if CONFIG_NON_GREEDY_MV
1287 #define DUMP_TPL_STATS 0
1288 #if DUMP_TPL_STATS
// Debug helper: prints an h x w window of |buf| starting at (row, col).
// Output format (unchanged): "h w\n" header, then all samples separated by
// spaces, then a single trailing newline.
static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
  int r, c;
  printf("%d %d\n", h, w);
  for (r = 0; r < h; ++r) {
    const uint8_t *line = buf + (row + r) * stride + col;
    for (c = 0; c < w; ++c) {
      printf("%d ", line[c]);
    }
  }
  printf("\n");
}
1299 
dump_frame_buf(const YV12_BUFFER_CONFIG * frame_buf)1300 static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
1301   dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
1302            frame_buf->y_width);
1303   dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
1304            frame_buf->uv_height, frame_buf->uv_width);
1305   dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
1306            frame_buf->uv_height, frame_buf->uv_width);
1307 }
1308 
// Debug helper: for every frame in the TPL group and every populated
// reference slot, prints the frame/reference metadata, the motion field,
// per-block feature scores, MV modes, and finally dumps both frame
// buffers. Output format is consumed by offline tooling; do not change it.
static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
                           const GF_GROUP *gf_group,
                           const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
  int frame_idx;
  const VP9_COMMON *cm = &cpi->common;
  int rf_idx;
  // Frame 0 is skipped; only frames with at least one reference are dumped.
  for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
      int mi_row, mi_col;
      int ref_frame_idx;
      const int mi_height = num_8x8_blocks_high_lookup[bsize];
      const int mi_width = num_8x8_blocks_wide_lookup[bsize];
      ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
      if (ref_frame_idx != -1) {
        YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame;
        const int gf_frame_offset = gf_group->frame_gop_index[frame_idx];
        const int ref_gf_frame_offset =
            gf_group->frame_gop_index[ref_frame_idx];
        // Record separator followed by the header line.
        printf("=\n");
        printf(
            "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d "
            "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n",
            frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE,
            ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset);
        // Motion field: one "row col mv_row mv_col" line per block unit.
        for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
          for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
            if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
              int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info,
                                                       frame_idx, rf_idx, bsize,
                                                       mi_row, mi_col);
              printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row,
                     mv.as_mv.col);
            }
          }
        }
        // Per-block feature scores, space-separated on one line.
        for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
          for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
            if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
              const TplDepStats *tpl_ptr =
                  &tpl_frame
                       ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
              printf("%f ", tpl_ptr->feature_score);
            }
          }
        }
        printf("\n");

        // Selected MV modes for this reference, space-separated.
        for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
          for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
            const int mv_mode =
                tpl_frame
                    ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col];
            printf("%d ", mv_mode);
          }
        }
        printf("\n");

        dump_frame_buf(gf_picture[frame_idx].frame);
        dump_frame_buf(ref_frame_buf);
      }
    }
  }
}
1373 #endif  // DUMP_TPL_STATS
1374 #endif  // CONFIG_NON_GREEDY_MV
1375 
// Allocates (or re-allocates, when the frame dimensions grew) the per-frame
// TPL stats buffers for the whole ARF group, plus the non-greedy-MV side
// arrays, and resets the encoder frame-buffer bookkeeping.
void vp9_init_tpl_buffer(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int frame;

  // Dimensions are aligned to superblock granularity before sizing buffers.
  const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_NON_GREEDY_MV
  int rf_idx;

  vpx_free(cpi->select_mv_arr);
  CHECK_MEM_ERROR(
      &cm->error, cpi->select_mv_arr,
      vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
#endif

  // TODO(jingning): Reduce the actual memory use for tpl model build up.
  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
    // Reuse existing buffers when they are already large enough.
    if (cpi->tpl_stats[frame].width >= mi_cols &&
        cpi->tpl_stats[frame].height >= mi_rows &&
        cpi->tpl_stats[frame].tpl_stats_ptr)
      continue;

#if CONFIG_NON_GREEDY_MV
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]);
      CHECK_MEM_ERROR(
          &cm->error, cpi->tpl_stats[frame].mv_mode_arr[rf_idx],
          vpx_calloc(mi_rows * mi_cols * 4,
                     sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx])));
      vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]);
      CHECK_MEM_ERROR(
          &cm->error, cpi->tpl_stats[frame].rd_diff_arr[rf_idx],
          vpx_calloc(mi_rows * mi_cols * 4,
                     sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx])));
    }
#endif
    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
    CHECK_MEM_ERROR(&cm->error, cpi->tpl_stats[frame].tpl_stats_ptr,
                    vpx_calloc(mi_rows * mi_cols,
                               sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
    cpi->tpl_stats[frame].is_valid = 0;
    cpi->tpl_stats[frame].width = mi_cols;
    cpi->tpl_stats[frame].height = mi_rows;
    cpi->tpl_stats[frame].stride = mi_cols;
    cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
    cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
  }

  // Mark every encoder frame buffer slot as unused.
  for (frame = 0; frame < REF_FRAMES; ++frame) {
    cpi->enc_frame_buf[frame].mem_valid = 0;
    cpi->enc_frame_buf[frame].released = 1;
  }
}
1429 
// Release every buffer allocated by vp9_init_tpl_buffer and mark the
// per-frame tpl stats invalid.
void vp9_free_tpl_buffer(VP9_COMP *cpi) {
  int frame_idx;
#if CONFIG_NON_GREEDY_MV
  vp9_free_motion_field_info(&cpi->motion_field_info);
  vpx_free(cpi->select_mv_arr);
#endif
  for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    TplDepFrame *tpl = &cpi->tpl_stats[frame_idx];
#if CONFIG_NON_GREEDY_MV
    int rf_idx;
    for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) {
      vpx_free(tpl->mv_mode_arr[rf_idx]);
      vpx_free(tpl->rd_diff_arr[rf_idx]);
    }
#endif
    vpx_free(tpl->tpl_stats_ptr);
    tpl->is_valid = 0;
  }
  free_tpl_frame_stats_list(&cpi->tpl_gop_stats);
}
1449 
1450 #if CONFIG_RATE_CTRL
// Sum the per-block tpl stats of each valid frame in the current group of
// pictures into cpi->tpl_stats_info, one entry per accumulated frame.
// Frame index 0 is skipped; invalid frames are skipped without consuming
// an output slot.
static void accumulate_frame_tpl_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int out_idx = 0;
  int frame_idx;
  for (frame_idx = 1; frame_idx < gf_group->gf_group_size; ++frame_idx) {
    const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    const TplDepStats *stats = tpl_frame->tpl_stats_ptr;
    const int stride = tpl_frame->stride;
    int64_t sum_intra = 0;
    int64_t sum_inter = 0;
    int64_t sum_mc_dep = 0;
    int64_t sum_mc_ref = 0;
    int64_t sum_mc_flow = 0;
    int row, col;

    if (!tpl_frame->is_valid) continue;

    for (row = 0; tpl_frame->is_valid && row < cm->mi_rows; ++row) {
      for (col = 0; col < cm->mi_cols; ++col) {
        const TplDepStats *s = &stats[row * stride + col];
        sum_intra += s->intra_cost;
        sum_inter += s->inter_cost;
        sum_mc_dep += s->mc_dep_cost;
        sum_mc_ref += s->mc_ref_cost;
        sum_mc_flow += s->mc_flow;
      }
    }

    cpi->tpl_stats_info[out_idx].intra_cost = sum_intra;
    cpi->tpl_stats_info[out_idx].inter_cost = sum_inter;
    cpi->tpl_stats_info[out_idx].mc_dep_cost = sum_mc_dep;
    cpi->tpl_stats_info[out_idx].mc_ref_cost = sum_mc_ref;
    cpi->tpl_stats_info[out_idx].mc_flow = sum_mc_flow;

    ++out_idx;
  }
}
1490 #endif  // CONFIG_RATE_CTRL
1491 
// Build the tpl model for the current GOP: set up the frame list, run the
// backward motion-compensated flow propagation, trim borrowed frames, and
// hand the resulting stats to the external rate controller when present.
void vp9_setup_tpl_stats(VP9_COMP *cpi) {
  GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  int tpl_group_frames = 0;
  int extended_frame_count;
  int idx;

  cpi->tpl_bsize = BLOCK_32X32;

  extended_frame_count =
      init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);

  init_tpl_stats(cpi);
  init_tpl_stats_before_propagation(&cpi->common.error, &cpi->tpl_gop_stats,
                                    cpi->tpl_stats, tpl_group_frames,
                                    cpi->common.width, cpi->common.height);

  // Propagate backwards from tpl_group_frames - 1 down to 1, skipping
  // frames that only reuse an existing buffer.
  for (idx = tpl_group_frames - 1; idx > 0; --idx) {
    if (gf_picture[idx].update_type != USE_BUF_FRAME) {
      mc_flow_dispenser(cpi, gf_picture, idx, cpi->tpl_bsize);
    }
  }

  // The stats include extra frames borrowed from the next GOP; drop them
  // before Q-mode consumes the data.
  trim_tpl_stats(&cpi->common.error, &cpi->tpl_gop_stats,
                 extended_frame_count);

  if (cpi->ext_ratectrl.ready &&
      cpi->ext_ratectrl.funcs.send_tpl_gop_stats != NULL) {
    const vpx_codec_err_t status =
        vp9_extrc_send_tpl_stats(&cpi->ext_ratectrl, &cpi->tpl_gop_stats);
    if (status != VPX_CODEC_OK) {
      vpx_internal_error(&cpi->common.error, status,
                         "vp9_extrc_send_tpl_stats() failed");
    }
  }

#if CONFIG_NON_GREEDY_MV
  cpi->tpl_ready = 1;
#if DUMP_TPL_STATS
  dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
#endif  // DUMP_TPL_STATS
#endif  // CONFIG_NON_GREEDY_MV

#if CONFIG_RATE_CTRL
  if (cpi->oxcf.use_simple_encode_api) {
    accumulate_frame_tpl_stats(cpi);
  }
#endif  // CONFIG_RATE_CTRL
}
1542