• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <memory>
12 #include <vector>
13 #include "./ivfenc.h"
14 #include "vp9/common/vp9_entropymode.h"
15 #include "vp9/common/vp9_enums.h"
16 #include "vp9/common/vp9_onyxc_int.h"
17 #include "vp9/vp9_iface_common.h"
18 #include "vp9/encoder/vp9_encoder.h"
19 #include "vp9/encoder/vp9_firstpass.h"
20 #include "vp9/simple_encode.h"
21 #include "vp9/vp9_cx_iface.h"
22 
23 namespace vp9 {
24 
get_plane_height(vpx_img_fmt_t img_fmt,int frame_height,int plane)25 static int get_plane_height(vpx_img_fmt_t img_fmt, int frame_height,
26                             int plane) {
27   assert(plane < 3);
28   if (plane == 0) {
29     return frame_height;
30   }
31   switch (img_fmt) {
32     case VPX_IMG_FMT_I420:
33     case VPX_IMG_FMT_I440:
34     case VPX_IMG_FMT_YV12:
35     case VPX_IMG_FMT_I42016:
36     case VPX_IMG_FMT_I44016: return (frame_height + 1) >> 1;
37     default: return frame_height;
38   }
39 }
40 
get_plane_width(vpx_img_fmt_t img_fmt,int frame_width,int plane)41 static int get_plane_width(vpx_img_fmt_t img_fmt, int frame_width, int plane) {
42   assert(plane < 3);
43   if (plane == 0) {
44     return frame_width;
45   }
46   switch (img_fmt) {
47     case VPX_IMG_FMT_I420:
48     case VPX_IMG_FMT_YV12:
49     case VPX_IMG_FMT_I422:
50     case VPX_IMG_FMT_I42016:
51     case VPX_IMG_FMT_I42216: return (frame_width + 1) >> 1;
52     default: return frame_width;
53   }
54 }
55 
56 // TODO(angiebird): Merge this function with vpx_img_plane_width()
img_plane_width(const vpx_image_t * img,int plane)57 static int img_plane_width(const vpx_image_t *img, int plane) {
58   if (plane > 0 && img->x_chroma_shift > 0)
59     return (img->d_w + 1) >> img->x_chroma_shift;
60   else
61     return img->d_w;
62 }
63 
64 // TODO(angiebird): Merge this function with vpx_img_plane_height()
img_plane_height(const vpx_image_t * img,int plane)65 static int img_plane_height(const vpx_image_t *img, int plane) {
66   if (plane > 0 && img->y_chroma_shift > 0)
67     return (img->d_h + 1) >> img->y_chroma_shift;
68   else
69     return img->d_h;
70 }
71 
72 // TODO(angiebird): Merge this function with vpx_img_read()
img_read(vpx_image_t * img,FILE * file)73 static int img_read(vpx_image_t *img, FILE *file) {
74   int plane;
75 
76   for (plane = 0; plane < 3; ++plane) {
77     unsigned char *buf = img->planes[plane];
78     const int stride = img->stride[plane];
79     const int w = img_plane_width(img, plane) *
80                   ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
81     const int h = img_plane_height(img, plane);
82     int y;
83 
84     for (y = 0; y < h; ++y) {
85       if (fread(buf, 1, w, file) != (size_t)w) return 0;
86       buf += stride;
87     }
88   }
89 
90   return 1;
91 }
92 
// Assume every config in VP9EncoderConfig is less than 100 characters.
// Size, in chars, of each fixed buffer in EncodeConfig.
#define ENCODE_CONFIG_BUF_SIZE 100
// One encoder configuration entry stored as a name/value pair of fixed-size
// character buffers.
struct EncodeConfig {
  // Entry name; SET_STRUCT_VALUE compares it against a field name with
  // strcmp(), so it is expected to be NUL-terminated.
  char name[ENCODE_CONFIG_BUF_SIZE];
  // Entry value as text. NOTE(review): presumably NUL-terminated as well --
  // confirm against the code that fills it in.
  char value[ENCODE_CONFIG_BUF_SIZE];
};
99 
// Private state of SimpleEncode that depends on libvpx-internal types.
class SimpleEncode::EncodeImpl {
 public:
  // Encoder instance. NOTE(review): presumably created with init_encoder() and
  // released with free_encoder() -- confirm against the users of this class.
  VP9_COMP *cpi;
  // Pixel format of the frames handled by this encoder.
  vpx_img_fmt_t img_fmt;
  // Scratch image. NOTE(review): presumably the destination of img_read() --
  // confirm against the users of this class.
  vpx_image_t tmp_img;
  // First-pass statistics entries.
  std::vector<FIRSTPASS_STATS> first_pass_stats;
  // Name/value configuration entries.
  std::vector<EncodeConfig> encode_config_list;
};
108 
init_encoder(const VP9EncoderConfig * oxcf,vpx_img_fmt_t img_fmt)109 static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
110                               vpx_img_fmt_t img_fmt) {
111   VP9_COMP *cpi;
112   BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
113   if (!buffer_pool) return NULL;
114   vp9_initialize_enc();
115   cpi = vp9_create_compressor(oxcf, buffer_pool);
116   vp9_update_compressor_with_img_fmt(cpi, img_fmt);
117   return cpi;
118 }
119 
free_encoder(VP9_COMP * cpi)120 static void free_encoder(VP9_COMP *cpi) {
121   BufferPool *buffer_pool = cpi->common.buffer_pool;
122   vp9_remove_compressor(cpi);
123   // buffer_pool needs to be free after cpi because buffer_pool contains
124   // allocated buffers that will be free in vp9_remove_compressor()
125   vpx_free(buffer_pool);
126 }
127 
make_vpx_rational(int num,int den)128 static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
129   vpx_rational_t v;
130   v.num = num;
131   v.den = den;
132   return v;
133 }
134 
135 static INLINE FrameType
get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type)136 get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
137   switch (update_type) {
138     case KF_UPDATE: return kFrameTypeKey;
139     case ARF_UPDATE: return kFrameTypeAltRef;
140     case GF_UPDATE: return kFrameTypeGolden;
141     case OVERLAY_UPDATE: return kFrameTypeOverlay;
142     case LF_UPDATE: return kFrameTypeInter;
143     default:
144       fprintf(stderr, "Unsupported update_type %d\n", update_type);
145       abort();
146   }
147 }
148 
update_partition_info(const PARTITION_INFO * input_partition_info,const int num_rows_4x4,const int num_cols_4x4,PartitionInfo * output_partition_info)149 static void update_partition_info(const PARTITION_INFO *input_partition_info,
150                                   const int num_rows_4x4,
151                                   const int num_cols_4x4,
152                                   PartitionInfo *output_partition_info) {
153   const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
154   for (int i = 0; i < num_units_4x4; ++i) {
155     output_partition_info[i].row = input_partition_info[i].row;
156     output_partition_info[i].column = input_partition_info[i].column;
157     output_partition_info[i].row_start = input_partition_info[i].row_start;
158     output_partition_info[i].column_start =
159         input_partition_info[i].column_start;
160     output_partition_info[i].width = input_partition_info[i].width;
161     output_partition_info[i].height = input_partition_info[i].height;
162   }
163 }
164 
165 // translate MV_REFERENCE_FRAME to RefFrameType
mv_ref_frame_to_ref_frame_type(MV_REFERENCE_FRAME mv_ref_frame)166 static RefFrameType mv_ref_frame_to_ref_frame_type(
167     MV_REFERENCE_FRAME mv_ref_frame) {
168   switch (mv_ref_frame) {
169     case LAST_FRAME: return kRefFrameTypeLast;
170     case GOLDEN_FRAME: return kRefFrameTypePast;
171     case ALTREF_FRAME: return kRefFrameTypeFuture;
172     default: return kRefFrameTypeNone;
173   }
174 }
175 
update_motion_vector_info(const MOTION_VECTOR_INFO * input_motion_vector_info,const int num_rows_4x4,const int num_cols_4x4,MotionVectorInfo * output_motion_vector_info,int motion_vector_scale)176 static void update_motion_vector_info(
177     const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4,
178     const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info,
179     int motion_vector_scale) {
180   const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
181   for (int i = 0; i < num_units_4x4; ++i) {
182     const MV_REFERENCE_FRAME *in_ref_frame =
183         input_motion_vector_info[i].ref_frame;
184     output_motion_vector_info[i].mv_count =
185         (in_ref_frame[0] == INTRA_FRAME)
186             ? 0
187             : ((in_ref_frame[1] == NO_REF_FRAME) ? 1 : 2);
188     if (in_ref_frame[0] == NO_REF_FRAME) {
189       fprintf(stderr, "in_ref_frame[0] shouldn't be NO_REF_FRAME\n");
190       abort();
191     }
192     output_motion_vector_info[i].ref_frame[0] =
193         mv_ref_frame_to_ref_frame_type(in_ref_frame[0]);
194     output_motion_vector_info[i].ref_frame[1] =
195         mv_ref_frame_to_ref_frame_type(in_ref_frame[1]);
196     output_motion_vector_info[i].mv_row[0] =
197         (double)input_motion_vector_info[i].mv[0].as_mv.row /
198         motion_vector_scale;
199     output_motion_vector_info[i].mv_column[0] =
200         (double)input_motion_vector_info[i].mv[0].as_mv.col /
201         motion_vector_scale;
202     output_motion_vector_info[i].mv_row[1] =
203         (double)input_motion_vector_info[i].mv[1].as_mv.row /
204         motion_vector_scale;
205     output_motion_vector_info[i].mv_column[1] =
206         (double)input_motion_vector_info[i].mv[1].as_mv.col /
207         motion_vector_scale;
208   }
209 }
210 
update_tpl_stats_info(const TplDepStats * input_tpl_stats_info,const int show_frame_count,TplStatsInfo * output_tpl_stats_info)211 static void update_tpl_stats_info(const TplDepStats *input_tpl_stats_info,
212                                   const int show_frame_count,
213                                   TplStatsInfo *output_tpl_stats_info) {
214   int frame_idx;
215   for (frame_idx = 0; frame_idx < show_frame_count; ++frame_idx) {
216     output_tpl_stats_info[frame_idx].intra_cost =
217         input_tpl_stats_info[frame_idx].intra_cost;
218     output_tpl_stats_info[frame_idx].inter_cost =
219         input_tpl_stats_info[frame_idx].inter_cost;
220     output_tpl_stats_info[frame_idx].mc_flow =
221         input_tpl_stats_info[frame_idx].mc_flow;
222     output_tpl_stats_info[frame_idx].mc_dep_cost =
223         input_tpl_stats_info[frame_idx].mc_dep_cost;
224     output_tpl_stats_info[frame_idx].mc_ref_cost =
225         input_tpl_stats_info[frame_idx].mc_ref_cost;
226   }
227 }
228 
update_frame_counts(const FRAME_COUNTS * input_counts,FrameCounts * output_counts)229 static void update_frame_counts(const FRAME_COUNTS *input_counts,
230                                 FrameCounts *output_counts) {
231   // Init array sizes.
232   output_counts->y_mode.resize(BLOCK_SIZE_GROUPS);
233   for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
234     output_counts->y_mode[i].resize(INTRA_MODES);
235   }
236 
237   output_counts->uv_mode.resize(INTRA_MODES);
238   for (int i = 0; i < INTRA_MODES; ++i) {
239     output_counts->uv_mode[i].resize(INTRA_MODES);
240   }
241 
242   output_counts->partition.resize(PARTITION_CONTEXTS);
243   for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
244     output_counts->partition[i].resize(PARTITION_TYPES);
245   }
246 
247   output_counts->coef.resize(TX_SIZES);
248   output_counts->eob_branch.resize(TX_SIZES);
249   for (int i = 0; i < TX_SIZES; ++i) {
250     output_counts->coef[i].resize(PLANE_TYPES);
251     output_counts->eob_branch[i].resize(PLANE_TYPES);
252     for (int j = 0; j < PLANE_TYPES; ++j) {
253       output_counts->coef[i][j].resize(REF_TYPES);
254       output_counts->eob_branch[i][j].resize(REF_TYPES);
255       for (int k = 0; k < REF_TYPES; ++k) {
256         output_counts->coef[i][j][k].resize(COEF_BANDS);
257         output_counts->eob_branch[i][j][k].resize(COEF_BANDS);
258         for (int l = 0; l < COEF_BANDS; ++l) {
259           output_counts->coef[i][j][k][l].resize(COEFF_CONTEXTS);
260           output_counts->eob_branch[i][j][k][l].resize(COEFF_CONTEXTS);
261           for (int m = 0; m < COEFF_CONTEXTS; ++m) {
262             output_counts->coef[i][j][k][l][m].resize(UNCONSTRAINED_NODES + 1);
263           }
264         }
265       }
266     }
267   }
268 
269   output_counts->switchable_interp.resize(SWITCHABLE_FILTER_CONTEXTS);
270   for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
271     output_counts->switchable_interp[i].resize(SWITCHABLE_FILTERS);
272   }
273 
274   output_counts->inter_mode.resize(INTER_MODE_CONTEXTS);
275   for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
276     output_counts->inter_mode[i].resize(INTER_MODES);
277   }
278 
279   output_counts->intra_inter.resize(INTRA_INTER_CONTEXTS);
280   for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
281     output_counts->intra_inter[i].resize(2);
282   }
283 
284   output_counts->comp_inter.resize(COMP_INTER_CONTEXTS);
285   for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
286     output_counts->comp_inter[i].resize(2);
287   }
288 
289   output_counts->single_ref.resize(REF_CONTEXTS);
290   for (int i = 0; i < REF_CONTEXTS; ++i) {
291     output_counts->single_ref[i].resize(2);
292     for (int j = 0; j < 2; ++j) {
293       output_counts->single_ref[i][j].resize(2);
294     }
295   }
296 
297   output_counts->comp_ref.resize(REF_CONTEXTS);
298   for (int i = 0; i < REF_CONTEXTS; ++i) {
299     output_counts->comp_ref[i].resize(2);
300   }
301 
302   output_counts->skip.resize(SKIP_CONTEXTS);
303   for (int i = 0; i < SKIP_CONTEXTS; ++i) {
304     output_counts->skip[i].resize(2);
305   }
306 
307   output_counts->tx.p32x32.resize(TX_SIZE_CONTEXTS);
308   output_counts->tx.p16x16.resize(TX_SIZE_CONTEXTS);
309   output_counts->tx.p8x8.resize(TX_SIZE_CONTEXTS);
310   for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
311     output_counts->tx.p32x32[i].resize(TX_SIZES);
312     output_counts->tx.p16x16[i].resize(TX_SIZES - 1);
313     output_counts->tx.p8x8[i].resize(TX_SIZES - 2);
314   }
315   output_counts->tx.tx_totals.resize(TX_SIZES);
316 
317   output_counts->mv.joints.resize(MV_JOINTS);
318   output_counts->mv.comps.resize(2);
319   for (int i = 0; i < 2; ++i) {
320     output_counts->mv.comps[i].sign.resize(2);
321     output_counts->mv.comps[i].classes.resize(MV_CLASSES);
322     output_counts->mv.comps[i].class0.resize(CLASS0_SIZE);
323     output_counts->mv.comps[i].bits.resize(MV_OFFSET_BITS);
324     for (int j = 0; j < MV_OFFSET_BITS; ++j) {
325       output_counts->mv.comps[i].bits[j].resize(2);
326     }
327     output_counts->mv.comps[i].class0_fp.resize(CLASS0_SIZE);
328     for (int j = 0; j < CLASS0_SIZE; ++j) {
329       output_counts->mv.comps[i].class0_fp[j].resize(MV_FP_SIZE);
330     }
331     output_counts->mv.comps[i].fp.resize(MV_FP_SIZE);
332     output_counts->mv.comps[i].class0_hp.resize(2);
333     output_counts->mv.comps[i].hp.resize(2);
334   }
335 
336   // Populate counts.
337   for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
338     for (int j = 0; j < INTRA_MODES; ++j) {
339       output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
340     }
341   }
342   for (int i = 0; i < INTRA_MODES; ++i) {
343     for (int j = 0; j < INTRA_MODES; ++j) {
344       output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
345     }
346   }
347   for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
348     for (int j = 0; j < PARTITION_TYPES; ++j) {
349       output_counts->partition[i][j] = input_counts->partition[i][j];
350     }
351   }
352   for (int i = 0; i < TX_SIZES; ++i) {
353     for (int j = 0; j < PLANE_TYPES; ++j) {
354       for (int k = 0; k < REF_TYPES; ++k) {
355         for (int l = 0; l < COEF_BANDS; ++l) {
356           for (int m = 0; m < COEFF_CONTEXTS; ++m) {
357             output_counts->eob_branch[i][j][k][l][m] =
358                 input_counts->eob_branch[i][j][k][l][m];
359             for (int n = 0; n < UNCONSTRAINED_NODES + 1; n++) {
360               output_counts->coef[i][j][k][l][m][n] =
361                   input_counts->coef[i][j][k][l][m][n];
362             }
363           }
364         }
365       }
366     }
367   }
368   for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
369     for (int j = 0; j < SWITCHABLE_FILTERS; ++j) {
370       output_counts->switchable_interp[i][j] =
371           input_counts->switchable_interp[i][j];
372     }
373   }
374   for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
375     for (int j = 0; j < INTER_MODES; ++j) {
376       output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
377     }
378   }
379   for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
380     for (int j = 0; j < 2; ++j) {
381       output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
382     }
383   }
384   for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
385     for (int j = 0; j < 2; ++j) {
386       output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
387     }
388   }
389   for (int i = 0; i < REF_CONTEXTS; ++i) {
390     for (int j = 0; j < 2; ++j) {
391       for (int k = 0; k < 2; ++k) {
392         output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
393       }
394     }
395   }
396   for (int i = 0; i < REF_CONTEXTS; ++i) {
397     for (int j = 0; j < 2; ++j) {
398       output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
399     }
400   }
401   for (int i = 0; i < SKIP_CONTEXTS; ++i) {
402     for (int j = 0; j < 2; ++j) {
403       output_counts->skip[i][j] = input_counts->skip[i][j];
404     }
405   }
406   for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
407     for (int j = 0; j < TX_SIZES; j++) {
408       output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
409     }
410     for (int j = 0; j < TX_SIZES - 1; j++) {
411       output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
412     }
413     for (int j = 0; j < TX_SIZES - 2; j++) {
414       output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
415     }
416   }
417   for (int i = 0; i < TX_SIZES; i++) {
418     output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
419   }
420   for (int i = 0; i < MV_JOINTS; i++) {
421     output_counts->mv.joints[i] = input_counts->mv.joints[i];
422   }
423   for (int k = 0; k < 2; k++) {
424     const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
425     for (int i = 0; i < 2; i++) {
426       output_counts->mv.comps[k].sign[i] = comps_t->sign[i];
427       output_counts->mv.comps[k].class0_hp[i] = comps_t->class0_hp[i];
428       output_counts->mv.comps[k].hp[i] = comps_t->hp[i];
429     }
430     for (int i = 0; i < MV_CLASSES; i++) {
431       output_counts->mv.comps[k].classes[i] = comps_t->classes[i];
432     }
433     for (int i = 0; i < CLASS0_SIZE; i++) {
434       output_counts->mv.comps[k].class0[i] = comps_t->class0[i];
435       for (int j = 0; j < MV_FP_SIZE; j++) {
436         output_counts->mv.comps[k].class0_fp[i][j] = comps_t->class0_fp[i][j];
437       }
438     }
439     for (int i = 0; i < MV_OFFSET_BITS; i++) {
440       for (int j = 0; j < 2; j++) {
441         output_counts->mv.comps[k].bits[i][j] = comps_t->bits[i][j];
442       }
443     }
444     for (int i = 0; i < MV_FP_SIZE; i++) {
445       output_counts->mv.comps[k].fp[i] = comps_t->fp[i];
446     }
447   }
448 }
449 
output_image_buffer(const ImageBuffer & image_buffer,std::FILE * out_file)450 void output_image_buffer(const ImageBuffer &image_buffer, std::FILE *out_file) {
451   for (int plane = 0; plane < 3; ++plane) {
452     const int w = image_buffer.plane_width[plane];
453     const int h = image_buffer.plane_height[plane];
454     const uint8_t *buf = image_buffer.plane_buffer[plane].get();
455     fprintf(out_file, "%d %d\n", h, w);
456     for (int i = 0; i < w * h; ++i) {
457       fprintf(out_file, "%d ", (int)buf[i]);
458     }
459     fprintf(out_file, "\n");
460   }
461 }
462 
init_image_buffer(ImageBuffer * image_buffer,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)463 static bool init_image_buffer(ImageBuffer *image_buffer, int frame_width,
464                               int frame_height, vpx_img_fmt_t img_fmt) {
465   for (int plane = 0; plane < 3; ++plane) {
466     const int w = get_plane_width(img_fmt, frame_width, plane);
467     const int h = get_plane_height(img_fmt, frame_height, plane);
468     image_buffer->plane_width[plane] = w;
469     image_buffer->plane_height[plane] = h;
470     image_buffer->plane_buffer[plane].reset(new (std::nothrow) uint8_t[w * h]);
471     if (image_buffer->plane_buffer[plane].get() == nullptr) {
472       return false;
473     }
474   }
475   return true;
476 }
477 
ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer & image_buffer,IMAGE_BUFFER * image_buffer_c)478 static void ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer &image_buffer,
479                                         IMAGE_BUFFER *image_buffer_c) {
480   image_buffer_c->allocated = 1;
481   for (int plane = 0; plane < 3; ++plane) {
482     image_buffer_c->plane_width[plane] = image_buffer.plane_width[plane];
483     image_buffer_c->plane_height[plane] = image_buffer.plane_height[plane];
484     image_buffer_c->plane_buffer[plane] =
485         image_buffer.plane_buffer[plane].get();
486   }
487 }
488 
// Returns the size in bytes of the buffer reserved for the coded data of one
// frame: three bytes per pixel of a frame_width x frame_height frame.
//
// The product is evaluated in size_t rather than int so that large frame
// dimensions cannot trigger signed integer overflow.
static size_t get_max_coding_data_byte_size(int frame_width, int frame_height) {
  return static_cast<size_t>(frame_width) * static_cast<size_t>(frame_height) *
         3;
}
492 
init_encode_frame_result(EncodeFrameResult * encode_frame_result,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)493 static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
494                                      int frame_width, int frame_height,
495                                      vpx_img_fmt_t img_fmt) {
496   const size_t max_coding_data_byte_size =
497       get_max_coding_data_byte_size(frame_width, frame_height);
498 
499   encode_frame_result->coding_data.reset(
500       new (std::nothrow) uint8_t[max_coding_data_byte_size]);
501   encode_frame_result->max_coding_data_byte_size = max_coding_data_byte_size;
502 
503   encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
504   encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
505   encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
506                                              encode_frame_result->num_cols_4x4);
507   encode_frame_result->motion_vector_info.resize(
508       encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4);
509   encode_frame_result->tpl_stats_info.resize(MAX_LAG_BUFFERS);
510 
511   if (encode_frame_result->coding_data.get() == nullptr) {
512     encode_frame_result->max_coding_data_byte_size = 0;
513     return false;
514   }
515   return init_image_buffer(&encode_frame_result->coded_frame, frame_width,
516                            frame_height, img_fmt);
517 }
518 
encode_frame_result_update_rq_history(const RATE_QINDEX_HISTORY * rq_history,EncodeFrameResult * encode_frame_result)519 static void encode_frame_result_update_rq_history(
520     const RATE_QINDEX_HISTORY *rq_history,
521     EncodeFrameResult *encode_frame_result) {
522   encode_frame_result->recode_count = rq_history->recode_count;
523   for (int i = 0; i < encode_frame_result->recode_count; ++i) {
524     const int q_index = rq_history->q_index_history[i];
525     const int rate = rq_history->rate_history[i];
526     encode_frame_result->q_index_history.push_back(q_index);
527     encode_frame_result->rate_history.push_back(rate);
528   }
529 }
530 
update_encode_frame_result(EncodeFrameResult * encode_frame_result,const int show_frame_count,const ENCODE_FRAME_RESULT * encode_frame_info)531 static void update_encode_frame_result(
532     EncodeFrameResult *encode_frame_result, const int show_frame_count,
533     const ENCODE_FRAME_RESULT *encode_frame_info) {
534   encode_frame_result->coding_data_bit_size =
535       encode_frame_result->coding_data_byte_size * 8;
536   encode_frame_result->show_idx = encode_frame_info->show_idx;
537   encode_frame_result->coding_idx = encode_frame_info->frame_coding_index;
538   assert(kRefFrameTypeMax == MAX_INTER_REF_FRAMES);
539   for (int i = 0; i < kRefFrameTypeMax; ++i) {
540     encode_frame_result->ref_frame_info.coding_indexes[i] =
541         encode_frame_info->ref_frame_coding_indexes[i];
542     encode_frame_result->ref_frame_info.valid_list[i] =
543         encode_frame_info->ref_frame_valid_list[i];
544   }
545   encode_frame_result->frame_type =
546       get_frame_type_from_update_type(encode_frame_info->update_type);
547   encode_frame_result->psnr = encode_frame_info->psnr;
548   encode_frame_result->sse = encode_frame_info->sse;
549   encode_frame_result->quantize_index = encode_frame_info->quantize_index;
550   update_partition_info(encode_frame_info->partition_info,
551                         encode_frame_result->num_rows_4x4,
552                         encode_frame_result->num_cols_4x4,
553                         &encode_frame_result->partition_info[0]);
554   update_motion_vector_info(encode_frame_info->motion_vector_info,
555                             encode_frame_result->num_rows_4x4,
556                             encode_frame_result->num_cols_4x4,
557                             &encode_frame_result->motion_vector_info[0],
558                             kMotionVectorSubPixelPrecision);
559   update_frame_counts(&encode_frame_info->frame_counts,
560                       &encode_frame_result->frame_counts);
561   if (encode_frame_result->frame_type == kFrameTypeAltRef) {
562     update_tpl_stats_info(encode_frame_info->tpl_stats_info, show_frame_count,
563                           &encode_frame_result->tpl_stats_info[0]);
564   }
565   encode_frame_result_update_rq_history(&encode_frame_info->rq_history,
566                                         encode_frame_result);
567 }
568 
IncreaseGroupOfPictureIndex(GroupOfPicture * group_of_picture)569 static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) {
570   ++group_of_picture->next_encode_frame_index;
571 }
572 
IsGroupOfPictureFinished(const GroupOfPicture & group_of_picture)573 static int IsGroupOfPictureFinished(const GroupOfPicture &group_of_picture) {
574   return static_cast<size_t>(group_of_picture.next_encode_frame_index) ==
575          group_of_picture.encode_frame_list.size();
576 }
577 
operator ==(const RefFrameInfo & a,const RefFrameInfo & b)578 bool operator==(const RefFrameInfo &a, const RefFrameInfo &b) {
579   bool match = true;
580   for (int i = 0; i < kRefFrameTypeMax; ++i) {
581     match &= a.coding_indexes[i] == b.coding_indexes[i];
582     match &= a.valid_list[i] == b.valid_list[i];
583   }
584   return match;
585 }
586 
InitRefFrameInfo(RefFrameInfo * ref_frame_info)587 static void InitRefFrameInfo(RefFrameInfo *ref_frame_info) {
588   for (int i = 0; i < kRefFrameTypeMax; ++i) {
589     ref_frame_info->coding_indexes[i] = -1;
590     ref_frame_info->valid_list[i] = 0;
591   }
592 }
593 
594 // After finishing coding a frame, this function will update the coded frame
595 // into the ref_frame_info based on the frame_type and the coding_index.
PostUpdateRefFrameInfo(FrameType frame_type,int frame_coding_index,RefFrameInfo * ref_frame_info)596 static void PostUpdateRefFrameInfo(FrameType frame_type, int frame_coding_index,
597                                    RefFrameInfo *ref_frame_info) {
598   // This part is written based on the logics in vp9_configure_buffer_updates()
599   // and update_ref_frames()
600   int *ref_frame_coding_indexes = ref_frame_info->coding_indexes;
601   switch (frame_type) {
602     case kFrameTypeKey:
603       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
604       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
605       ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
606       break;
607     case kFrameTypeInter:
608       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
609       break;
610     case kFrameTypeAltRef:
611       ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
612       break;
613     case kFrameTypeOverlay:
614       // Reserve the past coding_index in the future slot. This logic is from
615       // update_ref_frames() with condition vp9_preserve_existing_gf() == 1
616       // TODO(angiebird): Invetegate why we need this.
617       ref_frame_coding_indexes[kRefFrameTypeFuture] =
618           ref_frame_coding_indexes[kRefFrameTypePast];
619       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
620       break;
621     case kFrameTypeGolden:
622       ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
623       ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
624       break;
625   }
626 
627   //  This part is written based on the logics in get_ref_frame_flags() but we
628   //  rename the flags alt, golden to future, past respectively. Mark
629   //  non-duplicated reference frames as valid. The priorities are
630   //  kRefFrameTypeLast > kRefFrameTypePast > kRefFrameTypeFuture.
631   const int last_index = ref_frame_coding_indexes[kRefFrameTypeLast];
632   const int past_index = ref_frame_coding_indexes[kRefFrameTypePast];
633   const int future_index = ref_frame_coding_indexes[kRefFrameTypeFuture];
634 
635   int *ref_frame_valid_list = ref_frame_info->valid_list;
636   for (int ref_frame_idx = 0; ref_frame_idx < kRefFrameTypeMax;
637        ++ref_frame_idx) {
638     ref_frame_valid_list[ref_frame_idx] = 1;
639   }
640 
641   if (past_index == last_index) {
642     ref_frame_valid_list[kRefFrameTypePast] = 0;
643   }
644 
645   if (future_index == last_index) {
646     ref_frame_valid_list[kRefFrameTypeFuture] = 0;
647   }
648 
649   if (future_index == past_index) {
650     ref_frame_valid_list[kRefFrameTypeFuture] = 0;
651   }
652 }
653 
SetGroupOfPicture(int first_is_key_frame,int use_alt_ref,int coding_frame_count,int first_show_idx,int last_gop_use_alt_ref,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)654 static void SetGroupOfPicture(int first_is_key_frame, int use_alt_ref,
655                               int coding_frame_count, int first_show_idx,
656                               int last_gop_use_alt_ref, int start_coding_index,
657                               const RefFrameInfo &start_ref_frame_info,
658                               GroupOfPicture *group_of_picture) {
659   // Clean up the state of previous group of picture.
660   group_of_picture->encode_frame_list.clear();
661   group_of_picture->next_encode_frame_index = 0;
662   group_of_picture->show_frame_count = coding_frame_count - use_alt_ref;
663   group_of_picture->start_show_index = first_show_idx;
664   group_of_picture->start_coding_index = start_coding_index;
665   group_of_picture->first_is_key_frame = first_is_key_frame;
666   group_of_picture->use_alt_ref = use_alt_ref;
667   group_of_picture->last_gop_use_alt_ref = last_gop_use_alt_ref;
668 
669   // We need to make a copy of start reference frame info because we
670   // use it to simulate the ref frame update.
671   RefFrameInfo ref_frame_info = start_ref_frame_info;
672 
673   {
674     // First frame in the group of pictures. It's either key frame or show inter
675     // frame.
676     EncodeFrameInfo encode_frame_info;
677     // Set frame_type
678     if (first_is_key_frame) {
679       encode_frame_info.frame_type = kFrameTypeKey;
680     } else {
681       if (last_gop_use_alt_ref) {
682         encode_frame_info.frame_type = kFrameTypeOverlay;
683       } else {
684         encode_frame_info.frame_type = kFrameTypeGolden;
685       }
686     }
687 
688     encode_frame_info.show_idx = first_show_idx;
689     encode_frame_info.coding_index = start_coding_index;
690 
691     encode_frame_info.ref_frame_info = ref_frame_info;
692     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
693                            encode_frame_info.coding_index, &ref_frame_info);
694 
695     group_of_picture->encode_frame_list.push_back(encode_frame_info);
696   }
697 
698   const int show_frame_count = coding_frame_count - use_alt_ref;
699   if (use_alt_ref) {
700     // If there is alternate reference, it is always coded at the second place.
701     // Its show index (or timestamp) is at the last of this group
702     EncodeFrameInfo encode_frame_info;
703     encode_frame_info.frame_type = kFrameTypeAltRef;
704     encode_frame_info.show_idx = first_show_idx + show_frame_count;
705     encode_frame_info.coding_index = start_coding_index + 1;
706 
707     encode_frame_info.ref_frame_info = ref_frame_info;
708     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
709                            encode_frame_info.coding_index, &ref_frame_info);
710 
711     group_of_picture->encode_frame_list.push_back(encode_frame_info);
712   }
713 
714   // Encode the rest show inter frames.
715   for (int i = 1; i < show_frame_count; ++i) {
716     EncodeFrameInfo encode_frame_info;
717     encode_frame_info.frame_type = kFrameTypeInter;
718     encode_frame_info.show_idx = first_show_idx + i;
719     encode_frame_info.coding_index = start_coding_index + use_alt_ref + i;
720 
721     encode_frame_info.ref_frame_info = ref_frame_info;
722     PostUpdateRefFrameInfo(encode_frame_info.frame_type,
723                            encode_frame_info.coding_index, &ref_frame_info);
724 
725     group_of_picture->encode_frame_list.push_back(encode_frame_info);
726   }
727 }
728 
729 // Gets group of picture information from VP9's decision, and update
730 // |group_of_picture| accordingly.
731 // This is called at the starting of encoding of each group of picture.
UpdateGroupOfPicture(const VP9_COMP * cpi,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)732 static void UpdateGroupOfPicture(const VP9_COMP *cpi, int start_coding_index,
733                                  const RefFrameInfo &start_ref_frame_info,
734                                  GroupOfPicture *group_of_picture) {
735   int first_is_key_frame;
736   int use_alt_ref;
737   int coding_frame_count;
738   int first_show_idx;
739   int last_gop_use_alt_ref;
740   vp9_get_next_group_of_picture(cpi, &first_is_key_frame, &use_alt_ref,
741                                 &coding_frame_count, &first_show_idx,
742                                 &last_gop_use_alt_ref);
743   SetGroupOfPicture(first_is_key_frame, use_alt_ref, coding_frame_count,
744                     first_show_idx, last_gop_use_alt_ref, start_coding_index,
745                     start_ref_frame_info, group_of_picture);
746 }
747 
// If |config|.name spells exactly the identifier |field|, parses
// |config|.value with atoi(), stores the result in |structure|->field and
// sets |ret| to 1. Otherwise nothing is modified.
#define SET_STRUCT_VALUE(config, structure, ret, field)  \
  do {                                                   \
    if (strcmp((config).name, #field) != 0) break;       \
    (structure)->field = atoi((config).value);           \
    (ret) = 1;                                           \
  } while (0)
755 
UpdateEncodeConfig(const EncodeConfig & config,VP9EncoderConfig * oxcf)756 static void UpdateEncodeConfig(const EncodeConfig &config,
757                                VP9EncoderConfig *oxcf) {
758   int ret = 0;
759   SET_STRUCT_VALUE(config, oxcf, ret, key_freq);
760   SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmin_section);
761   SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmax_section);
762   SET_STRUCT_VALUE(config, oxcf, ret, under_shoot_pct);
763   SET_STRUCT_VALUE(config, oxcf, ret, over_shoot_pct);
764   SET_STRUCT_VALUE(config, oxcf, ret, max_threads);
765   SET_STRUCT_VALUE(config, oxcf, ret, frame_parallel_decoding_mode);
766   SET_STRUCT_VALUE(config, oxcf, ret, tile_columns);
767   SET_STRUCT_VALUE(config, oxcf, ret, arnr_max_frames);
768   SET_STRUCT_VALUE(config, oxcf, ret, arnr_strength);
769   SET_STRUCT_VALUE(config, oxcf, ret, lag_in_frames);
770   SET_STRUCT_VALUE(config, oxcf, ret, encode_breakout);
771   SET_STRUCT_VALUE(config, oxcf, ret, enable_tpl_model);
772   SET_STRUCT_VALUE(config, oxcf, ret, enable_auto_arf);
773   if (strcmp(config.name, "rc_mode") == 0) {
774     int rc_mode = atoi(config.value);
775     if (rc_mode >= VPX_VBR && rc_mode <= VPX_Q) {
776       oxcf->rc_mode = (enum vpx_rc_mode)rc_mode;
777       ret = 1;
778     } else {
779       fprintf(stderr, "Invalid rc_mode value: %d\n", rc_mode);
780     }
781   }
782   SET_STRUCT_VALUE(config, oxcf, ret, cq_level);
783   if (ret == 0) {
784     fprintf(stderr, "Ignored unsupported encode_config %s\n", config.name);
785   }
786 }
787 
GetEncodeConfig(int frame_width,int frame_height,vpx_rational_t frame_rate,int target_bitrate,int encode_speed,int target_level,vpx_enc_pass enc_pass,const std::vector<EncodeConfig> & encode_config_list)788 static VP9EncoderConfig GetEncodeConfig(
789     int frame_width, int frame_height, vpx_rational_t frame_rate,
790     int target_bitrate, int encode_speed, int target_level,
791     vpx_enc_pass enc_pass,
792     const std::vector<EncodeConfig> &encode_config_list) {
793   VP9EncoderConfig oxcf = vp9_get_encoder_config(
794       frame_width, frame_height, frame_rate, target_bitrate, encode_speed,
795       target_level, enc_pass);
796   for (const auto &config : encode_config_list) {
797     UpdateEncodeConfig(config, &oxcf);
798   }
799   if (enc_pass == VPX_RC_FIRST_PASS) {
800     oxcf.lag_in_frames = 0;
801   }
802   oxcf.use_simple_encode_api = 1;
803   return oxcf;
804 }
805 
SimpleEncode(int frame_width,int frame_height,int frame_rate_num,int frame_rate_den,int target_bitrate,int num_frames,int target_level,const char * infile_path,const char * outfile_path)806 SimpleEncode::SimpleEncode(int frame_width, int frame_height,
807                            int frame_rate_num, int frame_rate_den,
808                            int target_bitrate, int num_frames, int target_level,
809                            const char *infile_path, const char *outfile_path) {
810   impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
811   frame_width_ = frame_width;
812   frame_height_ = frame_height;
813   frame_rate_num_ = frame_rate_num;
814   frame_rate_den_ = frame_rate_den;
815   target_bitrate_ = target_bitrate;
816   num_frames_ = num_frames;
817   encode_speed_ = 0;
818   target_level_ = target_level;
819 
820   frame_coding_index_ = 0;
821   show_frame_count_ = 0;
822 
823   key_frame_group_index_ = 0;
824   key_frame_group_size_ = 0;
825 
826   // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
827   assert(infile_path != nullptr);
828   in_file_ = fopen(infile_path, "r");
829   if (outfile_path != nullptr) {
830     out_file_ = fopen(outfile_path, "w");
831   } else {
832     out_file_ = nullptr;
833   }
834   impl_ptr_->cpi = nullptr;
835   impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
836 
837   InitRefFrameInfo(&ref_frame_info_);
838 }
839 
SetEncodeSpeed(int encode_speed)840 void SimpleEncode::SetEncodeSpeed(int encode_speed) {
841   encode_speed_ = encode_speed;
842 }
843 
SetEncodeConfig(const char * name,const char * value)844 StatusCode SimpleEncode::SetEncodeConfig(const char *name, const char *value) {
845   if (name == nullptr || value == nullptr) {
846     fprintf(stderr, "SetEncodeConfig: null pointer, name %p value %p\n", name,
847             value);
848     return StatusError;
849   }
850   EncodeConfig config;
851   snprintf(config.name, ENCODE_CONFIG_BUF_SIZE, "%s", name);
852   snprintf(config.value, ENCODE_CONFIG_BUF_SIZE, "%s", value);
853   impl_ptr_->encode_config_list.push_back(config);
854   return StatusOk;
855 }
856 
DumpEncodeConfigs(int pass,FILE * fp)857 StatusCode SimpleEncode::DumpEncodeConfigs(int pass, FILE *fp) {
858   if (fp == nullptr) {
859     fprintf(stderr, "DumpEncodeConfigs: null pointer, fp %p\n", fp);
860     return StatusError;
861   }
862   vpx_enc_pass enc_pass;
863   if (pass == 1) {
864     enc_pass = VPX_RC_FIRST_PASS;
865   } else {
866     enc_pass = VPX_RC_LAST_PASS;
867   }
868   const vpx_rational_t frame_rate =
869       make_vpx_rational(frame_rate_num_, frame_rate_den_);
870   const VP9EncoderConfig oxcf = GetEncodeConfig(
871       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
872       target_level_, enc_pass, impl_ptr_->encode_config_list);
873   vp9_dump_encoder_config(&oxcf, fp);
874   return StatusOk;
875 }
876 
ComputeFirstPassStats()877 void SimpleEncode::ComputeFirstPassStats() {
878   vpx_rational_t frame_rate =
879       make_vpx_rational(frame_rate_num_, frame_rate_den_);
880   const VP9EncoderConfig oxcf = GetEncodeConfig(
881       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
882       target_level_, VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
883   impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
884   struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead;
885   int i;
886   int use_highbitdepth = 0;
887   const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
888   const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
889 #if CONFIG_VP9_HIGHBITDEPTH
890   use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth;
891 #endif
892   vpx_image_t img;
893   vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
894   rewind(in_file_);
895   impl_ptr_->first_pass_stats.clear();
896   for (i = 0; i < num_frames_; ++i) {
897     assert(!vp9_lookahead_full(lookahead));
898     if (img_read(&img, in_file_)) {
899       int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
900       int64_t ts_start =
901           timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
902       int64_t ts_end =
903           timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
904       YV12_BUFFER_CONFIG sd;
905       image2yuvconfig(&img, &sd);
906       vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
907       {
908         int64_t time_stamp;
909         int64_t time_end;
910         int flush = 1;  // Makes vp9_get_compressed_data process a frame
911         size_t size;
912         unsigned int frame_flags = 0;
913         ENCODE_FRAME_RESULT encode_frame_info;
914         vp9_init_encode_frame_result(&encode_frame_info);
915         // TODO(angiebird): Call vp9_first_pass directly
916         vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr, 0,
917                                 &time_stamp, &time_end, flush,
918                                 &encode_frame_info);
919         // vp9_get_compressed_data only generates first pass stats not
920         // compresses data
921         assert(size == 0);
922         // Get vp9 first pass motion vector info.
923         std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
924         update_motion_vector_info(
925             impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16,
926             num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision);
927         fp_motion_vector_info_.push_back(mv_info);
928       }
929       impl_ptr_->first_pass_stats.push_back(
930           vp9_get_frame_stats(&impl_ptr_->cpi->twopass));
931     }
932   }
933   // TODO(angiebird): Store the total_stats apart form first_pass_stats
934   impl_ptr_->first_pass_stats.push_back(
935       vp9_get_total_stats(&impl_ptr_->cpi->twopass));
936   vp9_end_first_pass(impl_ptr_->cpi);
937 
938   // Generate key_frame_map based on impl_ptr_->first_pass_stats.
939   key_frame_map_ = ComputeKeyFrameMap();
940 
941   free_encoder(impl_ptr_->cpi);
942   impl_ptr_->cpi = nullptr;
943   rewind(in_file_);
944   vpx_img_free(&img);
945 }
946 
ObserveFirstPassStats()947 std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
948   std::vector<std::vector<double>> output_stats;
949   // TODO(angiebird): This function make several assumptions of
950   // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
951   // last one. 2) The last entry of first_pass_stats is the total_stats.
952   // Change the code structure, so that we don't have to make these assumptions
953 
954   // Note the last entry of first_pass_stats is the total_stats, we don't need
955   // it.
956   for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
957     double *buf_start =
958         reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
959     // We use - 1 here because the last member in FIRSTPASS_STATS is not double
960     double *buf_end =
961         buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
962         1;
963     std::vector<double> this_stats(buf_start, buf_end);
964     output_stats.push_back(this_stats);
965   }
966   return output_stats;
967 }
968 
969 std::vector<std::vector<MotionVectorInfo>>
ObserveFirstPassMotionVectors()970 SimpleEncode::ObserveFirstPassMotionVectors() {
971   return fp_motion_vector_info_;
972 }
973 
SetExternalGroupOfPicturesMap(int * gop_map,int gop_map_size)974 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
975                                                  int gop_map_size) {
976   for (int i = 0; i < gop_map_size; ++i) {
977     gop_map_.push_back(gop_map[i]);
978   }
979   // The following will check and modify gop_map_ to make sure the
980   // gop_map_ satisfies the constraints.
981   // 1) Each key frame position should be at the start of a gop.
982   // 2) The last gop should not use an alt ref.
983   assert(gop_map_.size() == key_frame_map_.size());
984   int last_gop_start = 0;
985   for (int i = 0; static_cast<size_t>(i) < gop_map_.size(); ++i) {
986     if (key_frame_map_[i] == 1 && gop_map_[i] == 0) {
987       fprintf(stderr, "Add an extra gop start at show_idx %d\n", i);
988       // Insert a gop start at key frame location.
989       gop_map_[i] |= kGopMapFlagStart;
990       gop_map_[i] |= kGopMapFlagUseAltRef;
991     }
992     if (gop_map_[i] & kGopMapFlagStart) {
993       last_gop_start = i;
994     }
995   }
996   if (gop_map_[last_gop_start] & kGopMapFlagUseAltRef) {
997     fprintf(stderr,
998             "Last group of pictures starting at show_idx %d shouldn't use alt "
999             "ref\n",
1000             last_gop_start);
1001     gop_map_[last_gop_start] &= ~kGopMapFlagUseAltRef;
1002   }
1003 }
1004 
ObserveExternalGroupOfPicturesMap()1005 std::vector<int> SimpleEncode::ObserveExternalGroupOfPicturesMap() {
1006   return gop_map_;
1007 }
1008 
// Returns a mutable pointer to the first element of |v|, or nullptr when |v|
// is empty. The constness of |v| is cast away so the pointer can be handed to
// interfaces that take a non-const pointer.
template <typename T>
T *GetVectorData(const std::vector<T> &v) {
  return v.empty() ? nullptr : const_cast<T *>(v.data());
}
1016 
GetGopCommand(const std::vector<int> & gop_map,int start_show_index)1017 static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
1018                                  int start_show_index) {
1019   GOP_COMMAND gop_command;
1020   if (static_cast<size_t>(start_show_index) < gop_map.size()) {
1021     assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);
1022     int end_show_index = start_show_index + 1;
1023     // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is
1024     // the start of a gop.
1025     while (static_cast<size_t>(end_show_index) < gop_map.size() &&
1026            (gop_map[end_show_index] & kGopMapFlagStart) == 0) {
1027       ++end_show_index;
1028     }
1029     const int show_frame_count = end_show_index - start_show_index;
1030     int use_alt_ref = (gop_map[start_show_index] & kGopMapFlagUseAltRef) != 0;
1031     if (static_cast<size_t>(end_show_index) == gop_map.size()) {
1032       // This is the last gop group, there must be no altref.
1033       use_alt_ref = 0;
1034     }
1035     gop_command_on(&gop_command, show_frame_count, use_alt_ref);
1036   } else {
1037     gop_command_off(&gop_command);
1038   }
1039   return gop_command;
1040 }
1041 
StartEncode()1042 void SimpleEncode::StartEncode() {
1043   assert(impl_ptr_->first_pass_stats.size() > 0);
1044   vpx_rational_t frame_rate =
1045       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1046   VP9EncoderConfig oxcf = GetEncodeConfig(
1047       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1048       target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1049 
1050   vpx_fixed_buf_t stats;
1051   stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
1052   stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
1053              impl_ptr_->first_pass_stats.size();
1054 
1055   vp9_set_first_pass_stats(&oxcf, &stats);
1056   assert(impl_ptr_->cpi == nullptr);
1057   impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
1058   vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
1059                 frame_height_, 1);
1060 
1061   frame_coding_index_ = 0;
1062   show_frame_count_ = 0;
1063 
1064   assert(impl_ptr_->cpi != nullptr);
1065   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1066   unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1067   vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area);
1068 
1069   UpdateKeyFrameGroup(show_frame_count_);
1070 
1071   const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1072   encode_command_set_gop_command(&impl_ptr_->cpi->encode_command, gop_command);
1073   UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1074                        &group_of_picture_);
1075   rewind(in_file_);
1076 
1077   if (out_file_ != nullptr) {
1078     const char *fourcc = "VP90";
1079     // In SimpleEncode, we use time_base = 1 / TICKS_PER_SEC.
1080     // Based on that, the ivf_timestamp for each image is set to
1081     // show_idx * TICKS_PER_SEC / frame_rate
1082     // such that each image's actual timestamp in seconds can be computed as
1083     // ivf_timestamp * time_base == show_idx / frame_rate
1084     // TODO(angiebird): 1) Add unit test for ivf timestamp.
1085     // 2) Simplify the frame_rate setting process.
1086     vpx_rational_t time_base = make_vpx_rational(1, TICKS_PER_SEC);
1087     ivf_write_file_header_with_video_info(out_file_, *(const uint32_t *)fourcc,
1088                                           num_frames_, frame_width_,
1089                                           frame_height_, time_base);
1090   }
1091 }
1092 
EndEncode()1093 void SimpleEncode::EndEncode() {
1094   free_encoder(impl_ptr_->cpi);
1095   impl_ptr_->cpi = nullptr;
1096   vpx_img_free(&impl_ptr_->tmp_img);
1097   rewind(in_file_);
1098 }
1099 
UpdateKeyFrameGroup(int key_frame_show_index)1100 void SimpleEncode::UpdateKeyFrameGroup(int key_frame_show_index) {
1101   const VP9_COMP *cpi = impl_ptr_->cpi;
1102   key_frame_group_index_ = 0;
1103   key_frame_group_size_ = vp9_get_frames_to_next_key(
1104       &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval);
1105   assert(key_frame_group_size_ > 0);
1106   // Init the reference frame info when a new key frame group appears.
1107   InitRefFrameInfo(&ref_frame_info_);
1108 }
1109 
PostUpdateKeyFrameGroupIndex(FrameType frame_type)1110 void SimpleEncode::PostUpdateKeyFrameGroupIndex(FrameType frame_type) {
1111   if (frame_type != kFrameTypeAltRef) {
1112     // key_frame_group_index_ only counts show frames
1113     ++key_frame_group_index_;
1114   }
1115 }
1116 
GetKeyFrameGroupSize() const1117 int SimpleEncode::GetKeyFrameGroupSize() const { return key_frame_group_size_; }
1118 
ObserveGroupOfPicture() const1119 GroupOfPicture SimpleEncode::ObserveGroupOfPicture() const {
1120   return group_of_picture_;
1121 }
1122 
GetNextEncodeFrameInfo() const1123 EncodeFrameInfo SimpleEncode::GetNextEncodeFrameInfo() const {
1124   return group_of_picture_
1125       .encode_frame_list[group_of_picture_.next_encode_frame_index];
1126 }
1127 
PostUpdateState(const EncodeFrameResult & encode_frame_result)1128 void SimpleEncode::PostUpdateState(
1129     const EncodeFrameResult &encode_frame_result) {
1130   // This function needs to be called before the increament of
1131   // frame_coding_index_
1132   PostUpdateRefFrameInfo(encode_frame_result.frame_type, frame_coding_index_,
1133                          &ref_frame_info_);
1134   ++frame_coding_index_;
1135   if (encode_frame_result.frame_type != kFrameTypeAltRef) {
1136     // Only kFrameTypeAltRef is not a show frame
1137     ++show_frame_count_;
1138   }
1139 
1140   PostUpdateKeyFrameGroupIndex(encode_frame_result.frame_type);
1141   if (key_frame_group_index_ == key_frame_group_size_) {
1142     UpdateKeyFrameGroup(show_frame_count_);
1143   }
1144 
1145   IncreaseGroupOfPictureIndex(&group_of_picture_);
1146   if (IsGroupOfPictureFinished(group_of_picture_)) {
1147     const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1148     encode_command_set_gop_command(&impl_ptr_->cpi->encode_command,
1149                                    gop_command);
1150     // This function needs to be called after ref_frame_info_ is updated
1151     // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
1152     UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1153                          &group_of_picture_);
1154   }
1155 }
1156 
EncodeFrame(EncodeFrameResult * encode_frame_result)1157 void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
1158   VP9_COMP *cpi = impl_ptr_->cpi;
1159   struct lookahead_ctx *lookahead = cpi->lookahead;
1160   int use_highbitdepth = 0;
1161 #if CONFIG_VP9_HIGHBITDEPTH
1162   use_highbitdepth = cpi->common.use_highbitdepth;
1163 #endif
1164   // The lookahead's size is set to oxcf->lag_in_frames.
1165   // We want to fill lookahead to it's max capacity if possible so that the
1166   // encoder can construct alt ref frame in time.
1167   // In the other words, we hope vp9_get_compressed_data to encode a frame
1168   // every time in the function
1169   while (!vp9_lookahead_full(lookahead)) {
1170     // TODO(angiebird): Check whether we can move this file read logics to
1171     // lookahead
1172     if (img_read(&impl_ptr_->tmp_img, in_file_)) {
1173       int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
1174       int64_t ts_start =
1175           timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
1176       int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
1177                                                next_show_idx + 1);
1178       YV12_BUFFER_CONFIG sd;
1179       image2yuvconfig(&impl_ptr_->tmp_img, &sd);
1180       vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
1181     } else {
1182       break;
1183     }
1184   }
1185 
1186   if (init_encode_frame_result(encode_frame_result, frame_width_, frame_height_,
1187                                impl_ptr_->img_fmt)) {
1188     int64_t time_stamp;
1189     int64_t time_end;
1190     int flush = 1;  // Make vp9_get_compressed_data encode a frame
1191     unsigned int frame_flags = 0;
1192     ENCODE_FRAME_RESULT encode_frame_info;
1193     vp9_init_encode_frame_result(&encode_frame_info);
1194     ImageBuffer_to_IMAGE_BUFFER(encode_frame_result->coded_frame,
1195                                 &encode_frame_info.coded_frame);
1196     vp9_get_compressed_data(cpi, &frame_flags,
1197                             &encode_frame_result->coding_data_byte_size,
1198                             encode_frame_result->coding_data.get(),
1199                             encode_frame_result->max_coding_data_byte_size,
1200                             &time_stamp, &time_end, flush, &encode_frame_info);
1201     if (out_file_ != nullptr) {
1202       ivf_write_frame_header(out_file_, time_stamp,
1203                              encode_frame_result->coding_data_byte_size);
1204       fwrite(encode_frame_result->coding_data.get(), 1,
1205              encode_frame_result->coding_data_byte_size, out_file_);
1206     }
1207 
1208     // vp9_get_compressed_data is expected to encode a frame every time, so the
1209     // data size should be greater than zero.
1210     if (encode_frame_result->coding_data_byte_size <= 0) {
1211       fprintf(stderr, "Coding data size <= 0\n");
1212       abort();
1213     }
1214     if (encode_frame_result->coding_data_byte_size >
1215         encode_frame_result->max_coding_data_byte_size) {
1216       fprintf(stderr, "Coding data size exceeds the maximum.\n");
1217       abort();
1218     }
1219 
1220     const GroupOfPicture group_of_picture = this->ObserveGroupOfPicture();
1221     const int show_frame_count = group_of_picture.show_frame_count;
1222     update_encode_frame_result(encode_frame_result, show_frame_count,
1223                                &encode_frame_info);
1224     PostUpdateState(*encode_frame_result);
1225   } else {
1226     // TODO(angiebird): Clean up encode_frame_result.
1227     fprintf(stderr, "init_encode_frame_result() failed.\n");
1228     this->EndEncode();
1229   }
1230 }
1231 
EncodeFrameWithQuantizeIndex(EncodeFrameResult * encode_frame_result,int quantize_index)1232 void SimpleEncode::EncodeFrameWithQuantizeIndex(
1233     EncodeFrameResult *encode_frame_result, int quantize_index) {
1234   encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
1235                                              quantize_index);
1236   EncodeFrame(encode_frame_result);
1237   encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
1238 }
1239 
EncodeFrameWithTargetFrameBits(EncodeFrameResult * encode_frame_result,int target_frame_bits,double percent_diff)1240 void SimpleEncode::EncodeFrameWithTargetFrameBits(
1241     EncodeFrameResult *encode_frame_result, int target_frame_bits,
1242     double percent_diff) {
1243   encode_command_set_target_frame_bits(&impl_ptr_->cpi->encode_command,
1244                                        target_frame_bits, percent_diff);
1245   EncodeFrame(encode_frame_result);
1246   encode_command_reset_target_frame_bits(&impl_ptr_->cpi->encode_command);
1247 }
1248 
GetCodingFrameNumFromGopMap(const std::vector<int> & gop_map)1249 static int GetCodingFrameNumFromGopMap(const std::vector<int> &gop_map) {
1250   int start_show_index = 0;
1251   int coding_frame_count = 0;
1252   while (static_cast<size_t>(start_show_index) < gop_map.size()) {
1253     const GOP_COMMAND gop_command = GetGopCommand(gop_map, start_show_index);
1254     start_show_index += gop_command.show_frame_count;
1255     coding_frame_count += gop_command_coding_frame_count(&gop_command);
1256   }
1257   assert(static_cast<size_t>(start_show_index) == gop_map.size());
1258   return coding_frame_count;
1259 }
1260 
GetCodingFrameNum() const1261 int SimpleEncode::GetCodingFrameNum() const {
1262   assert(impl_ptr_->first_pass_stats.size() > 0);
1263   if (gop_map_.size() > 0) {
1264     return GetCodingFrameNumFromGopMap(gop_map_);
1265   }
1266 
1267   // These are the default settings for now.
1268   TWO_PASS twopass;
1269   const int multi_layer_arf = 0;
1270   const int allow_alt_ref = 1;
1271   vpx_rational_t frame_rate =
1272       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1273   const VP9EncoderConfig oxcf = GetEncodeConfig(
1274       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1275       target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1276   FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1277   fps_init_first_pass_info(&twopass.first_pass_info,
1278                            GetVectorData(impl_ptr_->first_pass_stats),
1279                            num_frames_);
1280   unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1281   vp9_init_vizier_params(&twopass, screen_area);
1282   return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf,
1283                                   allow_alt_ref);
1284 }
1285 
ComputeKeyFrameMap() const1286 std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
1287   // The last entry of first_pass_stats is the overall stats.
1288   assert(impl_ptr_->first_pass_stats.size() ==
1289          static_cast<size_t>(num_frames_) + 1);
1290   vpx_rational_t frame_rate =
1291       make_vpx_rational(frame_rate_num_, frame_rate_den_);
1292   const VP9EncoderConfig oxcf = GetEncodeConfig(
1293       frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1294       target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1295   TWO_PASS twopass;
1296   fps_init_first_pass_info(&twopass.first_pass_info,
1297                            GetVectorData(impl_ptr_->first_pass_stats),
1298                            num_frames_);
1299   std::vector<int> key_frame_map(num_frames_, 0);
1300   vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map));
1301   return key_frame_map;
1302 }
1303 
ObserveKeyFrameMap() const1304 std::vector<int> SimpleEncode::ObserveKeyFrameMap() const {
1305   return key_frame_map_;
1306 }
1307 
GetFramePixelCount() const1308 uint64_t SimpleEncode::GetFramePixelCount() const {
1309   assert(frame_width_ % 2 == 0);
1310   assert(frame_height_ % 2 == 0);
1311   switch (impl_ptr_->img_fmt) {
1312     case VPX_IMG_FMT_I420: return frame_width_ * frame_height_ * 3 / 2;
1313     case VPX_IMG_FMT_I422: return frame_width_ * frame_height_ * 2;
1314     case VPX_IMG_FMT_I444: return frame_width_ * frame_height_ * 3;
1315     case VPX_IMG_FMT_I440: return frame_width_ * frame_height_ * 2;
1316     case VPX_IMG_FMT_I42016: return frame_width_ * frame_height_ * 3 / 2;
1317     case VPX_IMG_FMT_I42216: return frame_width_ * frame_height_ * 2;
1318     case VPX_IMG_FMT_I44416: return frame_width_ * frame_height_ * 3;
1319     case VPX_IMG_FMT_I44016: return frame_width_ * frame_height_ * 2;
1320     default: return 0;
1321   }
1322 }
1323 
~SimpleEncode()1324 SimpleEncode::~SimpleEncode() {
1325   if (in_file_ != nullptr) {
1326     fclose(in_file_);
1327   }
1328   if (out_file_ != nullptr) {
1329     fclose(out_file_);
1330   }
1331 }
1332 
1333 }  // namespace vp9
1334