1 /*
2 * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <memory>
12 #include <vector>
13 #include "./ivfenc.h"
14 #include "vp9/common/vp9_entropymode.h"
15 #include "vp9/common/vp9_enums.h"
16 #include "vp9/common/vp9_onyxc_int.h"
17 #include "vp9/vp9_iface_common.h"
18 #include "vp9/encoder/vp9_encoder.h"
19 #include "vp9/encoder/vp9_firstpass.h"
20 #include "vp9/simple_encode.h"
21 #include "vp9/vp9_cx_iface.h"
22
23 namespace vp9 {
24
get_plane_height(vpx_img_fmt_t img_fmt,int frame_height,int plane)25 static int get_plane_height(vpx_img_fmt_t img_fmt, int frame_height,
26 int plane) {
27 assert(plane < 3);
28 if (plane == 0) {
29 return frame_height;
30 }
31 switch (img_fmt) {
32 case VPX_IMG_FMT_I420:
33 case VPX_IMG_FMT_I440:
34 case VPX_IMG_FMT_YV12:
35 case VPX_IMG_FMT_I42016:
36 case VPX_IMG_FMT_I44016: return (frame_height + 1) >> 1;
37 default: return frame_height;
38 }
39 }
40
get_plane_width(vpx_img_fmt_t img_fmt,int frame_width,int plane)41 static int get_plane_width(vpx_img_fmt_t img_fmt, int frame_width, int plane) {
42 assert(plane < 3);
43 if (plane == 0) {
44 return frame_width;
45 }
46 switch (img_fmt) {
47 case VPX_IMG_FMT_I420:
48 case VPX_IMG_FMT_YV12:
49 case VPX_IMG_FMT_I422:
50 case VPX_IMG_FMT_I42016:
51 case VPX_IMG_FMT_I42216: return (frame_width + 1) >> 1;
52 default: return frame_width;
53 }
54 }
55
56 // TODO(angiebird): Merge this function with vpx_img_plane_width()
img_plane_width(const vpx_image_t * img,int plane)57 static int img_plane_width(const vpx_image_t *img, int plane) {
58 if (plane > 0 && img->x_chroma_shift > 0)
59 return (img->d_w + 1) >> img->x_chroma_shift;
60 else
61 return img->d_w;
62 }
63
64 // TODO(angiebird): Merge this function with vpx_img_plane_height()
img_plane_height(const vpx_image_t * img,int plane)65 static int img_plane_height(const vpx_image_t *img, int plane) {
66 if (plane > 0 && img->y_chroma_shift > 0)
67 return (img->d_h + 1) >> img->y_chroma_shift;
68 else
69 return img->d_h;
70 }
71
72 // TODO(angiebird): Merge this function with vpx_img_read()
img_read(vpx_image_t * img,FILE * file)73 static int img_read(vpx_image_t *img, FILE *file) {
74 int plane;
75
76 for (plane = 0; plane < 3; ++plane) {
77 unsigned char *buf = img->planes[plane];
78 const int stride = img->stride[plane];
79 const int w = img_plane_width(img, plane) *
80 ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
81 const int h = img_plane_height(img, plane);
82 int y;
83
84 for (y = 0; y < h; ++y) {
85 if (fread(buf, 1, w, file) != (size_t)w) return 0;
86 buf += stride;
87 }
88 }
89
90 return 1;
91 }
92
// Capacity, in chars, of each text buffer in EncodeConfig. This relies on the
// assumption that every config in VP9EncoderConfig is less than 100
// characters.
#define ENCODE_CONFIG_BUF_SIZE 100

// One encoder setting kept as a pair of C strings. UpdateEncodeConfig() in
// this file matches |name| against a VP9EncoderConfig field name with strcmp()
// and converts |value| with atoi().
struct EncodeConfig {
  char name[ENCODE_CONFIG_BUF_SIZE];   // Name of the setting.
  char value[ENCODE_CONFIG_BUF_SIZE];  // Value of the setting, as text.
};
99
100 class SimpleEncode::EncodeImpl {
101 public:
102 VP9_COMP *cpi;
103 vpx_img_fmt_t img_fmt;
104 vpx_image_t tmp_img;
105 std::vector<FIRSTPASS_STATS> first_pass_stats;
106 std::vector<EncodeConfig> encode_config_list;
107 };
108
init_encoder(const VP9EncoderConfig * oxcf,vpx_img_fmt_t img_fmt)109 static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
110 vpx_img_fmt_t img_fmt) {
111 VP9_COMP *cpi;
112 BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
113 vp9_initialize_enc();
114 cpi = vp9_create_compressor(oxcf, buffer_pool);
115 vp9_update_compressor_with_img_fmt(cpi, img_fmt);
116 return cpi;
117 }
118
free_encoder(VP9_COMP * cpi)119 static void free_encoder(VP9_COMP *cpi) {
120 BufferPool *buffer_pool = cpi->common.buffer_pool;
121 vp9_remove_compressor(cpi);
122 // buffer_pool needs to be free after cpi because buffer_pool contains
123 // allocated buffers that will be free in vp9_remove_compressor()
124 vpx_free(buffer_pool);
125 }
126
make_vpx_rational(int num,int den)127 static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
128 vpx_rational_t v;
129 v.num = num;
130 v.den = den;
131 return v;
132 }
133
134 static INLINE FrameType
get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type)135 get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
136 switch (update_type) {
137 case KF_UPDATE: return kFrameTypeKey;
138 case ARF_UPDATE: return kFrameTypeAltRef;
139 case GF_UPDATE: return kFrameTypeGolden;
140 case OVERLAY_UPDATE: return kFrameTypeOverlay;
141 case LF_UPDATE: return kFrameTypeInter;
142 default:
143 fprintf(stderr, "Unsupported update_type %d\n", update_type);
144 abort();
145 return kFrameTypeInter;
146 }
147 }
148
update_partition_info(const PARTITION_INFO * input_partition_info,const int num_rows_4x4,const int num_cols_4x4,PartitionInfo * output_partition_info)149 static void update_partition_info(const PARTITION_INFO *input_partition_info,
150 const int num_rows_4x4,
151 const int num_cols_4x4,
152 PartitionInfo *output_partition_info) {
153 const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
154 for (int i = 0; i < num_units_4x4; ++i) {
155 output_partition_info[i].row = input_partition_info[i].row;
156 output_partition_info[i].column = input_partition_info[i].column;
157 output_partition_info[i].row_start = input_partition_info[i].row_start;
158 output_partition_info[i].column_start =
159 input_partition_info[i].column_start;
160 output_partition_info[i].width = input_partition_info[i].width;
161 output_partition_info[i].height = input_partition_info[i].height;
162 }
163 }
164
165 // translate MV_REFERENCE_FRAME to RefFrameType
mv_ref_frame_to_ref_frame_type(MV_REFERENCE_FRAME mv_ref_frame)166 static RefFrameType mv_ref_frame_to_ref_frame_type(
167 MV_REFERENCE_FRAME mv_ref_frame) {
168 switch (mv_ref_frame) {
169 case LAST_FRAME: return kRefFrameTypeLast;
170 case GOLDEN_FRAME: return kRefFrameTypePast;
171 case ALTREF_FRAME: return kRefFrameTypeFuture;
172 default: return kRefFrameTypeNone;
173 }
174 }
175
update_motion_vector_info(const MOTION_VECTOR_INFO * input_motion_vector_info,const int num_rows_4x4,const int num_cols_4x4,MotionVectorInfo * output_motion_vector_info,int motion_vector_scale)176 static void update_motion_vector_info(
177 const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4,
178 const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info,
179 int motion_vector_scale) {
180 const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
181 for (int i = 0; i < num_units_4x4; ++i) {
182 const MV_REFERENCE_FRAME *in_ref_frame =
183 input_motion_vector_info[i].ref_frame;
184 output_motion_vector_info[i].mv_count =
185 (in_ref_frame[0] == INTRA_FRAME) ? 0
186 : ((in_ref_frame[1] == NONE) ? 1 : 2);
187 if (in_ref_frame[0] == NONE) {
188 fprintf(stderr, "in_ref_frame[0] shouldn't be NONE\n");
189 abort();
190 }
191 output_motion_vector_info[i].ref_frame[0] =
192 mv_ref_frame_to_ref_frame_type(in_ref_frame[0]);
193 output_motion_vector_info[i].ref_frame[1] =
194 mv_ref_frame_to_ref_frame_type(in_ref_frame[1]);
195 output_motion_vector_info[i].mv_row[0] =
196 (double)input_motion_vector_info[i].mv[0].as_mv.row /
197 motion_vector_scale;
198 output_motion_vector_info[i].mv_column[0] =
199 (double)input_motion_vector_info[i].mv[0].as_mv.col /
200 motion_vector_scale;
201 output_motion_vector_info[i].mv_row[1] =
202 (double)input_motion_vector_info[i].mv[1].as_mv.row /
203 motion_vector_scale;
204 output_motion_vector_info[i].mv_column[1] =
205 (double)input_motion_vector_info[i].mv[1].as_mv.col /
206 motion_vector_scale;
207 }
208 }
209
update_tpl_stats_info(const TplDepStats * input_tpl_stats_info,const int show_frame_count,TplStatsInfo * output_tpl_stats_info)210 static void update_tpl_stats_info(const TplDepStats *input_tpl_stats_info,
211 const int show_frame_count,
212 TplStatsInfo *output_tpl_stats_info) {
213 int frame_idx;
214 for (frame_idx = 0; frame_idx < show_frame_count; ++frame_idx) {
215 output_tpl_stats_info[frame_idx].intra_cost =
216 input_tpl_stats_info[frame_idx].intra_cost;
217 output_tpl_stats_info[frame_idx].inter_cost =
218 input_tpl_stats_info[frame_idx].inter_cost;
219 output_tpl_stats_info[frame_idx].mc_flow =
220 input_tpl_stats_info[frame_idx].mc_flow;
221 output_tpl_stats_info[frame_idx].mc_dep_cost =
222 input_tpl_stats_info[frame_idx].mc_dep_cost;
223 output_tpl_stats_info[frame_idx].mc_ref_cost =
224 input_tpl_stats_info[frame_idx].mc_ref_cost;
225 }
226 }
227
update_frame_counts(const FRAME_COUNTS * input_counts,FrameCounts * output_counts)228 static void update_frame_counts(const FRAME_COUNTS *input_counts,
229 FrameCounts *output_counts) {
230 // Init array sizes.
231 output_counts->y_mode.resize(BLOCK_SIZE_GROUPS);
232 for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
233 output_counts->y_mode[i].resize(INTRA_MODES);
234 }
235
236 output_counts->uv_mode.resize(INTRA_MODES);
237 for (int i = 0; i < INTRA_MODES; ++i) {
238 output_counts->uv_mode[i].resize(INTRA_MODES);
239 }
240
241 output_counts->partition.resize(PARTITION_CONTEXTS);
242 for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
243 output_counts->partition[i].resize(PARTITION_TYPES);
244 }
245
246 output_counts->coef.resize(TX_SIZES);
247 output_counts->eob_branch.resize(TX_SIZES);
248 for (int i = 0; i < TX_SIZES; ++i) {
249 output_counts->coef[i].resize(PLANE_TYPES);
250 output_counts->eob_branch[i].resize(PLANE_TYPES);
251 for (int j = 0; j < PLANE_TYPES; ++j) {
252 output_counts->coef[i][j].resize(REF_TYPES);
253 output_counts->eob_branch[i][j].resize(REF_TYPES);
254 for (int k = 0; k < REF_TYPES; ++k) {
255 output_counts->coef[i][j][k].resize(COEF_BANDS);
256 output_counts->eob_branch[i][j][k].resize(COEF_BANDS);
257 for (int l = 0; l < COEF_BANDS; ++l) {
258 output_counts->coef[i][j][k][l].resize(COEFF_CONTEXTS);
259 output_counts->eob_branch[i][j][k][l].resize(COEFF_CONTEXTS);
260 for (int m = 0; m < COEFF_CONTEXTS; ++m) {
261 output_counts->coef[i][j][k][l][m].resize(UNCONSTRAINED_NODES + 1);
262 }
263 }
264 }
265 }
266 }
267
268 output_counts->switchable_interp.resize(SWITCHABLE_FILTER_CONTEXTS);
269 for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
270 output_counts->switchable_interp[i].resize(SWITCHABLE_FILTERS);
271 }
272
273 output_counts->inter_mode.resize(INTER_MODE_CONTEXTS);
274 for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
275 output_counts->inter_mode[i].resize(INTER_MODES);
276 }
277
278 output_counts->intra_inter.resize(INTRA_INTER_CONTEXTS);
279 for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
280 output_counts->intra_inter[i].resize(2);
281 }
282
283 output_counts->comp_inter.resize(COMP_INTER_CONTEXTS);
284 for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
285 output_counts->comp_inter[i].resize(2);
286 }
287
288 output_counts->single_ref.resize(REF_CONTEXTS);
289 for (int i = 0; i < REF_CONTEXTS; ++i) {
290 output_counts->single_ref[i].resize(2);
291 for (int j = 0; j < 2; ++j) {
292 output_counts->single_ref[i][j].resize(2);
293 }
294 }
295
296 output_counts->comp_ref.resize(REF_CONTEXTS);
297 for (int i = 0; i < REF_CONTEXTS; ++i) {
298 output_counts->comp_ref[i].resize(2);
299 }
300
301 output_counts->skip.resize(SKIP_CONTEXTS);
302 for (int i = 0; i < SKIP_CONTEXTS; ++i) {
303 output_counts->skip[i].resize(2);
304 }
305
306 output_counts->tx.p32x32.resize(TX_SIZE_CONTEXTS);
307 output_counts->tx.p16x16.resize(TX_SIZE_CONTEXTS);
308 output_counts->tx.p8x8.resize(TX_SIZE_CONTEXTS);
309 for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
310 output_counts->tx.p32x32[i].resize(TX_SIZES);
311 output_counts->tx.p16x16[i].resize(TX_SIZES - 1);
312 output_counts->tx.p8x8[i].resize(TX_SIZES - 2);
313 }
314 output_counts->tx.tx_totals.resize(TX_SIZES);
315
316 output_counts->mv.joints.resize(MV_JOINTS);
317 output_counts->mv.comps.resize(2);
318 for (int i = 0; i < 2; ++i) {
319 output_counts->mv.comps[i].sign.resize(2);
320 output_counts->mv.comps[i].classes.resize(MV_CLASSES);
321 output_counts->mv.comps[i].class0.resize(CLASS0_SIZE);
322 output_counts->mv.comps[i].bits.resize(MV_OFFSET_BITS);
323 for (int j = 0; j < MV_OFFSET_BITS; ++j) {
324 output_counts->mv.comps[i].bits[j].resize(2);
325 }
326 output_counts->mv.comps[i].class0_fp.resize(CLASS0_SIZE);
327 for (int j = 0; j < CLASS0_SIZE; ++j) {
328 output_counts->mv.comps[i].class0_fp[j].resize(MV_FP_SIZE);
329 }
330 output_counts->mv.comps[i].fp.resize(MV_FP_SIZE);
331 output_counts->mv.comps[i].class0_hp.resize(2);
332 output_counts->mv.comps[i].hp.resize(2);
333 }
334
335 // Populate counts.
336 for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
337 for (int j = 0; j < INTRA_MODES; ++j) {
338 output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
339 }
340 }
341 for (int i = 0; i < INTRA_MODES; ++i) {
342 for (int j = 0; j < INTRA_MODES; ++j) {
343 output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
344 }
345 }
346 for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
347 for (int j = 0; j < PARTITION_TYPES; ++j) {
348 output_counts->partition[i][j] = input_counts->partition[i][j];
349 }
350 }
351 for (int i = 0; i < TX_SIZES; ++i) {
352 for (int j = 0; j < PLANE_TYPES; ++j) {
353 for (int k = 0; k < REF_TYPES; ++k) {
354 for (int l = 0; l < COEF_BANDS; ++l) {
355 for (int m = 0; m < COEFF_CONTEXTS; ++m) {
356 output_counts->eob_branch[i][j][k][l][m] =
357 input_counts->eob_branch[i][j][k][l][m];
358 for (int n = 0; n < UNCONSTRAINED_NODES + 1; n++) {
359 output_counts->coef[i][j][k][l][m][n] =
360 input_counts->coef[i][j][k][l][m][n];
361 }
362 }
363 }
364 }
365 }
366 }
367 for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
368 for (int j = 0; j < SWITCHABLE_FILTERS; ++j) {
369 output_counts->switchable_interp[i][j] =
370 input_counts->switchable_interp[i][j];
371 }
372 }
373 for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
374 for (int j = 0; j < INTER_MODES; ++j) {
375 output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
376 }
377 }
378 for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
379 for (int j = 0; j < 2; ++j) {
380 output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
381 }
382 }
383 for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
384 for (int j = 0; j < 2; ++j) {
385 output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
386 }
387 }
388 for (int i = 0; i < REF_CONTEXTS; ++i) {
389 for (int j = 0; j < 2; ++j) {
390 for (int k = 0; k < 2; ++k) {
391 output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
392 }
393 }
394 }
395 for (int i = 0; i < REF_CONTEXTS; ++i) {
396 for (int j = 0; j < 2; ++j) {
397 output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
398 }
399 }
400 for (int i = 0; i < SKIP_CONTEXTS; ++i) {
401 for (int j = 0; j < 2; ++j) {
402 output_counts->skip[i][j] = input_counts->skip[i][j];
403 }
404 }
405 for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
406 for (int j = 0; j < TX_SIZES; j++) {
407 output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
408 }
409 for (int j = 0; j < TX_SIZES - 1; j++) {
410 output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
411 }
412 for (int j = 0; j < TX_SIZES - 2; j++) {
413 output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
414 }
415 }
416 for (int i = 0; i < TX_SIZES; i++) {
417 output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
418 }
419 for (int i = 0; i < MV_JOINTS; i++) {
420 output_counts->mv.joints[i] = input_counts->mv.joints[i];
421 }
422 for (int k = 0; k < 2; k++) {
423 const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
424 for (int i = 0; i < 2; i++) {
425 output_counts->mv.comps[k].sign[i] = comps_t->sign[i];
426 output_counts->mv.comps[k].class0_hp[i] = comps_t->class0_hp[i];
427 output_counts->mv.comps[k].hp[i] = comps_t->hp[i];
428 }
429 for (int i = 0; i < MV_CLASSES; i++) {
430 output_counts->mv.comps[k].classes[i] = comps_t->classes[i];
431 }
432 for (int i = 0; i < CLASS0_SIZE; i++) {
433 output_counts->mv.comps[k].class0[i] = comps_t->class0[i];
434 for (int j = 0; j < MV_FP_SIZE; j++) {
435 output_counts->mv.comps[k].class0_fp[i][j] = comps_t->class0_fp[i][j];
436 }
437 }
438 for (int i = 0; i < MV_OFFSET_BITS; i++) {
439 for (int j = 0; j < 2; j++) {
440 output_counts->mv.comps[k].bits[i][j] = comps_t->bits[i][j];
441 }
442 }
443 for (int i = 0; i < MV_FP_SIZE; i++) {
444 output_counts->mv.comps[k].fp[i] = comps_t->fp[i];
445 }
446 }
447 }
448
output_image_buffer(const ImageBuffer & image_buffer,std::FILE * out_file)449 void output_image_buffer(const ImageBuffer &image_buffer, std::FILE *out_file) {
450 for (int plane = 0; plane < 3; ++plane) {
451 const int w = image_buffer.plane_width[plane];
452 const int h = image_buffer.plane_height[plane];
453 const uint8_t *buf = image_buffer.plane_buffer[plane].get();
454 fprintf(out_file, "%d %d\n", h, w);
455 for (int i = 0; i < w * h; ++i) {
456 fprintf(out_file, "%d ", (int)buf[i]);
457 }
458 fprintf(out_file, "\n");
459 }
460 }
461
init_image_buffer(ImageBuffer * image_buffer,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)462 static bool init_image_buffer(ImageBuffer *image_buffer, int frame_width,
463 int frame_height, vpx_img_fmt_t img_fmt) {
464 for (int plane = 0; plane < 3; ++plane) {
465 const int w = get_plane_width(img_fmt, frame_width, plane);
466 const int h = get_plane_height(img_fmt, frame_height, plane);
467 image_buffer->plane_width[plane] = w;
468 image_buffer->plane_height[plane] = h;
469 image_buffer->plane_buffer[plane].reset(new (std::nothrow) uint8_t[w * h]);
470 if (image_buffer->plane_buffer[plane].get() == nullptr) {
471 return false;
472 }
473 }
474 return true;
475 }
476
ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer & image_buffer,IMAGE_BUFFER * image_buffer_c)477 static void ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer &image_buffer,
478 IMAGE_BUFFER *image_buffer_c) {
479 image_buffer_c->allocated = 1;
480 for (int plane = 0; plane < 3; ++plane) {
481 image_buffer_c->plane_width[plane] = image_buffer.plane_width[plane];
482 image_buffer_c->plane_height[plane] = image_buffer.plane_height[plane];
483 image_buffer_c->plane_buffer[plane] =
484 image_buffer.plane_buffer[plane].get();
485 }
486 }
487
// Returns the size in bytes of the buffer reserved for the coded data of one
// frame: three bytes for every pixel of a |frame_width| x |frame_height|
// frame.
static size_t get_max_coding_data_byte_size(int frame_width, int frame_height) {
  // Widen before multiplying: done in int, the product overflows (undefined
  // behavior) once frame_width * frame_height * 3 exceeds INT_MAX.
  return static_cast<size_t>(frame_width) * frame_height * 3;
}
491
init_encode_frame_result(EncodeFrameResult * encode_frame_result,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)492 static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
493 int frame_width, int frame_height,
494 vpx_img_fmt_t img_fmt) {
495 const size_t max_coding_data_byte_size =
496 get_max_coding_data_byte_size(frame_width, frame_height);
497
498 encode_frame_result->coding_data.reset(
499 new (std::nothrow) uint8_t[max_coding_data_byte_size]);
500
501 encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
502 encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
503 encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
504 encode_frame_result->num_cols_4x4);
505 encode_frame_result->motion_vector_info.resize(
506 encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4);
507 encode_frame_result->tpl_stats_info.resize(MAX_LAG_BUFFERS);
508
509 if (encode_frame_result->coding_data.get() == nullptr) {
510 return false;
511 }
512 return init_image_buffer(&encode_frame_result->coded_frame, frame_width,
513 frame_height, img_fmt);
514 }
515
encode_frame_result_update_rq_history(const RATE_QINDEX_HISTORY * rq_history,EncodeFrameResult * encode_frame_result)516 static void encode_frame_result_update_rq_history(
517 const RATE_QINDEX_HISTORY *rq_history,
518 EncodeFrameResult *encode_frame_result) {
519 encode_frame_result->recode_count = rq_history->recode_count;
520 for (int i = 0; i < encode_frame_result->recode_count; ++i) {
521 const int q_index = rq_history->q_index_history[i];
522 const int rate = rq_history->rate_history[i];
523 encode_frame_result->q_index_history.push_back(q_index);
524 encode_frame_result->rate_history.push_back(rate);
525 }
526 }
527
update_encode_frame_result(EncodeFrameResult * encode_frame_result,const int show_frame_count,const ENCODE_FRAME_RESULT * encode_frame_info)528 static void update_encode_frame_result(
529 EncodeFrameResult *encode_frame_result, const int show_frame_count,
530 const ENCODE_FRAME_RESULT *encode_frame_info) {
531 encode_frame_result->coding_data_bit_size =
532 encode_frame_result->coding_data_byte_size * 8;
533 encode_frame_result->show_idx = encode_frame_info->show_idx;
534 encode_frame_result->coding_idx = encode_frame_info->frame_coding_index;
535 assert(kRefFrameTypeMax == MAX_INTER_REF_FRAMES);
536 for (int i = 0; i < kRefFrameTypeMax; ++i) {
537 encode_frame_result->ref_frame_info.coding_indexes[i] =
538 encode_frame_info->ref_frame_coding_indexes[i];
539 encode_frame_result->ref_frame_info.valid_list[i] =
540 encode_frame_info->ref_frame_valid_list[i];
541 }
542 encode_frame_result->frame_type =
543 get_frame_type_from_update_type(encode_frame_info->update_type);
544 encode_frame_result->psnr = encode_frame_info->psnr;
545 encode_frame_result->sse = encode_frame_info->sse;
546 encode_frame_result->quantize_index = encode_frame_info->quantize_index;
547 update_partition_info(encode_frame_info->partition_info,
548 encode_frame_result->num_rows_4x4,
549 encode_frame_result->num_cols_4x4,
550 &encode_frame_result->partition_info[0]);
551 update_motion_vector_info(encode_frame_info->motion_vector_info,
552 encode_frame_result->num_rows_4x4,
553 encode_frame_result->num_cols_4x4,
554 &encode_frame_result->motion_vector_info[0],
555 kMotionVectorSubPixelPrecision);
556 update_frame_counts(&encode_frame_info->frame_counts,
557 &encode_frame_result->frame_counts);
558 if (encode_frame_result->frame_type == kFrameTypeAltRef) {
559 update_tpl_stats_info(encode_frame_info->tpl_stats_info, show_frame_count,
560 &encode_frame_result->tpl_stats_info[0]);
561 }
562 encode_frame_result_update_rq_history(&encode_frame_info->rq_history,
563 encode_frame_result);
564 }
565
IncreaseGroupOfPictureIndex(GroupOfPicture * group_of_picture)566 static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) {
567 ++group_of_picture->next_encode_frame_index;
568 }
569
IsGroupOfPictureFinished(const GroupOfPicture & group_of_picture)570 static int IsGroupOfPictureFinished(const GroupOfPicture &group_of_picture) {
571 return static_cast<size_t>(group_of_picture.next_encode_frame_index) ==
572 group_of_picture.encode_frame_list.size();
573 }
574
operator ==(const RefFrameInfo & a,const RefFrameInfo & b)575 bool operator==(const RefFrameInfo &a, const RefFrameInfo &b) {
576 bool match = true;
577 for (int i = 0; i < kRefFrameTypeMax; ++i) {
578 match &= a.coding_indexes[i] == b.coding_indexes[i];
579 match &= a.valid_list[i] == b.valid_list[i];
580 }
581 return match;
582 }
583
InitRefFrameInfo(RefFrameInfo * ref_frame_info)584 static void InitRefFrameInfo(RefFrameInfo *ref_frame_info) {
585 for (int i = 0; i < kRefFrameTypeMax; ++i) {
586 ref_frame_info->coding_indexes[i] = -1;
587 ref_frame_info->valid_list[i] = 0;
588 }
589 }
590
591 // After finishing coding a frame, this function will update the coded frame
592 // into the ref_frame_info based on the frame_type and the coding_index.
PostUpdateRefFrameInfo(FrameType frame_type,int frame_coding_index,RefFrameInfo * ref_frame_info)593 static void PostUpdateRefFrameInfo(FrameType frame_type, int frame_coding_index,
594 RefFrameInfo *ref_frame_info) {
595 // This part is written based on the logics in vp9_configure_buffer_updates()
596 // and update_ref_frames()
597 int *ref_frame_coding_indexes = ref_frame_info->coding_indexes;
598 switch (frame_type) {
599 case kFrameTypeKey:
600 ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
601 ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
602 ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
603 break;
604 case kFrameTypeInter:
605 ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
606 break;
607 case kFrameTypeAltRef:
608 ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
609 break;
610 case kFrameTypeOverlay:
611 // Reserve the past coding_index in the future slot. This logic is from
612 // update_ref_frames() with condition vp9_preserve_existing_gf() == 1
613 // TODO(angiebird): Invetegate why we need this.
614 ref_frame_coding_indexes[kRefFrameTypeFuture] =
615 ref_frame_coding_indexes[kRefFrameTypePast];
616 ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
617 break;
618 case kFrameTypeGolden:
619 ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
620 ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
621 break;
622 }
623
624 // This part is written based on the logics in get_ref_frame_flags() but we
625 // rename the flags alt, golden to future, past respectively. Mark
626 // non-duplicated reference frames as valid. The priorities are
627 // kRefFrameTypeLast > kRefFrameTypePast > kRefFrameTypeFuture.
628 const int last_index = ref_frame_coding_indexes[kRefFrameTypeLast];
629 const int past_index = ref_frame_coding_indexes[kRefFrameTypePast];
630 const int future_index = ref_frame_coding_indexes[kRefFrameTypeFuture];
631
632 int *ref_frame_valid_list = ref_frame_info->valid_list;
633 for (int ref_frame_idx = 0; ref_frame_idx < kRefFrameTypeMax;
634 ++ref_frame_idx) {
635 ref_frame_valid_list[ref_frame_idx] = 1;
636 }
637
638 if (past_index == last_index) {
639 ref_frame_valid_list[kRefFrameTypePast] = 0;
640 }
641
642 if (future_index == last_index) {
643 ref_frame_valid_list[kRefFrameTypeFuture] = 0;
644 }
645
646 if (future_index == past_index) {
647 ref_frame_valid_list[kRefFrameTypeFuture] = 0;
648 }
649 }
650
SetGroupOfPicture(int first_is_key_frame,int use_alt_ref,int coding_frame_count,int first_show_idx,int last_gop_use_alt_ref,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)651 static void SetGroupOfPicture(int first_is_key_frame, int use_alt_ref,
652 int coding_frame_count, int first_show_idx,
653 int last_gop_use_alt_ref, int start_coding_index,
654 const RefFrameInfo &start_ref_frame_info,
655 GroupOfPicture *group_of_picture) {
656 // Clean up the state of previous group of picture.
657 group_of_picture->encode_frame_list.clear();
658 group_of_picture->next_encode_frame_index = 0;
659 group_of_picture->show_frame_count = coding_frame_count - use_alt_ref;
660 group_of_picture->start_show_index = first_show_idx;
661 group_of_picture->start_coding_index = start_coding_index;
662 group_of_picture->first_is_key_frame = first_is_key_frame;
663 group_of_picture->use_alt_ref = use_alt_ref;
664 group_of_picture->last_gop_use_alt_ref = last_gop_use_alt_ref;
665
666 // We need to make a copy of start reference frame info because we
667 // use it to simulate the ref frame update.
668 RefFrameInfo ref_frame_info = start_ref_frame_info;
669
670 {
671 // First frame in the group of pictures. It's either key frame or show inter
672 // frame.
673 EncodeFrameInfo encode_frame_info;
674 // Set frame_type
675 if (first_is_key_frame) {
676 encode_frame_info.frame_type = kFrameTypeKey;
677 } else {
678 if (last_gop_use_alt_ref) {
679 encode_frame_info.frame_type = kFrameTypeOverlay;
680 } else {
681 encode_frame_info.frame_type = kFrameTypeGolden;
682 }
683 }
684
685 encode_frame_info.show_idx = first_show_idx;
686 encode_frame_info.coding_index = start_coding_index;
687
688 encode_frame_info.ref_frame_info = ref_frame_info;
689 PostUpdateRefFrameInfo(encode_frame_info.frame_type,
690 encode_frame_info.coding_index, &ref_frame_info);
691
692 group_of_picture->encode_frame_list.push_back(encode_frame_info);
693 }
694
695 const int show_frame_count = coding_frame_count - use_alt_ref;
696 if (use_alt_ref) {
697 // If there is alternate reference, it is always coded at the second place.
698 // Its show index (or timestamp) is at the last of this group
699 EncodeFrameInfo encode_frame_info;
700 encode_frame_info.frame_type = kFrameTypeAltRef;
701 encode_frame_info.show_idx = first_show_idx + show_frame_count;
702 encode_frame_info.coding_index = start_coding_index + 1;
703
704 encode_frame_info.ref_frame_info = ref_frame_info;
705 PostUpdateRefFrameInfo(encode_frame_info.frame_type,
706 encode_frame_info.coding_index, &ref_frame_info);
707
708 group_of_picture->encode_frame_list.push_back(encode_frame_info);
709 }
710
711 // Encode the rest show inter frames.
712 for (int i = 1; i < show_frame_count; ++i) {
713 EncodeFrameInfo encode_frame_info;
714 encode_frame_info.frame_type = kFrameTypeInter;
715 encode_frame_info.show_idx = first_show_idx + i;
716 encode_frame_info.coding_index = start_coding_index + use_alt_ref + i;
717
718 encode_frame_info.ref_frame_info = ref_frame_info;
719 PostUpdateRefFrameInfo(encode_frame_info.frame_type,
720 encode_frame_info.coding_index, &ref_frame_info);
721
722 group_of_picture->encode_frame_list.push_back(encode_frame_info);
723 }
724 }
725
726 // Gets group of picture information from VP9's decision, and update
727 // |group_of_picture| accordingly.
728 // This is called at the starting of encoding of each group of picture.
UpdateGroupOfPicture(const VP9_COMP * cpi,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)729 static void UpdateGroupOfPicture(const VP9_COMP *cpi, int start_coding_index,
730 const RefFrameInfo &start_ref_frame_info,
731 GroupOfPicture *group_of_picture) {
732 int first_is_key_frame;
733 int use_alt_ref;
734 int coding_frame_count;
735 int first_show_idx;
736 int last_gop_use_alt_ref;
737 vp9_get_next_group_of_picture(cpi, &first_is_key_frame, &use_alt_ref,
738 &coding_frame_count, &first_show_idx,
739 &last_gop_use_alt_ref);
740 SetGroupOfPicture(first_is_key_frame, use_alt_ref, coding_frame_count,
741 first_show_idx, last_gop_use_alt_ref, start_coding_index,
742 start_ref_frame_info, group_of_picture);
743 }
744
// If config.name equals the spelling of |field|, stores atoi(config.value)
// into structure->field and sets |ret| to 1. Otherwise does nothing.
// The expansion is a plain `if` statement.
#define SET_STRUCT_VALUE(config, structure, ret, field) \
  if (!strcmp((config).name, #field)) {                 \
    (structure)->field = atoi((config).value);          \
    (ret) = 1;                                          \
  }
750
UpdateEncodeConfig(const EncodeConfig & config,VP9EncoderConfig * oxcf)751 static void UpdateEncodeConfig(const EncodeConfig &config,
752 VP9EncoderConfig *oxcf) {
753 int ret = 0;
754 SET_STRUCT_VALUE(config, oxcf, ret, key_freq);
755 SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmin_section);
756 SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmax_section);
757 SET_STRUCT_VALUE(config, oxcf, ret, under_shoot_pct);
758 SET_STRUCT_VALUE(config, oxcf, ret, over_shoot_pct);
759 SET_STRUCT_VALUE(config, oxcf, ret, max_threads);
760 SET_STRUCT_VALUE(config, oxcf, ret, frame_parallel_decoding_mode);
761 SET_STRUCT_VALUE(config, oxcf, ret, tile_columns);
762 SET_STRUCT_VALUE(config, oxcf, ret, arnr_max_frames);
763 SET_STRUCT_VALUE(config, oxcf, ret, arnr_strength);
764 SET_STRUCT_VALUE(config, oxcf, ret, lag_in_frames);
765 SET_STRUCT_VALUE(config, oxcf, ret, encode_breakout);
766 SET_STRUCT_VALUE(config, oxcf, ret, enable_tpl_model);
767 SET_STRUCT_VALUE(config, oxcf, ret, enable_auto_arf);
768 if (strcmp(config.name, "rc_mode") == 0) {
769 int rc_mode = atoi(config.value);
770 if (rc_mode >= VPX_VBR && rc_mode <= VPX_Q) {
771 oxcf->rc_mode = (enum vpx_rc_mode)rc_mode;
772 ret = 1;
773 } else {
774 fprintf(stderr, "Invalid rc_mode value: %d\n", rc_mode);
775 }
776 }
777 SET_STRUCT_VALUE(config, oxcf, ret, cq_level);
778 if (ret == 0) {
779 fprintf(stderr, "Ignored unsupported encode_config %s\n", config.name);
780 }
781 }
782
GetEncodeConfig(int frame_width,int frame_height,vpx_rational_t frame_rate,int target_bitrate,int encode_speed,vpx_enc_pass enc_pass,const std::vector<EncodeConfig> & encode_config_list)783 static VP9EncoderConfig GetEncodeConfig(
784 int frame_width, int frame_height, vpx_rational_t frame_rate,
785 int target_bitrate, int encode_speed, vpx_enc_pass enc_pass,
786 const std::vector<EncodeConfig> &encode_config_list) {
787 VP9EncoderConfig oxcf =
788 vp9_get_encoder_config(frame_width, frame_height, frame_rate,
789 target_bitrate, encode_speed, enc_pass);
790 for (const auto &config : encode_config_list) {
791 UpdateEncodeConfig(config, &oxcf);
792 }
793 if (enc_pass == VPX_RC_FIRST_PASS) {
794 oxcf.lag_in_frames = 0;
795 }
796 oxcf.use_simple_encode_api = 1;
797 return oxcf;
798 }
799
SimpleEncode(int frame_width,int frame_height,int frame_rate_num,int frame_rate_den,int target_bitrate,int num_frames,const char * infile_path,const char * outfile_path)800 SimpleEncode::SimpleEncode(int frame_width, int frame_height,
801 int frame_rate_num, int frame_rate_den,
802 int target_bitrate, int num_frames,
803 const char *infile_path, const char *outfile_path) {
804 impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
805 frame_width_ = frame_width;
806 frame_height_ = frame_height;
807 frame_rate_num_ = frame_rate_num;
808 frame_rate_den_ = frame_rate_den;
809 target_bitrate_ = target_bitrate;
810 num_frames_ = num_frames;
811 encode_speed_ = 0;
812
813 frame_coding_index_ = 0;
814 show_frame_count_ = 0;
815
816 key_frame_group_index_ = 0;
817 key_frame_group_size_ = 0;
818
819 // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
820 assert(infile_path != nullptr);
821 in_file_ = fopen(infile_path, "r");
822 if (outfile_path != nullptr) {
823 out_file_ = fopen(outfile_path, "w");
824 } else {
825 out_file_ = nullptr;
826 }
827 impl_ptr_->cpi = nullptr;
828 impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
829
830 InitRefFrameInfo(&ref_frame_info_);
831 }
832
SetEncodeSpeed(int encode_speed)833 void SimpleEncode::SetEncodeSpeed(int encode_speed) {
834 encode_speed_ = encode_speed;
835 }
836
SetEncodeConfig(const char * name,const char * value)837 StatusCode SimpleEncode::SetEncodeConfig(const char *name, const char *value) {
838 if (name == nullptr || value == nullptr) {
839 fprintf(stderr, "SetEncodeConfig: null pointer, name %p value %p\n", name,
840 value);
841 return StatusError;
842 }
843 EncodeConfig config;
844 snprintf(config.name, ENCODE_CONFIG_BUF_SIZE, "%s", name);
845 snprintf(config.value, ENCODE_CONFIG_BUF_SIZE, "%s", value);
846 impl_ptr_->encode_config_list.push_back(config);
847 return StatusOk;
848 }
849
DumpEncodeConfigs(int pass,FILE * fp)850 StatusCode SimpleEncode::DumpEncodeConfigs(int pass, FILE *fp) {
851 if (fp == nullptr) {
852 fprintf(stderr, "DumpEncodeConfigs: null pointer, fp %p\n", fp);
853 return StatusError;
854 }
855 vpx_enc_pass enc_pass;
856 if (pass == 1) {
857 enc_pass = VPX_RC_FIRST_PASS;
858 } else {
859 enc_pass = VPX_RC_LAST_PASS;
860 }
861 const vpx_rational_t frame_rate =
862 make_vpx_rational(frame_rate_num_, frame_rate_den_);
863 const VP9EncoderConfig oxcf =
864 GetEncodeConfig(frame_width_, frame_height_, frame_rate, target_bitrate_,
865 encode_speed_, enc_pass, impl_ptr_->encode_config_list);
866 vp9_dump_encoder_config(&oxcf, fp);
867 return StatusOk;
868 }
869
ComputeFirstPassStats()870 void SimpleEncode::ComputeFirstPassStats() {
871 vpx_rational_t frame_rate =
872 make_vpx_rational(frame_rate_num_, frame_rate_den_);
873 const VP9EncoderConfig oxcf = GetEncodeConfig(
874 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
875 VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
876 impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
877 struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead;
878 int i;
879 int use_highbitdepth = 0;
880 const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
881 const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
882 #if CONFIG_VP9_HIGHBITDEPTH
883 use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth;
884 #endif
885 vpx_image_t img;
886 vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
887 rewind(in_file_);
888 impl_ptr_->first_pass_stats.clear();
889 for (i = 0; i < num_frames_; ++i) {
890 assert(!vp9_lookahead_full(lookahead));
891 if (img_read(&img, in_file_)) {
892 int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
893 int64_t ts_start =
894 timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
895 int64_t ts_end =
896 timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
897 YV12_BUFFER_CONFIG sd;
898 image2yuvconfig(&img, &sd);
899 vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
900 {
901 int64_t time_stamp;
902 int64_t time_end;
903 int flush = 1; // Makes vp9_get_compressed_data process a frame
904 size_t size;
905 unsigned int frame_flags = 0;
906 ENCODE_FRAME_RESULT encode_frame_info;
907 vp9_init_encode_frame_result(&encode_frame_info);
908 // TODO(angiebird): Call vp9_first_pass directly
909 vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr,
910 &time_stamp, &time_end, flush,
911 &encode_frame_info);
912 // vp9_get_compressed_data only generates first pass stats not
913 // compresses data
914 assert(size == 0);
915 // Get vp9 first pass motion vector info.
916 std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
917 update_motion_vector_info(
918 impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16,
919 num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision);
920 fp_motion_vector_info_.push_back(mv_info);
921 }
922 impl_ptr_->first_pass_stats.push_back(
923 vp9_get_frame_stats(&impl_ptr_->cpi->twopass));
924 }
925 }
926 // TODO(angiebird): Store the total_stats apart form first_pass_stats
927 impl_ptr_->first_pass_stats.push_back(
928 vp9_get_total_stats(&impl_ptr_->cpi->twopass));
929 vp9_end_first_pass(impl_ptr_->cpi);
930
931 // Generate key_frame_map based on impl_ptr_->first_pass_stats.
932 key_frame_map_ = ComputeKeyFrameMap();
933
934 free_encoder(impl_ptr_->cpi);
935 impl_ptr_->cpi = nullptr;
936 rewind(in_file_);
937 vpx_img_free(&img);
938 }
939
ObserveFirstPassStats()940 std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
941 std::vector<std::vector<double>> output_stats;
942 // TODO(angiebird): This function make several assumptions of
943 // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
944 // last one. 2) The last entry of first_pass_stats is the total_stats.
945 // Change the code structure, so that we don't have to make these assumptions
946
947 // Note the last entry of first_pass_stats is the total_stats, we don't need
948 // it.
949 for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
950 double *buf_start =
951 reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
952 // We use - 1 here because the last member in FIRSTPASS_STATS is not double
953 double *buf_end =
954 buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
955 1;
956 std::vector<double> this_stats(buf_start, buf_end);
957 output_stats.push_back(this_stats);
958 }
959 return output_stats;
960 }
961
962 std::vector<std::vector<MotionVectorInfo>>
ObserveFirstPassMotionVectors()963 SimpleEncode::ObserveFirstPassMotionVectors() {
964 return fp_motion_vector_info_;
965 }
966
SetExternalGroupOfPicturesMap(int * gop_map,int gop_map_size)967 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
968 int gop_map_size) {
969 for (int i = 0; i < gop_map_size; ++i) {
970 gop_map_.push_back(gop_map[i]);
971 }
972 // The following will check and modify gop_map_ to make sure the
973 // gop_map_ satisfies the constraints.
974 // 1) Each key frame position should be at the start of a gop.
975 // 2) The last gop should not use an alt ref.
976 assert(gop_map_.size() == key_frame_map_.size());
977 int last_gop_start = 0;
978 for (int i = 0; static_cast<size_t>(i) < gop_map_.size(); ++i) {
979 if (key_frame_map_[i] == 1 && gop_map_[i] == 0) {
980 fprintf(stderr, "Add an extra gop start at show_idx %d\n", i);
981 // Insert a gop start at key frame location.
982 gop_map_[i] |= kGopMapFlagStart;
983 gop_map_[i] |= kGopMapFlagUseAltRef;
984 }
985 if (gop_map_[i] & kGopMapFlagStart) {
986 last_gop_start = i;
987 }
988 }
989 if (gop_map_[last_gop_start] & kGopMapFlagUseAltRef) {
990 fprintf(stderr,
991 "Last group of pictures starting at show_idx %d shouldn't use alt "
992 "ref\n",
993 last_gop_start);
994 gop_map_[last_gop_start] &= ~kGopMapFlagUseAltRef;
995 }
996 }
997
ObserveExternalGroupOfPicturesMap()998 std::vector<int> SimpleEncode::ObserveExternalGroupOfPicturesMap() {
999 return gop_map_;
1000 }
1001
// Returns a mutable pointer to the first element of |v|, or nullptr when |v|
// is empty. Constness is cast away so the pointer can be handed to interfaces
// that take non-const buffers.
template <typename T>
T *GetVectorData(const std::vector<T> &v) {
  return v.empty() ? nullptr : const_cast<T *>(v.data());
}
1009
GetGopCommand(const std::vector<int> & gop_map,int start_show_index)1010 static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
1011 int start_show_index) {
1012 GOP_COMMAND gop_command;
1013 if (static_cast<size_t>(start_show_index) < gop_map.size()) {
1014 assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);
1015 int end_show_index = start_show_index + 1;
1016 // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is
1017 // the start of a gop.
1018 while (static_cast<size_t>(end_show_index) < gop_map.size() &&
1019 (gop_map[end_show_index] & kGopMapFlagStart) == 0) {
1020 ++end_show_index;
1021 }
1022 const int show_frame_count = end_show_index - start_show_index;
1023 int use_alt_ref = (gop_map[start_show_index] & kGopMapFlagUseAltRef) != 0;
1024 if (static_cast<size_t>(end_show_index) == gop_map.size()) {
1025 // This is the last gop group, there must be no altref.
1026 use_alt_ref = 0;
1027 }
1028 gop_command_on(&gop_command, show_frame_count, use_alt_ref);
1029 } else {
1030 gop_command_off(&gop_command);
1031 }
1032 return gop_command;
1033 }
1034
StartEncode()1035 void SimpleEncode::StartEncode() {
1036 assert(impl_ptr_->first_pass_stats.size() > 0);
1037 vpx_rational_t frame_rate =
1038 make_vpx_rational(frame_rate_num_, frame_rate_den_);
1039 VP9EncoderConfig oxcf = GetEncodeConfig(
1040 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1041 VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1042
1043 vpx_fixed_buf_t stats;
1044 stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
1045 stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
1046 impl_ptr_->first_pass_stats.size();
1047
1048 vp9_set_first_pass_stats(&oxcf, &stats);
1049 assert(impl_ptr_->cpi == nullptr);
1050 impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
1051 vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
1052 frame_height_, 1);
1053
1054 frame_coding_index_ = 0;
1055 show_frame_count_ = 0;
1056
1057 assert(impl_ptr_->cpi != nullptr);
1058 FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1059 unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1060 vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area);
1061
1062 UpdateKeyFrameGroup(show_frame_count_);
1063
1064 const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1065 encode_command_set_gop_command(&impl_ptr_->cpi->encode_command, gop_command);
1066 UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1067 &group_of_picture_);
1068 rewind(in_file_);
1069
1070 if (out_file_ != nullptr) {
1071 const char *fourcc = "VP90";
1072 // In SimpleEncode, we use time_base = 1 / TICKS_PER_SEC.
1073 // Based on that, the ivf_timestamp for each image is set to
1074 // show_idx * TICKS_PER_SEC / frame_rate
1075 // such that each image's actual timestamp in seconds can be computed as
1076 // ivf_timestamp * time_base == show_idx / frame_rate
1077 // TODO(angiebird): 1) Add unit test for ivf timestamp.
1078 // 2) Simplify the frame_rate setting process.
1079 vpx_rational_t time_base = make_vpx_rational(1, TICKS_PER_SEC);
1080 ivf_write_file_header_with_video_info(out_file_, *(const uint32_t *)fourcc,
1081 num_frames_, frame_width_,
1082 frame_height_, time_base);
1083 }
1084 }
1085
EndEncode()1086 void SimpleEncode::EndEncode() {
1087 free_encoder(impl_ptr_->cpi);
1088 impl_ptr_->cpi = nullptr;
1089 vpx_img_free(&impl_ptr_->tmp_img);
1090 rewind(in_file_);
1091 }
1092
UpdateKeyFrameGroup(int key_frame_show_index)1093 void SimpleEncode::UpdateKeyFrameGroup(int key_frame_show_index) {
1094 const VP9_COMP *cpi = impl_ptr_->cpi;
1095 key_frame_group_index_ = 0;
1096 key_frame_group_size_ = vp9_get_frames_to_next_key(
1097 &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval);
1098 assert(key_frame_group_size_ > 0);
1099 // Init the reference frame info when a new key frame group appears.
1100 InitRefFrameInfo(&ref_frame_info_);
1101 }
1102
PostUpdateKeyFrameGroupIndex(FrameType frame_type)1103 void SimpleEncode::PostUpdateKeyFrameGroupIndex(FrameType frame_type) {
1104 if (frame_type != kFrameTypeAltRef) {
1105 // key_frame_group_index_ only counts show frames
1106 ++key_frame_group_index_;
1107 }
1108 }
1109
GetKeyFrameGroupSize() const1110 int SimpleEncode::GetKeyFrameGroupSize() const { return key_frame_group_size_; }
1111
ObserveGroupOfPicture() const1112 GroupOfPicture SimpleEncode::ObserveGroupOfPicture() const {
1113 return group_of_picture_;
1114 }
1115
GetNextEncodeFrameInfo() const1116 EncodeFrameInfo SimpleEncode::GetNextEncodeFrameInfo() const {
1117 return group_of_picture_
1118 .encode_frame_list[group_of_picture_.next_encode_frame_index];
1119 }
1120
PostUpdateState(const EncodeFrameResult & encode_frame_result)1121 void SimpleEncode::PostUpdateState(
1122 const EncodeFrameResult &encode_frame_result) {
1123 // This function needs to be called before the increament of
1124 // frame_coding_index_
1125 PostUpdateRefFrameInfo(encode_frame_result.frame_type, frame_coding_index_,
1126 &ref_frame_info_);
1127 ++frame_coding_index_;
1128 if (encode_frame_result.frame_type != kFrameTypeAltRef) {
1129 // Only kFrameTypeAltRef is not a show frame
1130 ++show_frame_count_;
1131 }
1132
1133 PostUpdateKeyFrameGroupIndex(encode_frame_result.frame_type);
1134 if (key_frame_group_index_ == key_frame_group_size_) {
1135 UpdateKeyFrameGroup(show_frame_count_);
1136 }
1137
1138 IncreaseGroupOfPictureIndex(&group_of_picture_);
1139 if (IsGroupOfPictureFinished(group_of_picture_)) {
1140 const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1141 encode_command_set_gop_command(&impl_ptr_->cpi->encode_command,
1142 gop_command);
1143 // This function needs to be called after ref_frame_info_ is updated
1144 // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
1145 UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1146 &group_of_picture_);
1147 }
1148 }
1149
EncodeFrame(EncodeFrameResult * encode_frame_result)1150 void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
1151 VP9_COMP *cpi = impl_ptr_->cpi;
1152 struct lookahead_ctx *lookahead = cpi->lookahead;
1153 int use_highbitdepth = 0;
1154 #if CONFIG_VP9_HIGHBITDEPTH
1155 use_highbitdepth = cpi->common.use_highbitdepth;
1156 #endif
1157 // The lookahead's size is set to oxcf->lag_in_frames.
1158 // We want to fill lookahead to it's max capacity if possible so that the
1159 // encoder can construct alt ref frame in time.
1160 // In the other words, we hope vp9_get_compressed_data to encode a frame
1161 // every time in the function
1162 while (!vp9_lookahead_full(lookahead)) {
1163 // TODO(angiebird): Check whether we can move this file read logics to
1164 // lookahead
1165 if (img_read(&impl_ptr_->tmp_img, in_file_)) {
1166 int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
1167 int64_t ts_start =
1168 timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
1169 int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
1170 next_show_idx + 1);
1171 YV12_BUFFER_CONFIG sd;
1172 image2yuvconfig(&impl_ptr_->tmp_img, &sd);
1173 vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
1174 } else {
1175 break;
1176 }
1177 }
1178
1179 if (init_encode_frame_result(encode_frame_result, frame_width_, frame_height_,
1180 impl_ptr_->img_fmt)) {
1181 int64_t time_stamp;
1182 int64_t time_end;
1183 int flush = 1; // Make vp9_get_compressed_data encode a frame
1184 unsigned int frame_flags = 0;
1185 ENCODE_FRAME_RESULT encode_frame_info;
1186 vp9_init_encode_frame_result(&encode_frame_info);
1187 ImageBuffer_to_IMAGE_BUFFER(encode_frame_result->coded_frame,
1188 &encode_frame_info.coded_frame);
1189 vp9_get_compressed_data(cpi, &frame_flags,
1190 &encode_frame_result->coding_data_byte_size,
1191 encode_frame_result->coding_data.get(), &time_stamp,
1192 &time_end, flush, &encode_frame_info);
1193 if (out_file_ != nullptr) {
1194 ivf_write_frame_header(out_file_, time_stamp,
1195 encode_frame_result->coding_data_byte_size);
1196 fwrite(encode_frame_result->coding_data.get(), 1,
1197 encode_frame_result->coding_data_byte_size, out_file_);
1198 }
1199
1200 // vp9_get_compressed_data is expected to encode a frame every time, so the
1201 // data size should be greater than zero.
1202 if (encode_frame_result->coding_data_byte_size <= 0) {
1203 fprintf(stderr, "Coding data size <= 0\n");
1204 abort();
1205 }
1206 const size_t max_coding_data_byte_size =
1207 get_max_coding_data_byte_size(frame_width_, frame_height_);
1208 if (encode_frame_result->coding_data_byte_size >
1209 max_coding_data_byte_size) {
1210 fprintf(stderr, "Coding data size exceeds the maximum.\n");
1211 abort();
1212 }
1213
1214 const GroupOfPicture group_of_picture = this->ObserveGroupOfPicture();
1215 const int show_frame_count = group_of_picture.show_frame_count;
1216 update_encode_frame_result(encode_frame_result, show_frame_count,
1217 &encode_frame_info);
1218 PostUpdateState(*encode_frame_result);
1219 } else {
1220 // TODO(angiebird): Clean up encode_frame_result.
1221 fprintf(stderr, "init_encode_frame_result() failed.\n");
1222 this->EndEncode();
1223 }
1224 }
1225
EncodeFrameWithQuantizeIndex(EncodeFrameResult * encode_frame_result,int quantize_index)1226 void SimpleEncode::EncodeFrameWithQuantizeIndex(
1227 EncodeFrameResult *encode_frame_result, int quantize_index) {
1228 encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
1229 quantize_index);
1230 EncodeFrame(encode_frame_result);
1231 encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
1232 }
1233
EncodeFrameWithTargetFrameBits(EncodeFrameResult * encode_frame_result,int target_frame_bits,double percent_diff)1234 void SimpleEncode::EncodeFrameWithTargetFrameBits(
1235 EncodeFrameResult *encode_frame_result, int target_frame_bits,
1236 double percent_diff) {
1237 encode_command_set_target_frame_bits(&impl_ptr_->cpi->encode_command,
1238 target_frame_bits, percent_diff);
1239 EncodeFrame(encode_frame_result);
1240 encode_command_reset_target_frame_bits(&impl_ptr_->cpi->encode_command);
1241 }
1242
GetCodingFrameNumFromGopMap(const std::vector<int> & gop_map)1243 static int GetCodingFrameNumFromGopMap(const std::vector<int> &gop_map) {
1244 int start_show_index = 0;
1245 int coding_frame_count = 0;
1246 while (static_cast<size_t>(start_show_index) < gop_map.size()) {
1247 const GOP_COMMAND gop_command = GetGopCommand(gop_map, start_show_index);
1248 start_show_index += gop_command.show_frame_count;
1249 coding_frame_count += gop_command_coding_frame_count(&gop_command);
1250 }
1251 assert(static_cast<size_t>(start_show_index) == gop_map.size());
1252 return coding_frame_count;
1253 }
1254
GetCodingFrameNum() const1255 int SimpleEncode::GetCodingFrameNum() const {
1256 assert(impl_ptr_->first_pass_stats.size() > 0);
1257 if (gop_map_.size() > 0) {
1258 return GetCodingFrameNumFromGopMap(gop_map_);
1259 }
1260
1261 // These are the default settings for now.
1262 TWO_PASS twopass;
1263 const int multi_layer_arf = 0;
1264 const int allow_alt_ref = 1;
1265 vpx_rational_t frame_rate =
1266 make_vpx_rational(frame_rate_num_, frame_rate_den_);
1267 const VP9EncoderConfig oxcf = GetEncodeConfig(
1268 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1269 VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1270 FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1271 fps_init_first_pass_info(&twopass.first_pass_info,
1272 GetVectorData(impl_ptr_->first_pass_stats),
1273 num_frames_);
1274 unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1275 vp9_init_vizier_params(&twopass, screen_area);
1276 return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf,
1277 allow_alt_ref);
1278 }
1279
ComputeKeyFrameMap() const1280 std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
1281 // The last entry of first_pass_stats is the overall stats.
1282 assert(impl_ptr_->first_pass_stats.size() ==
1283 static_cast<size_t>(num_frames_) + 1);
1284 vpx_rational_t frame_rate =
1285 make_vpx_rational(frame_rate_num_, frame_rate_den_);
1286 const VP9EncoderConfig oxcf = GetEncodeConfig(
1287 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1288 VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1289 TWO_PASS twopass;
1290 fps_init_first_pass_info(&twopass.first_pass_info,
1291 GetVectorData(impl_ptr_->first_pass_stats),
1292 num_frames_);
1293 std::vector<int> key_frame_map(num_frames_, 0);
1294 vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map));
1295 return key_frame_map;
1296 }
1297
ObserveKeyFrameMap() const1298 std::vector<int> SimpleEncode::ObserveKeyFrameMap() const {
1299 return key_frame_map_;
1300 }
1301
GetFramePixelCount() const1302 uint64_t SimpleEncode::GetFramePixelCount() const {
1303 assert(frame_width_ % 2 == 0);
1304 assert(frame_height_ % 2 == 0);
1305 switch (impl_ptr_->img_fmt) {
1306 case VPX_IMG_FMT_I420: return frame_width_ * frame_height_ * 3 / 2;
1307 case VPX_IMG_FMT_I422: return frame_width_ * frame_height_ * 2;
1308 case VPX_IMG_FMT_I444: return frame_width_ * frame_height_ * 3;
1309 case VPX_IMG_FMT_I440: return frame_width_ * frame_height_ * 2;
1310 case VPX_IMG_FMT_I42016: return frame_width_ * frame_height_ * 3 / 2;
1311 case VPX_IMG_FMT_I42216: return frame_width_ * frame_height_ * 2;
1312 case VPX_IMG_FMT_I44416: return frame_width_ * frame_height_ * 3;
1313 case VPX_IMG_FMT_I44016: return frame_width_ * frame_height_ * 2;
1314 default: return 0;
1315 }
1316 }
1317
~SimpleEncode()1318 SimpleEncode::~SimpleEncode() {
1319 if (in_file_ != nullptr) {
1320 fclose(in_file_);
1321 }
1322 if (out_file_ != nullptr) {
1323 fclose(out_file_);
1324 }
1325 }
1326
1327 } // namespace vp9
1328