1 /*
2 * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <memory>
12 #include <vector>
13 #include "./ivfenc.h"
14 #include "vp9/common/vp9_entropymode.h"
15 #include "vp9/common/vp9_enums.h"
16 #include "vp9/common/vp9_onyxc_int.h"
17 #include "vp9/vp9_iface_common.h"
18 #include "vp9/encoder/vp9_encoder.h"
19 #include "vp9/encoder/vp9_firstpass.h"
20 #include "vp9/simple_encode.h"
21 #include "vp9/vp9_cx_iface.h"
22
23 namespace vp9 {
24
get_plane_height(vpx_img_fmt_t img_fmt,int frame_height,int plane)25 static int get_plane_height(vpx_img_fmt_t img_fmt, int frame_height,
26 int plane) {
27 assert(plane < 3);
28 if (plane == 0) {
29 return frame_height;
30 }
31 switch (img_fmt) {
32 case VPX_IMG_FMT_I420:
33 case VPX_IMG_FMT_I440:
34 case VPX_IMG_FMT_YV12:
35 case VPX_IMG_FMT_I42016:
36 case VPX_IMG_FMT_I44016: return (frame_height + 1) >> 1;
37 default: return frame_height;
38 }
39 }
40
get_plane_width(vpx_img_fmt_t img_fmt,int frame_width,int plane)41 static int get_plane_width(vpx_img_fmt_t img_fmt, int frame_width, int plane) {
42 assert(plane < 3);
43 if (plane == 0) {
44 return frame_width;
45 }
46 switch (img_fmt) {
47 case VPX_IMG_FMT_I420:
48 case VPX_IMG_FMT_YV12:
49 case VPX_IMG_FMT_I422:
50 case VPX_IMG_FMT_I42016:
51 case VPX_IMG_FMT_I42216: return (frame_width + 1) >> 1;
52 default: return frame_width;
53 }
54 }
55
56 // TODO(angiebird): Merge this function with vpx_img_plane_width()
img_plane_width(const vpx_image_t * img,int plane)57 static int img_plane_width(const vpx_image_t *img, int plane) {
58 if (plane > 0 && img->x_chroma_shift > 0)
59 return (img->d_w + 1) >> img->x_chroma_shift;
60 else
61 return img->d_w;
62 }
63
64 // TODO(angiebird): Merge this function with vpx_img_plane_height()
img_plane_height(const vpx_image_t * img,int plane)65 static int img_plane_height(const vpx_image_t *img, int plane) {
66 if (plane > 0 && img->y_chroma_shift > 0)
67 return (img->d_h + 1) >> img->y_chroma_shift;
68 else
69 return img->d_h;
70 }
71
72 // TODO(angiebird): Merge this function with vpx_img_read()
img_read(vpx_image_t * img,FILE * file)73 static int img_read(vpx_image_t *img, FILE *file) {
74 int plane;
75
76 for (plane = 0; plane < 3; ++plane) {
77 unsigned char *buf = img->planes[plane];
78 const int stride = img->stride[plane];
79 const int w = img_plane_width(img, plane) *
80 ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
81 const int h = img_plane_height(img, plane);
82 int y;
83
84 for (y = 0; y < h; ++y) {
85 if (fread(buf, 1, w, file) != (size_t)w) return 0;
86 buf += stride;
87 }
88 }
89
90 return 1;
91 }
92
// Assume every config in VP9EncoderConfig is less than 100 characters.
#define ENCODE_CONFIG_BUF_SIZE 100
// One encoder option override, kept as a pair of fixed-size text buffers.
struct EncodeConfig {
  // Option name. UpdateEncodeConfig() compares it with strcmp(), so it must
  // be NUL-terminated.
  char name[ENCODE_CONFIG_BUF_SIZE];
  // Option value as text. UpdateEncodeConfig() converts it with atoi(), so it
  // must be NUL-terminated as well.
  char value[ENCODE_CONFIG_BUF_SIZE];
};
99
// Private implementation state of SimpleEncode. It holds the libvpx C types
// directly; all members are public because the class is only reachable
// through SimpleEncode::impl_ptr_.
class SimpleEncode::EncodeImpl {
 public:
  // Encoder instance. The SimpleEncode constructor initializes it to nullptr.
  VP9_COMP *cpi;
  // Image format of the source frames. The SimpleEncode constructor sets it
  // to VPX_IMG_FMT_I420.
  vpx_img_fmt_t img_fmt;
  // NOTE(review): usage is not visible in this part of the file; presumably a
  // scratch image used while reading source frames -- confirm.
  vpx_image_t tmp_img;
  // NOTE(review): presumably the per-frame statistics gathered by the first
  // pass -- confirm against the code that fills it.
  std::vector<FIRSTPASS_STATS> first_pass_stats;
  // Encoder option overrides. NOTE(review): presumably the list handed to
  // GetEncodeConfig() -- confirm against the callers.
  std::vector<EncodeConfig> encode_config_list;
};
108
init_encoder(const VP9EncoderConfig * oxcf,vpx_img_fmt_t img_fmt)109 static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
110 vpx_img_fmt_t img_fmt) {
111 VP9_COMP *cpi;
112 BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
113 if (!buffer_pool) return NULL;
114 vp9_initialize_enc();
115 cpi = vp9_create_compressor(oxcf, buffer_pool);
116 vp9_update_compressor_with_img_fmt(cpi, img_fmt);
117 return cpi;
118 }
119
free_encoder(VP9_COMP * cpi)120 static void free_encoder(VP9_COMP *cpi) {
121 BufferPool *buffer_pool = cpi->common.buffer_pool;
122 vp9_remove_compressor(cpi);
123 // buffer_pool needs to be free after cpi because buffer_pool contains
124 // allocated buffers that will be free in vp9_remove_compressor()
125 vpx_free(buffer_pool);
126 }
127
make_vpx_rational(int num,int den)128 static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
129 vpx_rational_t v;
130 v.num = num;
131 v.den = den;
132 return v;
133 }
134
135 static INLINE FrameType
get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type)136 get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
137 switch (update_type) {
138 case KF_UPDATE: return kFrameTypeKey;
139 case ARF_UPDATE: return kFrameTypeAltRef;
140 case GF_UPDATE: return kFrameTypeGolden;
141 case OVERLAY_UPDATE: return kFrameTypeOverlay;
142 case LF_UPDATE: return kFrameTypeInter;
143 default:
144 fprintf(stderr, "Unsupported update_type %d\n", update_type);
145 abort();
146 return kFrameTypeInter;
147 }
148 }
149
update_partition_info(const PARTITION_INFO * input_partition_info,const int num_rows_4x4,const int num_cols_4x4,PartitionInfo * output_partition_info)150 static void update_partition_info(const PARTITION_INFO *input_partition_info,
151 const int num_rows_4x4,
152 const int num_cols_4x4,
153 PartitionInfo *output_partition_info) {
154 const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
155 for (int i = 0; i < num_units_4x4; ++i) {
156 output_partition_info[i].row = input_partition_info[i].row;
157 output_partition_info[i].column = input_partition_info[i].column;
158 output_partition_info[i].row_start = input_partition_info[i].row_start;
159 output_partition_info[i].column_start =
160 input_partition_info[i].column_start;
161 output_partition_info[i].width = input_partition_info[i].width;
162 output_partition_info[i].height = input_partition_info[i].height;
163 }
164 }
165
166 // translate MV_REFERENCE_FRAME to RefFrameType
mv_ref_frame_to_ref_frame_type(MV_REFERENCE_FRAME mv_ref_frame)167 static RefFrameType mv_ref_frame_to_ref_frame_type(
168 MV_REFERENCE_FRAME mv_ref_frame) {
169 switch (mv_ref_frame) {
170 case LAST_FRAME: return kRefFrameTypeLast;
171 case GOLDEN_FRAME: return kRefFrameTypePast;
172 case ALTREF_FRAME: return kRefFrameTypeFuture;
173 default: return kRefFrameTypeNone;
174 }
175 }
176
update_motion_vector_info(const MOTION_VECTOR_INFO * input_motion_vector_info,const int num_rows_4x4,const int num_cols_4x4,MotionVectorInfo * output_motion_vector_info,int motion_vector_scale)177 static void update_motion_vector_info(
178 const MOTION_VECTOR_INFO *input_motion_vector_info, const int num_rows_4x4,
179 const int num_cols_4x4, MotionVectorInfo *output_motion_vector_info,
180 int motion_vector_scale) {
181 const int num_units_4x4 = num_rows_4x4 * num_cols_4x4;
182 for (int i = 0; i < num_units_4x4; ++i) {
183 const MV_REFERENCE_FRAME *in_ref_frame =
184 input_motion_vector_info[i].ref_frame;
185 output_motion_vector_info[i].mv_count =
186 (in_ref_frame[0] == INTRA_FRAME) ? 0
187 : ((in_ref_frame[1] == NONE) ? 1 : 2);
188 if (in_ref_frame[0] == NONE) {
189 fprintf(stderr, "in_ref_frame[0] shouldn't be NONE\n");
190 abort();
191 }
192 output_motion_vector_info[i].ref_frame[0] =
193 mv_ref_frame_to_ref_frame_type(in_ref_frame[0]);
194 output_motion_vector_info[i].ref_frame[1] =
195 mv_ref_frame_to_ref_frame_type(in_ref_frame[1]);
196 output_motion_vector_info[i].mv_row[0] =
197 (double)input_motion_vector_info[i].mv[0].as_mv.row /
198 motion_vector_scale;
199 output_motion_vector_info[i].mv_column[0] =
200 (double)input_motion_vector_info[i].mv[0].as_mv.col /
201 motion_vector_scale;
202 output_motion_vector_info[i].mv_row[1] =
203 (double)input_motion_vector_info[i].mv[1].as_mv.row /
204 motion_vector_scale;
205 output_motion_vector_info[i].mv_column[1] =
206 (double)input_motion_vector_info[i].mv[1].as_mv.col /
207 motion_vector_scale;
208 }
209 }
210
update_tpl_stats_info(const TplDepStats * input_tpl_stats_info,const int show_frame_count,TplStatsInfo * output_tpl_stats_info)211 static void update_tpl_stats_info(const TplDepStats *input_tpl_stats_info,
212 const int show_frame_count,
213 TplStatsInfo *output_tpl_stats_info) {
214 int frame_idx;
215 for (frame_idx = 0; frame_idx < show_frame_count; ++frame_idx) {
216 output_tpl_stats_info[frame_idx].intra_cost =
217 input_tpl_stats_info[frame_idx].intra_cost;
218 output_tpl_stats_info[frame_idx].inter_cost =
219 input_tpl_stats_info[frame_idx].inter_cost;
220 output_tpl_stats_info[frame_idx].mc_flow =
221 input_tpl_stats_info[frame_idx].mc_flow;
222 output_tpl_stats_info[frame_idx].mc_dep_cost =
223 input_tpl_stats_info[frame_idx].mc_dep_cost;
224 output_tpl_stats_info[frame_idx].mc_ref_cost =
225 input_tpl_stats_info[frame_idx].mc_ref_cost;
226 }
227 }
228
update_frame_counts(const FRAME_COUNTS * input_counts,FrameCounts * output_counts)229 static void update_frame_counts(const FRAME_COUNTS *input_counts,
230 FrameCounts *output_counts) {
231 // Init array sizes.
232 output_counts->y_mode.resize(BLOCK_SIZE_GROUPS);
233 for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
234 output_counts->y_mode[i].resize(INTRA_MODES);
235 }
236
237 output_counts->uv_mode.resize(INTRA_MODES);
238 for (int i = 0; i < INTRA_MODES; ++i) {
239 output_counts->uv_mode[i].resize(INTRA_MODES);
240 }
241
242 output_counts->partition.resize(PARTITION_CONTEXTS);
243 for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
244 output_counts->partition[i].resize(PARTITION_TYPES);
245 }
246
247 output_counts->coef.resize(TX_SIZES);
248 output_counts->eob_branch.resize(TX_SIZES);
249 for (int i = 0; i < TX_SIZES; ++i) {
250 output_counts->coef[i].resize(PLANE_TYPES);
251 output_counts->eob_branch[i].resize(PLANE_TYPES);
252 for (int j = 0; j < PLANE_TYPES; ++j) {
253 output_counts->coef[i][j].resize(REF_TYPES);
254 output_counts->eob_branch[i][j].resize(REF_TYPES);
255 for (int k = 0; k < REF_TYPES; ++k) {
256 output_counts->coef[i][j][k].resize(COEF_BANDS);
257 output_counts->eob_branch[i][j][k].resize(COEF_BANDS);
258 for (int l = 0; l < COEF_BANDS; ++l) {
259 output_counts->coef[i][j][k][l].resize(COEFF_CONTEXTS);
260 output_counts->eob_branch[i][j][k][l].resize(COEFF_CONTEXTS);
261 for (int m = 0; m < COEFF_CONTEXTS; ++m) {
262 output_counts->coef[i][j][k][l][m].resize(UNCONSTRAINED_NODES + 1);
263 }
264 }
265 }
266 }
267 }
268
269 output_counts->switchable_interp.resize(SWITCHABLE_FILTER_CONTEXTS);
270 for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
271 output_counts->switchable_interp[i].resize(SWITCHABLE_FILTERS);
272 }
273
274 output_counts->inter_mode.resize(INTER_MODE_CONTEXTS);
275 for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
276 output_counts->inter_mode[i].resize(INTER_MODES);
277 }
278
279 output_counts->intra_inter.resize(INTRA_INTER_CONTEXTS);
280 for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
281 output_counts->intra_inter[i].resize(2);
282 }
283
284 output_counts->comp_inter.resize(COMP_INTER_CONTEXTS);
285 for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
286 output_counts->comp_inter[i].resize(2);
287 }
288
289 output_counts->single_ref.resize(REF_CONTEXTS);
290 for (int i = 0; i < REF_CONTEXTS; ++i) {
291 output_counts->single_ref[i].resize(2);
292 for (int j = 0; j < 2; ++j) {
293 output_counts->single_ref[i][j].resize(2);
294 }
295 }
296
297 output_counts->comp_ref.resize(REF_CONTEXTS);
298 for (int i = 0; i < REF_CONTEXTS; ++i) {
299 output_counts->comp_ref[i].resize(2);
300 }
301
302 output_counts->skip.resize(SKIP_CONTEXTS);
303 for (int i = 0; i < SKIP_CONTEXTS; ++i) {
304 output_counts->skip[i].resize(2);
305 }
306
307 output_counts->tx.p32x32.resize(TX_SIZE_CONTEXTS);
308 output_counts->tx.p16x16.resize(TX_SIZE_CONTEXTS);
309 output_counts->tx.p8x8.resize(TX_SIZE_CONTEXTS);
310 for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
311 output_counts->tx.p32x32[i].resize(TX_SIZES);
312 output_counts->tx.p16x16[i].resize(TX_SIZES - 1);
313 output_counts->tx.p8x8[i].resize(TX_SIZES - 2);
314 }
315 output_counts->tx.tx_totals.resize(TX_SIZES);
316
317 output_counts->mv.joints.resize(MV_JOINTS);
318 output_counts->mv.comps.resize(2);
319 for (int i = 0; i < 2; ++i) {
320 output_counts->mv.comps[i].sign.resize(2);
321 output_counts->mv.comps[i].classes.resize(MV_CLASSES);
322 output_counts->mv.comps[i].class0.resize(CLASS0_SIZE);
323 output_counts->mv.comps[i].bits.resize(MV_OFFSET_BITS);
324 for (int j = 0; j < MV_OFFSET_BITS; ++j) {
325 output_counts->mv.comps[i].bits[j].resize(2);
326 }
327 output_counts->mv.comps[i].class0_fp.resize(CLASS0_SIZE);
328 for (int j = 0; j < CLASS0_SIZE; ++j) {
329 output_counts->mv.comps[i].class0_fp[j].resize(MV_FP_SIZE);
330 }
331 output_counts->mv.comps[i].fp.resize(MV_FP_SIZE);
332 output_counts->mv.comps[i].class0_hp.resize(2);
333 output_counts->mv.comps[i].hp.resize(2);
334 }
335
336 // Populate counts.
337 for (int i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
338 for (int j = 0; j < INTRA_MODES; ++j) {
339 output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
340 }
341 }
342 for (int i = 0; i < INTRA_MODES; ++i) {
343 for (int j = 0; j < INTRA_MODES; ++j) {
344 output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
345 }
346 }
347 for (int i = 0; i < PARTITION_CONTEXTS; ++i) {
348 for (int j = 0; j < PARTITION_TYPES; ++j) {
349 output_counts->partition[i][j] = input_counts->partition[i][j];
350 }
351 }
352 for (int i = 0; i < TX_SIZES; ++i) {
353 for (int j = 0; j < PLANE_TYPES; ++j) {
354 for (int k = 0; k < REF_TYPES; ++k) {
355 for (int l = 0; l < COEF_BANDS; ++l) {
356 for (int m = 0; m < COEFF_CONTEXTS; ++m) {
357 output_counts->eob_branch[i][j][k][l][m] =
358 input_counts->eob_branch[i][j][k][l][m];
359 for (int n = 0; n < UNCONSTRAINED_NODES + 1; n++) {
360 output_counts->coef[i][j][k][l][m][n] =
361 input_counts->coef[i][j][k][l][m][n];
362 }
363 }
364 }
365 }
366 }
367 }
368 for (int i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
369 for (int j = 0; j < SWITCHABLE_FILTERS; ++j) {
370 output_counts->switchable_interp[i][j] =
371 input_counts->switchable_interp[i][j];
372 }
373 }
374 for (int i = 0; i < INTER_MODE_CONTEXTS; ++i) {
375 for (int j = 0; j < INTER_MODES; ++j) {
376 output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
377 }
378 }
379 for (int i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
380 for (int j = 0; j < 2; ++j) {
381 output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
382 }
383 }
384 for (int i = 0; i < COMP_INTER_CONTEXTS; ++i) {
385 for (int j = 0; j < 2; ++j) {
386 output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
387 }
388 }
389 for (int i = 0; i < REF_CONTEXTS; ++i) {
390 for (int j = 0; j < 2; ++j) {
391 for (int k = 0; k < 2; ++k) {
392 output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
393 }
394 }
395 }
396 for (int i = 0; i < REF_CONTEXTS; ++i) {
397 for (int j = 0; j < 2; ++j) {
398 output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
399 }
400 }
401 for (int i = 0; i < SKIP_CONTEXTS; ++i) {
402 for (int j = 0; j < 2; ++j) {
403 output_counts->skip[i][j] = input_counts->skip[i][j];
404 }
405 }
406 for (int i = 0; i < TX_SIZE_CONTEXTS; i++) {
407 for (int j = 0; j < TX_SIZES; j++) {
408 output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
409 }
410 for (int j = 0; j < TX_SIZES - 1; j++) {
411 output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
412 }
413 for (int j = 0; j < TX_SIZES - 2; j++) {
414 output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
415 }
416 }
417 for (int i = 0; i < TX_SIZES; i++) {
418 output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
419 }
420 for (int i = 0; i < MV_JOINTS; i++) {
421 output_counts->mv.joints[i] = input_counts->mv.joints[i];
422 }
423 for (int k = 0; k < 2; k++) {
424 const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
425 for (int i = 0; i < 2; i++) {
426 output_counts->mv.comps[k].sign[i] = comps_t->sign[i];
427 output_counts->mv.comps[k].class0_hp[i] = comps_t->class0_hp[i];
428 output_counts->mv.comps[k].hp[i] = comps_t->hp[i];
429 }
430 for (int i = 0; i < MV_CLASSES; i++) {
431 output_counts->mv.comps[k].classes[i] = comps_t->classes[i];
432 }
433 for (int i = 0; i < CLASS0_SIZE; i++) {
434 output_counts->mv.comps[k].class0[i] = comps_t->class0[i];
435 for (int j = 0; j < MV_FP_SIZE; j++) {
436 output_counts->mv.comps[k].class0_fp[i][j] = comps_t->class0_fp[i][j];
437 }
438 }
439 for (int i = 0; i < MV_OFFSET_BITS; i++) {
440 for (int j = 0; j < 2; j++) {
441 output_counts->mv.comps[k].bits[i][j] = comps_t->bits[i][j];
442 }
443 }
444 for (int i = 0; i < MV_FP_SIZE; i++) {
445 output_counts->mv.comps[k].fp[i] = comps_t->fp[i];
446 }
447 }
448 }
449
output_image_buffer(const ImageBuffer & image_buffer,std::FILE * out_file)450 void output_image_buffer(const ImageBuffer &image_buffer, std::FILE *out_file) {
451 for (int plane = 0; plane < 3; ++plane) {
452 const int w = image_buffer.plane_width[plane];
453 const int h = image_buffer.plane_height[plane];
454 const uint8_t *buf = image_buffer.plane_buffer[plane].get();
455 fprintf(out_file, "%d %d\n", h, w);
456 for (int i = 0; i < w * h; ++i) {
457 fprintf(out_file, "%d ", (int)buf[i]);
458 }
459 fprintf(out_file, "\n");
460 }
461 }
462
init_image_buffer(ImageBuffer * image_buffer,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)463 static bool init_image_buffer(ImageBuffer *image_buffer, int frame_width,
464 int frame_height, vpx_img_fmt_t img_fmt) {
465 for (int plane = 0; plane < 3; ++plane) {
466 const int w = get_plane_width(img_fmt, frame_width, plane);
467 const int h = get_plane_height(img_fmt, frame_height, plane);
468 image_buffer->plane_width[plane] = w;
469 image_buffer->plane_height[plane] = h;
470 image_buffer->plane_buffer[plane].reset(new (std::nothrow) uint8_t[w * h]);
471 if (image_buffer->plane_buffer[plane].get() == nullptr) {
472 return false;
473 }
474 }
475 return true;
476 }
477
ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer & image_buffer,IMAGE_BUFFER * image_buffer_c)478 static void ImageBuffer_to_IMAGE_BUFFER(const ImageBuffer &image_buffer,
479 IMAGE_BUFFER *image_buffer_c) {
480 image_buffer_c->allocated = 1;
481 for (int plane = 0; plane < 3; ++plane) {
482 image_buffer_c->plane_width[plane] = image_buffer.plane_width[plane];
483 image_buffer_c->plane_height[plane] = image_buffer.plane_height[plane];
484 image_buffer_c->plane_buffer[plane] =
485 image_buffer.plane_buffer[plane].get();
486 }
487 }
488
// Returns the size in bytes of the buffer reserved for one coded frame:
// three bytes per pixel of a frame_width x frame_height frame.
//
// The product is computed in size_t. Computing it in int overflows (undefined
// behavior) once frame_width * frame_height * 3 exceeds INT_MAX.
static size_t get_max_coding_data_byte_size(int frame_width, int frame_height) {
  return static_cast<size_t>(frame_width) * static_cast<size_t>(frame_height) *
         3;
}
492
init_encode_frame_result(EncodeFrameResult * encode_frame_result,int frame_width,int frame_height,vpx_img_fmt_t img_fmt)493 static bool init_encode_frame_result(EncodeFrameResult *encode_frame_result,
494 int frame_width, int frame_height,
495 vpx_img_fmt_t img_fmt) {
496 const size_t max_coding_data_byte_size =
497 get_max_coding_data_byte_size(frame_width, frame_height);
498
499 encode_frame_result->coding_data.reset(
500 new (std::nothrow) uint8_t[max_coding_data_byte_size]);
501
502 encode_frame_result->num_rows_4x4 = get_num_unit_4x4(frame_height);
503 encode_frame_result->num_cols_4x4 = get_num_unit_4x4(frame_width);
504 encode_frame_result->partition_info.resize(encode_frame_result->num_rows_4x4 *
505 encode_frame_result->num_cols_4x4);
506 encode_frame_result->motion_vector_info.resize(
507 encode_frame_result->num_rows_4x4 * encode_frame_result->num_cols_4x4);
508 encode_frame_result->tpl_stats_info.resize(MAX_LAG_BUFFERS);
509
510 if (encode_frame_result->coding_data.get() == nullptr) {
511 return false;
512 }
513 return init_image_buffer(&encode_frame_result->coded_frame, frame_width,
514 frame_height, img_fmt);
515 }
516
encode_frame_result_update_rq_history(const RATE_QINDEX_HISTORY * rq_history,EncodeFrameResult * encode_frame_result)517 static void encode_frame_result_update_rq_history(
518 const RATE_QINDEX_HISTORY *rq_history,
519 EncodeFrameResult *encode_frame_result) {
520 encode_frame_result->recode_count = rq_history->recode_count;
521 for (int i = 0; i < encode_frame_result->recode_count; ++i) {
522 const int q_index = rq_history->q_index_history[i];
523 const int rate = rq_history->rate_history[i];
524 encode_frame_result->q_index_history.push_back(q_index);
525 encode_frame_result->rate_history.push_back(rate);
526 }
527 }
528
update_encode_frame_result(EncodeFrameResult * encode_frame_result,const int show_frame_count,const ENCODE_FRAME_RESULT * encode_frame_info)529 static void update_encode_frame_result(
530 EncodeFrameResult *encode_frame_result, const int show_frame_count,
531 const ENCODE_FRAME_RESULT *encode_frame_info) {
532 encode_frame_result->coding_data_bit_size =
533 encode_frame_result->coding_data_byte_size * 8;
534 encode_frame_result->show_idx = encode_frame_info->show_idx;
535 encode_frame_result->coding_idx = encode_frame_info->frame_coding_index;
536 assert(kRefFrameTypeMax == MAX_INTER_REF_FRAMES);
537 for (int i = 0; i < kRefFrameTypeMax; ++i) {
538 encode_frame_result->ref_frame_info.coding_indexes[i] =
539 encode_frame_info->ref_frame_coding_indexes[i];
540 encode_frame_result->ref_frame_info.valid_list[i] =
541 encode_frame_info->ref_frame_valid_list[i];
542 }
543 encode_frame_result->frame_type =
544 get_frame_type_from_update_type(encode_frame_info->update_type);
545 encode_frame_result->psnr = encode_frame_info->psnr;
546 encode_frame_result->sse = encode_frame_info->sse;
547 encode_frame_result->quantize_index = encode_frame_info->quantize_index;
548 update_partition_info(encode_frame_info->partition_info,
549 encode_frame_result->num_rows_4x4,
550 encode_frame_result->num_cols_4x4,
551 &encode_frame_result->partition_info[0]);
552 update_motion_vector_info(encode_frame_info->motion_vector_info,
553 encode_frame_result->num_rows_4x4,
554 encode_frame_result->num_cols_4x4,
555 &encode_frame_result->motion_vector_info[0],
556 kMotionVectorSubPixelPrecision);
557 update_frame_counts(&encode_frame_info->frame_counts,
558 &encode_frame_result->frame_counts);
559 if (encode_frame_result->frame_type == kFrameTypeAltRef) {
560 update_tpl_stats_info(encode_frame_info->tpl_stats_info, show_frame_count,
561 &encode_frame_result->tpl_stats_info[0]);
562 }
563 encode_frame_result_update_rq_history(&encode_frame_info->rq_history,
564 encode_frame_result);
565 }
566
IncreaseGroupOfPictureIndex(GroupOfPicture * group_of_picture)567 static void IncreaseGroupOfPictureIndex(GroupOfPicture *group_of_picture) {
568 ++group_of_picture->next_encode_frame_index;
569 }
570
IsGroupOfPictureFinished(const GroupOfPicture & group_of_picture)571 static int IsGroupOfPictureFinished(const GroupOfPicture &group_of_picture) {
572 return static_cast<size_t>(group_of_picture.next_encode_frame_index) ==
573 group_of_picture.encode_frame_list.size();
574 }
575
operator ==(const RefFrameInfo & a,const RefFrameInfo & b)576 bool operator==(const RefFrameInfo &a, const RefFrameInfo &b) {
577 bool match = true;
578 for (int i = 0; i < kRefFrameTypeMax; ++i) {
579 match &= a.coding_indexes[i] == b.coding_indexes[i];
580 match &= a.valid_list[i] == b.valid_list[i];
581 }
582 return match;
583 }
584
InitRefFrameInfo(RefFrameInfo * ref_frame_info)585 static void InitRefFrameInfo(RefFrameInfo *ref_frame_info) {
586 for (int i = 0; i < kRefFrameTypeMax; ++i) {
587 ref_frame_info->coding_indexes[i] = -1;
588 ref_frame_info->valid_list[i] = 0;
589 }
590 }
591
592 // After finishing coding a frame, this function will update the coded frame
593 // into the ref_frame_info based on the frame_type and the coding_index.
PostUpdateRefFrameInfo(FrameType frame_type,int frame_coding_index,RefFrameInfo * ref_frame_info)594 static void PostUpdateRefFrameInfo(FrameType frame_type, int frame_coding_index,
595 RefFrameInfo *ref_frame_info) {
596 // This part is written based on the logics in vp9_configure_buffer_updates()
597 // and update_ref_frames()
598 int *ref_frame_coding_indexes = ref_frame_info->coding_indexes;
599 switch (frame_type) {
600 case kFrameTypeKey:
601 ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
602 ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
603 ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
604 break;
605 case kFrameTypeInter:
606 ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
607 break;
608 case kFrameTypeAltRef:
609 ref_frame_coding_indexes[kRefFrameTypeFuture] = frame_coding_index;
610 break;
611 case kFrameTypeOverlay:
612 // Reserve the past coding_index in the future slot. This logic is from
613 // update_ref_frames() with condition vp9_preserve_existing_gf() == 1
614 // TODO(angiebird): Invetegate why we need this.
615 ref_frame_coding_indexes[kRefFrameTypeFuture] =
616 ref_frame_coding_indexes[kRefFrameTypePast];
617 ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
618 break;
619 case kFrameTypeGolden:
620 ref_frame_coding_indexes[kRefFrameTypePast] = frame_coding_index;
621 ref_frame_coding_indexes[kRefFrameTypeLast] = frame_coding_index;
622 break;
623 }
624
625 // This part is written based on the logics in get_ref_frame_flags() but we
626 // rename the flags alt, golden to future, past respectively. Mark
627 // non-duplicated reference frames as valid. The priorities are
628 // kRefFrameTypeLast > kRefFrameTypePast > kRefFrameTypeFuture.
629 const int last_index = ref_frame_coding_indexes[kRefFrameTypeLast];
630 const int past_index = ref_frame_coding_indexes[kRefFrameTypePast];
631 const int future_index = ref_frame_coding_indexes[kRefFrameTypeFuture];
632
633 int *ref_frame_valid_list = ref_frame_info->valid_list;
634 for (int ref_frame_idx = 0; ref_frame_idx < kRefFrameTypeMax;
635 ++ref_frame_idx) {
636 ref_frame_valid_list[ref_frame_idx] = 1;
637 }
638
639 if (past_index == last_index) {
640 ref_frame_valid_list[kRefFrameTypePast] = 0;
641 }
642
643 if (future_index == last_index) {
644 ref_frame_valid_list[kRefFrameTypeFuture] = 0;
645 }
646
647 if (future_index == past_index) {
648 ref_frame_valid_list[kRefFrameTypeFuture] = 0;
649 }
650 }
651
SetGroupOfPicture(int first_is_key_frame,int use_alt_ref,int coding_frame_count,int first_show_idx,int last_gop_use_alt_ref,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)652 static void SetGroupOfPicture(int first_is_key_frame, int use_alt_ref,
653 int coding_frame_count, int first_show_idx,
654 int last_gop_use_alt_ref, int start_coding_index,
655 const RefFrameInfo &start_ref_frame_info,
656 GroupOfPicture *group_of_picture) {
657 // Clean up the state of previous group of picture.
658 group_of_picture->encode_frame_list.clear();
659 group_of_picture->next_encode_frame_index = 0;
660 group_of_picture->show_frame_count = coding_frame_count - use_alt_ref;
661 group_of_picture->start_show_index = first_show_idx;
662 group_of_picture->start_coding_index = start_coding_index;
663 group_of_picture->first_is_key_frame = first_is_key_frame;
664 group_of_picture->use_alt_ref = use_alt_ref;
665 group_of_picture->last_gop_use_alt_ref = last_gop_use_alt_ref;
666
667 // We need to make a copy of start reference frame info because we
668 // use it to simulate the ref frame update.
669 RefFrameInfo ref_frame_info = start_ref_frame_info;
670
671 {
672 // First frame in the group of pictures. It's either key frame or show inter
673 // frame.
674 EncodeFrameInfo encode_frame_info;
675 // Set frame_type
676 if (first_is_key_frame) {
677 encode_frame_info.frame_type = kFrameTypeKey;
678 } else {
679 if (last_gop_use_alt_ref) {
680 encode_frame_info.frame_type = kFrameTypeOverlay;
681 } else {
682 encode_frame_info.frame_type = kFrameTypeGolden;
683 }
684 }
685
686 encode_frame_info.show_idx = first_show_idx;
687 encode_frame_info.coding_index = start_coding_index;
688
689 encode_frame_info.ref_frame_info = ref_frame_info;
690 PostUpdateRefFrameInfo(encode_frame_info.frame_type,
691 encode_frame_info.coding_index, &ref_frame_info);
692
693 group_of_picture->encode_frame_list.push_back(encode_frame_info);
694 }
695
696 const int show_frame_count = coding_frame_count - use_alt_ref;
697 if (use_alt_ref) {
698 // If there is alternate reference, it is always coded at the second place.
699 // Its show index (or timestamp) is at the last of this group
700 EncodeFrameInfo encode_frame_info;
701 encode_frame_info.frame_type = kFrameTypeAltRef;
702 encode_frame_info.show_idx = first_show_idx + show_frame_count;
703 encode_frame_info.coding_index = start_coding_index + 1;
704
705 encode_frame_info.ref_frame_info = ref_frame_info;
706 PostUpdateRefFrameInfo(encode_frame_info.frame_type,
707 encode_frame_info.coding_index, &ref_frame_info);
708
709 group_of_picture->encode_frame_list.push_back(encode_frame_info);
710 }
711
712 // Encode the rest show inter frames.
713 for (int i = 1; i < show_frame_count; ++i) {
714 EncodeFrameInfo encode_frame_info;
715 encode_frame_info.frame_type = kFrameTypeInter;
716 encode_frame_info.show_idx = first_show_idx + i;
717 encode_frame_info.coding_index = start_coding_index + use_alt_ref + i;
718
719 encode_frame_info.ref_frame_info = ref_frame_info;
720 PostUpdateRefFrameInfo(encode_frame_info.frame_type,
721 encode_frame_info.coding_index, &ref_frame_info);
722
723 group_of_picture->encode_frame_list.push_back(encode_frame_info);
724 }
725 }
726
727 // Gets group of picture information from VP9's decision, and update
728 // |group_of_picture| accordingly.
729 // This is called at the starting of encoding of each group of picture.
UpdateGroupOfPicture(const VP9_COMP * cpi,int start_coding_index,const RefFrameInfo & start_ref_frame_info,GroupOfPicture * group_of_picture)730 static void UpdateGroupOfPicture(const VP9_COMP *cpi, int start_coding_index,
731 const RefFrameInfo &start_ref_frame_info,
732 GroupOfPicture *group_of_picture) {
733 int first_is_key_frame;
734 int use_alt_ref;
735 int coding_frame_count;
736 int first_show_idx;
737 int last_gop_use_alt_ref;
738 vp9_get_next_group_of_picture(cpi, &first_is_key_frame, &use_alt_ref,
739 &coding_frame_count, &first_show_idx,
740 &last_gop_use_alt_ref);
741 SetGroupOfPicture(first_is_key_frame, use_alt_ref, coding_frame_count,
742 first_show_idx, last_gop_use_alt_ref, start_coding_index,
743 start_ref_frame_info, group_of_picture);
744 }
745
// If config.name equals the spelling of |field|, parses config.value with
// atoi(), stores the result in structure->field and sets |ret| to 1.
// Otherwise it does nothing, so |ret| keeps its previous value.
//   config    - object with NUL-terminated `name` and `value` members.
//   structure - pointer to the struct that has a member called |field|.
//   ret       - int lvalue used as a "some field matched" flag.
//   field     - member name; it is also stringified for the comparison.
// Note: atoi() returns 0 for non-numeric text, so a malformed value is stored
// as 0 rather than rejected.
#define SET_STRUCT_VALUE(config, structure, ret, field) \
  do {                                                  \
    if (strcmp(config.name, #field) == 0) {             \
      structure->field = atoi(config.value);            \
      ret = 1;                                          \
    }                                                   \
  } while (false)
753
UpdateEncodeConfig(const EncodeConfig & config,VP9EncoderConfig * oxcf)754 static void UpdateEncodeConfig(const EncodeConfig &config,
755 VP9EncoderConfig *oxcf) {
756 int ret = 0;
757 SET_STRUCT_VALUE(config, oxcf, ret, key_freq);
758 SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmin_section);
759 SET_STRUCT_VALUE(config, oxcf, ret, two_pass_vbrmax_section);
760 SET_STRUCT_VALUE(config, oxcf, ret, under_shoot_pct);
761 SET_STRUCT_VALUE(config, oxcf, ret, over_shoot_pct);
762 SET_STRUCT_VALUE(config, oxcf, ret, max_threads);
763 SET_STRUCT_VALUE(config, oxcf, ret, frame_parallel_decoding_mode);
764 SET_STRUCT_VALUE(config, oxcf, ret, tile_columns);
765 SET_STRUCT_VALUE(config, oxcf, ret, arnr_max_frames);
766 SET_STRUCT_VALUE(config, oxcf, ret, arnr_strength);
767 SET_STRUCT_VALUE(config, oxcf, ret, lag_in_frames);
768 SET_STRUCT_VALUE(config, oxcf, ret, encode_breakout);
769 SET_STRUCT_VALUE(config, oxcf, ret, enable_tpl_model);
770 SET_STRUCT_VALUE(config, oxcf, ret, enable_auto_arf);
771 if (strcmp(config.name, "rc_mode") == 0) {
772 int rc_mode = atoi(config.value);
773 if (rc_mode >= VPX_VBR && rc_mode <= VPX_Q) {
774 oxcf->rc_mode = (enum vpx_rc_mode)rc_mode;
775 ret = 1;
776 } else {
777 fprintf(stderr, "Invalid rc_mode value: %d\n", rc_mode);
778 }
779 }
780 SET_STRUCT_VALUE(config, oxcf, ret, cq_level);
781 if (ret == 0) {
782 fprintf(stderr, "Ignored unsupported encode_config %s\n", config.name);
783 }
784 }
785
GetEncodeConfig(int frame_width,int frame_height,vpx_rational_t frame_rate,int target_bitrate,int encode_speed,int target_level,vpx_enc_pass enc_pass,const std::vector<EncodeConfig> & encode_config_list)786 static VP9EncoderConfig GetEncodeConfig(
787 int frame_width, int frame_height, vpx_rational_t frame_rate,
788 int target_bitrate, int encode_speed, int target_level,
789 vpx_enc_pass enc_pass,
790 const std::vector<EncodeConfig> &encode_config_list) {
791 VP9EncoderConfig oxcf = vp9_get_encoder_config(
792 frame_width, frame_height, frame_rate, target_bitrate, encode_speed,
793 target_level, enc_pass);
794 for (const auto &config : encode_config_list) {
795 UpdateEncodeConfig(config, &oxcf);
796 }
797 if (enc_pass == VPX_RC_FIRST_PASS) {
798 oxcf.lag_in_frames = 0;
799 }
800 oxcf.use_simple_encode_api = 1;
801 return oxcf;
802 }
803
SimpleEncode(int frame_width,int frame_height,int frame_rate_num,int frame_rate_den,int target_bitrate,int num_frames,int target_level,const char * infile_path,const char * outfile_path)804 SimpleEncode::SimpleEncode(int frame_width, int frame_height,
805 int frame_rate_num, int frame_rate_den,
806 int target_bitrate, int num_frames, int target_level,
807 const char *infile_path, const char *outfile_path) {
808 impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
809 frame_width_ = frame_width;
810 frame_height_ = frame_height;
811 frame_rate_num_ = frame_rate_num;
812 frame_rate_den_ = frame_rate_den;
813 target_bitrate_ = target_bitrate;
814 num_frames_ = num_frames;
815 encode_speed_ = 0;
816 target_level_ = target_level;
817
818 frame_coding_index_ = 0;
819 show_frame_count_ = 0;
820
821 key_frame_group_index_ = 0;
822 key_frame_group_size_ = 0;
823
824 // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
825 assert(infile_path != nullptr);
826 in_file_ = fopen(infile_path, "r");
827 if (outfile_path != nullptr) {
828 out_file_ = fopen(outfile_path, "w");
829 } else {
830 out_file_ = nullptr;
831 }
832 impl_ptr_->cpi = nullptr;
833 impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
834
835 InitRefFrameInfo(&ref_frame_info_);
836 }
837
SetEncodeSpeed(int encode_speed)838 void SimpleEncode::SetEncodeSpeed(int encode_speed) {
839 encode_speed_ = encode_speed;
840 }
841
SetEncodeConfig(const char * name,const char * value)842 StatusCode SimpleEncode::SetEncodeConfig(const char *name, const char *value) {
843 if (name == nullptr || value == nullptr) {
844 fprintf(stderr, "SetEncodeConfig: null pointer, name %p value %p\n", name,
845 value);
846 return StatusError;
847 }
848 EncodeConfig config;
849 snprintf(config.name, ENCODE_CONFIG_BUF_SIZE, "%s", name);
850 snprintf(config.value, ENCODE_CONFIG_BUF_SIZE, "%s", value);
851 impl_ptr_->encode_config_list.push_back(config);
852 return StatusOk;
853 }
854
DumpEncodeConfigs(int pass,FILE * fp)855 StatusCode SimpleEncode::DumpEncodeConfigs(int pass, FILE *fp) {
856 if (fp == nullptr) {
857 fprintf(stderr, "DumpEncodeConfigs: null pointer, fp %p\n", fp);
858 return StatusError;
859 }
860 vpx_enc_pass enc_pass;
861 if (pass == 1) {
862 enc_pass = VPX_RC_FIRST_PASS;
863 } else {
864 enc_pass = VPX_RC_LAST_PASS;
865 }
866 const vpx_rational_t frame_rate =
867 make_vpx_rational(frame_rate_num_, frame_rate_den_);
868 const VP9EncoderConfig oxcf = GetEncodeConfig(
869 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
870 target_level_, enc_pass, impl_ptr_->encode_config_list);
871 vp9_dump_encoder_config(&oxcf, fp);
872 return StatusOk;
873 }
874
ComputeFirstPassStats()875 void SimpleEncode::ComputeFirstPassStats() {
876 vpx_rational_t frame_rate =
877 make_vpx_rational(frame_rate_num_, frame_rate_den_);
878 const VP9EncoderConfig oxcf = GetEncodeConfig(
879 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
880 target_level_, VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
881 impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
882 struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead;
883 int i;
884 int use_highbitdepth = 0;
885 const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
886 const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
887 #if CONFIG_VP9_HIGHBITDEPTH
888 use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth;
889 #endif
890 vpx_image_t img;
891 vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
892 rewind(in_file_);
893 impl_ptr_->first_pass_stats.clear();
894 for (i = 0; i < num_frames_; ++i) {
895 assert(!vp9_lookahead_full(lookahead));
896 if (img_read(&img, in_file_)) {
897 int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
898 int64_t ts_start =
899 timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
900 int64_t ts_end =
901 timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
902 YV12_BUFFER_CONFIG sd;
903 image2yuvconfig(&img, &sd);
904 vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
905 {
906 int64_t time_stamp;
907 int64_t time_end;
908 int flush = 1; // Makes vp9_get_compressed_data process a frame
909 size_t size;
910 unsigned int frame_flags = 0;
911 ENCODE_FRAME_RESULT encode_frame_info;
912 vp9_init_encode_frame_result(&encode_frame_info);
913 // TODO(angiebird): Call vp9_first_pass directly
914 vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr,
915 &time_stamp, &time_end, flush,
916 &encode_frame_info);
917 // vp9_get_compressed_data only generates first pass stats not
918 // compresses data
919 assert(size == 0);
920 // Get vp9 first pass motion vector info.
921 std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
922 update_motion_vector_info(
923 impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16,
924 num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision);
925 fp_motion_vector_info_.push_back(mv_info);
926 }
927 impl_ptr_->first_pass_stats.push_back(
928 vp9_get_frame_stats(&impl_ptr_->cpi->twopass));
929 }
930 }
931 // TODO(angiebird): Store the total_stats apart form first_pass_stats
932 impl_ptr_->first_pass_stats.push_back(
933 vp9_get_total_stats(&impl_ptr_->cpi->twopass));
934 vp9_end_first_pass(impl_ptr_->cpi);
935
936 // Generate key_frame_map based on impl_ptr_->first_pass_stats.
937 key_frame_map_ = ComputeKeyFrameMap();
938
939 free_encoder(impl_ptr_->cpi);
940 impl_ptr_->cpi = nullptr;
941 rewind(in_file_);
942 vpx_img_free(&img);
943 }
944
ObserveFirstPassStats()945 std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
946 std::vector<std::vector<double>> output_stats;
947 // TODO(angiebird): This function make several assumptions of
948 // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
949 // last one. 2) The last entry of first_pass_stats is the total_stats.
950 // Change the code structure, so that we don't have to make these assumptions
951
952 // Note the last entry of first_pass_stats is the total_stats, we don't need
953 // it.
954 for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
955 double *buf_start =
956 reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
957 // We use - 1 here because the last member in FIRSTPASS_STATS is not double
958 double *buf_end =
959 buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
960 1;
961 std::vector<double> this_stats(buf_start, buf_end);
962 output_stats.push_back(this_stats);
963 }
964 return output_stats;
965 }
966
967 std::vector<std::vector<MotionVectorInfo>>
ObserveFirstPassMotionVectors()968 SimpleEncode::ObserveFirstPassMotionVectors() {
969 return fp_motion_vector_info_;
970 }
971
SetExternalGroupOfPicturesMap(int * gop_map,int gop_map_size)972 void SimpleEncode::SetExternalGroupOfPicturesMap(int *gop_map,
973 int gop_map_size) {
974 for (int i = 0; i < gop_map_size; ++i) {
975 gop_map_.push_back(gop_map[i]);
976 }
977 // The following will check and modify gop_map_ to make sure the
978 // gop_map_ satisfies the constraints.
979 // 1) Each key frame position should be at the start of a gop.
980 // 2) The last gop should not use an alt ref.
981 assert(gop_map_.size() == key_frame_map_.size());
982 int last_gop_start = 0;
983 for (int i = 0; static_cast<size_t>(i) < gop_map_.size(); ++i) {
984 if (key_frame_map_[i] == 1 && gop_map_[i] == 0) {
985 fprintf(stderr, "Add an extra gop start at show_idx %d\n", i);
986 // Insert a gop start at key frame location.
987 gop_map_[i] |= kGopMapFlagStart;
988 gop_map_[i] |= kGopMapFlagUseAltRef;
989 }
990 if (gop_map_[i] & kGopMapFlagStart) {
991 last_gop_start = i;
992 }
993 }
994 if (gop_map_[last_gop_start] & kGopMapFlagUseAltRef) {
995 fprintf(stderr,
996 "Last group of pictures starting at show_idx %d shouldn't use alt "
997 "ref\n",
998 last_gop_start);
999 gop_map_[last_gop_start] &= ~kGopMapFlagUseAltRef;
1000 }
1001 }
1002
ObserveExternalGroupOfPicturesMap()1003 std::vector<int> SimpleEncode::ObserveExternalGroupOfPicturesMap() {
1004 return gop_map_;
1005 }
1006
// Returns a mutable pointer to the elements of |v|, or nullptr when |v| is
// empty. Constness is cast away so the pointer can be handed to C APIs that
// take non-const pointers.
template <typename T>
T *GetVectorData(const std::vector<T> &v) {
  return v.empty() ? nullptr : const_cast<T *>(v.data());
}
1014
GetGopCommand(const std::vector<int> & gop_map,int start_show_index)1015 static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
1016 int start_show_index) {
1017 GOP_COMMAND gop_command;
1018 if (static_cast<size_t>(start_show_index) < gop_map.size()) {
1019 assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);
1020 int end_show_index = start_show_index + 1;
1021 // gop_map[end_show_index] & kGopMapFlagStart == 0 means this is
1022 // the start of a gop.
1023 while (static_cast<size_t>(end_show_index) < gop_map.size() &&
1024 (gop_map[end_show_index] & kGopMapFlagStart) == 0) {
1025 ++end_show_index;
1026 }
1027 const int show_frame_count = end_show_index - start_show_index;
1028 int use_alt_ref = (gop_map[start_show_index] & kGopMapFlagUseAltRef) != 0;
1029 if (static_cast<size_t>(end_show_index) == gop_map.size()) {
1030 // This is the last gop group, there must be no altref.
1031 use_alt_ref = 0;
1032 }
1033 gop_command_on(&gop_command, show_frame_count, use_alt_ref);
1034 } else {
1035 gop_command_off(&gop_command);
1036 }
1037 return gop_command;
1038 }
1039
StartEncode()1040 void SimpleEncode::StartEncode() {
1041 assert(impl_ptr_->first_pass_stats.size() > 0);
1042 vpx_rational_t frame_rate =
1043 make_vpx_rational(frame_rate_num_, frame_rate_den_);
1044 VP9EncoderConfig oxcf = GetEncodeConfig(
1045 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1046 target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1047
1048 vpx_fixed_buf_t stats;
1049 stats.buf = GetVectorData(impl_ptr_->first_pass_stats);
1050 stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
1051 impl_ptr_->first_pass_stats.size();
1052
1053 vp9_set_first_pass_stats(&oxcf, &stats);
1054 assert(impl_ptr_->cpi == nullptr);
1055 impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
1056 vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
1057 frame_height_, 1);
1058
1059 frame_coding_index_ = 0;
1060 show_frame_count_ = 0;
1061
1062 assert(impl_ptr_->cpi != nullptr);
1063 FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1064 unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1065 vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area);
1066
1067 UpdateKeyFrameGroup(show_frame_count_);
1068
1069 const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1070 encode_command_set_gop_command(&impl_ptr_->cpi->encode_command, gop_command);
1071 UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1072 &group_of_picture_);
1073 rewind(in_file_);
1074
1075 if (out_file_ != nullptr) {
1076 const char *fourcc = "VP90";
1077 // In SimpleEncode, we use time_base = 1 / TICKS_PER_SEC.
1078 // Based on that, the ivf_timestamp for each image is set to
1079 // show_idx * TICKS_PER_SEC / frame_rate
1080 // such that each image's actual timestamp in seconds can be computed as
1081 // ivf_timestamp * time_base == show_idx / frame_rate
1082 // TODO(angiebird): 1) Add unit test for ivf timestamp.
1083 // 2) Simplify the frame_rate setting process.
1084 vpx_rational_t time_base = make_vpx_rational(1, TICKS_PER_SEC);
1085 ivf_write_file_header_with_video_info(out_file_, *(const uint32_t *)fourcc,
1086 num_frames_, frame_width_,
1087 frame_height_, time_base);
1088 }
1089 }
1090
EndEncode()1091 void SimpleEncode::EndEncode() {
1092 free_encoder(impl_ptr_->cpi);
1093 impl_ptr_->cpi = nullptr;
1094 vpx_img_free(&impl_ptr_->tmp_img);
1095 rewind(in_file_);
1096 }
1097
UpdateKeyFrameGroup(int key_frame_show_index)1098 void SimpleEncode::UpdateKeyFrameGroup(int key_frame_show_index) {
1099 const VP9_COMP *cpi = impl_ptr_->cpi;
1100 key_frame_group_index_ = 0;
1101 key_frame_group_size_ = vp9_get_frames_to_next_key(
1102 &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval);
1103 assert(key_frame_group_size_ > 0);
1104 // Init the reference frame info when a new key frame group appears.
1105 InitRefFrameInfo(&ref_frame_info_);
1106 }
1107
PostUpdateKeyFrameGroupIndex(FrameType frame_type)1108 void SimpleEncode::PostUpdateKeyFrameGroupIndex(FrameType frame_type) {
1109 if (frame_type != kFrameTypeAltRef) {
1110 // key_frame_group_index_ only counts show frames
1111 ++key_frame_group_index_;
1112 }
1113 }
1114
GetKeyFrameGroupSize() const1115 int SimpleEncode::GetKeyFrameGroupSize() const { return key_frame_group_size_; }
1116
ObserveGroupOfPicture() const1117 GroupOfPicture SimpleEncode::ObserveGroupOfPicture() const {
1118 return group_of_picture_;
1119 }
1120
GetNextEncodeFrameInfo() const1121 EncodeFrameInfo SimpleEncode::GetNextEncodeFrameInfo() const {
1122 return group_of_picture_
1123 .encode_frame_list[group_of_picture_.next_encode_frame_index];
1124 }
1125
PostUpdateState(const EncodeFrameResult & encode_frame_result)1126 void SimpleEncode::PostUpdateState(
1127 const EncodeFrameResult &encode_frame_result) {
1128 // This function needs to be called before the increament of
1129 // frame_coding_index_
1130 PostUpdateRefFrameInfo(encode_frame_result.frame_type, frame_coding_index_,
1131 &ref_frame_info_);
1132 ++frame_coding_index_;
1133 if (encode_frame_result.frame_type != kFrameTypeAltRef) {
1134 // Only kFrameTypeAltRef is not a show frame
1135 ++show_frame_count_;
1136 }
1137
1138 PostUpdateKeyFrameGroupIndex(encode_frame_result.frame_type);
1139 if (key_frame_group_index_ == key_frame_group_size_) {
1140 UpdateKeyFrameGroup(show_frame_count_);
1141 }
1142
1143 IncreaseGroupOfPictureIndex(&group_of_picture_);
1144 if (IsGroupOfPictureFinished(group_of_picture_)) {
1145 const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
1146 encode_command_set_gop_command(&impl_ptr_->cpi->encode_command,
1147 gop_command);
1148 // This function needs to be called after ref_frame_info_ is updated
1149 // properly in PostUpdateRefFrameInfo() and UpdateKeyFrameGroup().
1150 UpdateGroupOfPicture(impl_ptr_->cpi, frame_coding_index_, ref_frame_info_,
1151 &group_of_picture_);
1152 }
1153 }
1154
EncodeFrame(EncodeFrameResult * encode_frame_result)1155 void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
1156 VP9_COMP *cpi = impl_ptr_->cpi;
1157 struct lookahead_ctx *lookahead = cpi->lookahead;
1158 int use_highbitdepth = 0;
1159 #if CONFIG_VP9_HIGHBITDEPTH
1160 use_highbitdepth = cpi->common.use_highbitdepth;
1161 #endif
1162 // The lookahead's size is set to oxcf->lag_in_frames.
1163 // We want to fill lookahead to it's max capacity if possible so that the
1164 // encoder can construct alt ref frame in time.
1165 // In the other words, we hope vp9_get_compressed_data to encode a frame
1166 // every time in the function
1167 while (!vp9_lookahead_full(lookahead)) {
1168 // TODO(angiebird): Check whether we can move this file read logics to
1169 // lookahead
1170 if (img_read(&impl_ptr_->tmp_img, in_file_)) {
1171 int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
1172 int64_t ts_start =
1173 timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
1174 int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
1175 next_show_idx + 1);
1176 YV12_BUFFER_CONFIG sd;
1177 image2yuvconfig(&impl_ptr_->tmp_img, &sd);
1178 vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
1179 } else {
1180 break;
1181 }
1182 }
1183
1184 if (init_encode_frame_result(encode_frame_result, frame_width_, frame_height_,
1185 impl_ptr_->img_fmt)) {
1186 int64_t time_stamp;
1187 int64_t time_end;
1188 int flush = 1; // Make vp9_get_compressed_data encode a frame
1189 unsigned int frame_flags = 0;
1190 ENCODE_FRAME_RESULT encode_frame_info;
1191 vp9_init_encode_frame_result(&encode_frame_info);
1192 ImageBuffer_to_IMAGE_BUFFER(encode_frame_result->coded_frame,
1193 &encode_frame_info.coded_frame);
1194 vp9_get_compressed_data(cpi, &frame_flags,
1195 &encode_frame_result->coding_data_byte_size,
1196 encode_frame_result->coding_data.get(), &time_stamp,
1197 &time_end, flush, &encode_frame_info);
1198 if (out_file_ != nullptr) {
1199 ivf_write_frame_header(out_file_, time_stamp,
1200 encode_frame_result->coding_data_byte_size);
1201 fwrite(encode_frame_result->coding_data.get(), 1,
1202 encode_frame_result->coding_data_byte_size, out_file_);
1203 }
1204
1205 // vp9_get_compressed_data is expected to encode a frame every time, so the
1206 // data size should be greater than zero.
1207 if (encode_frame_result->coding_data_byte_size <= 0) {
1208 fprintf(stderr, "Coding data size <= 0\n");
1209 abort();
1210 }
1211 const size_t max_coding_data_byte_size =
1212 get_max_coding_data_byte_size(frame_width_, frame_height_);
1213 if (encode_frame_result->coding_data_byte_size >
1214 max_coding_data_byte_size) {
1215 fprintf(stderr, "Coding data size exceeds the maximum.\n");
1216 abort();
1217 }
1218
1219 const GroupOfPicture group_of_picture = this->ObserveGroupOfPicture();
1220 const int show_frame_count = group_of_picture.show_frame_count;
1221 update_encode_frame_result(encode_frame_result, show_frame_count,
1222 &encode_frame_info);
1223 PostUpdateState(*encode_frame_result);
1224 } else {
1225 // TODO(angiebird): Clean up encode_frame_result.
1226 fprintf(stderr, "init_encode_frame_result() failed.\n");
1227 this->EndEncode();
1228 }
1229 }
1230
EncodeFrameWithQuantizeIndex(EncodeFrameResult * encode_frame_result,int quantize_index)1231 void SimpleEncode::EncodeFrameWithQuantizeIndex(
1232 EncodeFrameResult *encode_frame_result, int quantize_index) {
1233 encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
1234 quantize_index);
1235 EncodeFrame(encode_frame_result);
1236 encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
1237 }
1238
EncodeFrameWithTargetFrameBits(EncodeFrameResult * encode_frame_result,int target_frame_bits,double percent_diff)1239 void SimpleEncode::EncodeFrameWithTargetFrameBits(
1240 EncodeFrameResult *encode_frame_result, int target_frame_bits,
1241 double percent_diff) {
1242 encode_command_set_target_frame_bits(&impl_ptr_->cpi->encode_command,
1243 target_frame_bits, percent_diff);
1244 EncodeFrame(encode_frame_result);
1245 encode_command_reset_target_frame_bits(&impl_ptr_->cpi->encode_command);
1246 }
1247
GetCodingFrameNumFromGopMap(const std::vector<int> & gop_map)1248 static int GetCodingFrameNumFromGopMap(const std::vector<int> &gop_map) {
1249 int start_show_index = 0;
1250 int coding_frame_count = 0;
1251 while (static_cast<size_t>(start_show_index) < gop_map.size()) {
1252 const GOP_COMMAND gop_command = GetGopCommand(gop_map, start_show_index);
1253 start_show_index += gop_command.show_frame_count;
1254 coding_frame_count += gop_command_coding_frame_count(&gop_command);
1255 }
1256 assert(static_cast<size_t>(start_show_index) == gop_map.size());
1257 return coding_frame_count;
1258 }
1259
GetCodingFrameNum() const1260 int SimpleEncode::GetCodingFrameNum() const {
1261 assert(impl_ptr_->first_pass_stats.size() > 0);
1262 if (gop_map_.size() > 0) {
1263 return GetCodingFrameNumFromGopMap(gop_map_);
1264 }
1265
1266 // These are the default settings for now.
1267 TWO_PASS twopass;
1268 const int multi_layer_arf = 0;
1269 const int allow_alt_ref = 1;
1270 vpx_rational_t frame_rate =
1271 make_vpx_rational(frame_rate_num_, frame_rate_den_);
1272 const VP9EncoderConfig oxcf = GetEncodeConfig(
1273 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1274 target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1275 FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
1276 fps_init_first_pass_info(&twopass.first_pass_info,
1277 GetVectorData(impl_ptr_->first_pass_stats),
1278 num_frames_);
1279 unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
1280 vp9_init_vizier_params(&twopass, screen_area);
1281 return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf,
1282 allow_alt_ref);
1283 }
1284
ComputeKeyFrameMap() const1285 std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
1286 // The last entry of first_pass_stats is the overall stats.
1287 assert(impl_ptr_->first_pass_stats.size() ==
1288 static_cast<size_t>(num_frames_) + 1);
1289 vpx_rational_t frame_rate =
1290 make_vpx_rational(frame_rate_num_, frame_rate_den_);
1291 const VP9EncoderConfig oxcf = GetEncodeConfig(
1292 frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
1293 target_level_, VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
1294 TWO_PASS twopass;
1295 fps_init_first_pass_info(&twopass.first_pass_info,
1296 GetVectorData(impl_ptr_->first_pass_stats),
1297 num_frames_);
1298 std::vector<int> key_frame_map(num_frames_, 0);
1299 vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map));
1300 return key_frame_map;
1301 }
1302
ObserveKeyFrameMap() const1303 std::vector<int> SimpleEncode::ObserveKeyFrameMap() const {
1304 return key_frame_map_;
1305 }
1306
GetFramePixelCount() const1307 uint64_t SimpleEncode::GetFramePixelCount() const {
1308 assert(frame_width_ % 2 == 0);
1309 assert(frame_height_ % 2 == 0);
1310 switch (impl_ptr_->img_fmt) {
1311 case VPX_IMG_FMT_I420: return frame_width_ * frame_height_ * 3 / 2;
1312 case VPX_IMG_FMT_I422: return frame_width_ * frame_height_ * 2;
1313 case VPX_IMG_FMT_I444: return frame_width_ * frame_height_ * 3;
1314 case VPX_IMG_FMT_I440: return frame_width_ * frame_height_ * 2;
1315 case VPX_IMG_FMT_I42016: return frame_width_ * frame_height_ * 3 / 2;
1316 case VPX_IMG_FMT_I42216: return frame_width_ * frame_height_ * 2;
1317 case VPX_IMG_FMT_I44416: return frame_width_ * frame_height_ * 3;
1318 case VPX_IMG_FMT_I44016: return frame_width_ * frame_height_ * 2;
1319 default: return 0;
1320 }
1321 }
1322
~SimpleEncode()1323 SimpleEncode::~SimpleEncode() {
1324 if (in_file_ != nullptr) {
1325 fclose(in_file_);
1326 }
1327 if (out_file_ != nullptr) {
1328 fclose(out_file_);
1329 }
1330 }
1331
1332 } // namespace vp9
1333