/*
 * Copyright (c) 2022, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <memory>
#include <numeric>
#include <vector>

#include "av1/common/enums.h"
#include "av1/encoder/rd.h"
#include "config/aom_config.h"

#include "aom/aom_encoder.h"

#include "av1/av1_cx_iface.h"
#include "av1/av1_iface_common.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/firstpass.h"
#include "av1/encoder/temporal_filter.h"
#include "av1/qmode_rc/ducky_encode.h"

#include "common/tools_common.h"

namespace aom {
struct EncoderResource {
  STATS_BUFFER_CTX *stats_buf_ctx;
  FIRSTPASS_STATS *stats_buffer;
  aom_image_t img;
  AV1_PRIMARY *ppi;
  int lookahead_push_count;
  int encode_frame_count;  // Used in the second pass only
};

class DuckyEncode::EncodeImpl {
 public:
  VideoInfo video_info;
  int g_usage;
  int max_ref_frames;
  int speed;
  int base_qindex;
  BLOCK_SIZE sb_size;
  enum aom_rc_mode rc_end_usage;
  aom_rational64_t timestamp_ratio;
  std::vector<FIRSTPASS_STATS> stats_list;
  EncoderResource enc_resource;
  struct AvxInputContext input;
};

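// Illustrative usage sketch (comment only, not part of the encoder): a caller
// typically drives DuckyEncode as a two-pass pipeline. The VideoInfo contents
// and the GOP data structures below are assumptions for illustration; see the
// member functions defined in this file.
//
//   aom::VideoInfo info = { /* frame size, frame_rate, img_fmt, file_path */ };
//   aom::DuckyEncode ducky(info, BLOCK_128X128, /*max_ref_frames=*/7,
//                          /*speed=*/3, /*base_qindex=*/128);
//   std::vector<FIRSTPASS_STATS> stats = ducky.ComputeFirstPassStats();
//   // ... build gop_list / gop_encode_info_list from the first pass stats ...
//   ducky.StartEncode(stats);
//   std::vector<aom::EncodeFrameResult> frames =
//       ducky.EncodeVideo(gop_list, gop_encode_info_list);
//   ducky.EndEncode();
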
DuckyEncode::DuckyEncode(const VideoInfo &video_info, BLOCK_SIZE sb_size,
                         int max_ref_frames, int speed, int base_qindex) {
  impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
  impl_ptr_->video_info = video_info;
  impl_ptr_->g_usage = GOOD;
  impl_ptr_->max_ref_frames = max_ref_frames;
  impl_ptr_->speed = speed;
  impl_ptr_->base_qindex = base_qindex;
  impl_ptr_->sb_size = sb_size;
  impl_ptr_->rc_end_usage = AOM_Q;
  // TODO(angiebird): Set timestamp_ratio properly
  // timestamp_ratio.den = cfg->g_timebase.den;
  // timestamp_ratio.num = (int64_t)cfg->g_timebase.num * TICKS_PER_SEC;
  impl_ptr_->timestamp_ratio = { 1, 1 };
  // TODO(angiebird): How to set ptsvol and duration?
  impl_ptr_->input.filename = impl_ptr_->video_info.file_path.c_str();
}

DuckyEncode::~DuckyEncode() {}

static AV1EncoderConfig GetEncoderConfig(const VideoInfo &video_info,
                                         int g_usage, aom_enc_pass pass) {
  const aom_codec_iface *codec = aom_codec_av1_cx();
  aom_codec_enc_cfg_t cfg;
  aom_codec_enc_config_default(codec, &cfg, g_usage);
  cfg.g_w = video_info.frame_width;
  cfg.g_h = video_info.frame_height;
  cfg.g_pass = pass;
  // g_timebase is the inverse of frame_rate
  cfg.g_timebase.num = video_info.frame_rate.den;
  cfg.g_timebase.den = video_info.frame_rate.num;
  if (pass == AOM_RC_SECOND_PASS) {
    cfg.rc_twopass_stats_in.sz =
        (video_info.frame_count + 1) * sizeof(FIRSTPASS_STATS);
  }
  AV1EncoderConfig oxcf = av1_get_encoder_config(&cfg);
  // TODO(angiebird): Why didn't we init use_highbitdepth in
  // av1_get_encoder_config()?
  oxcf.use_highbitdepth = 0;

  // TODO(jingning): Change this to 35 when the baseline rate control
  // logic is in place.
  // Force maximum look ahead buffer to be 19. This will disable the use
  // of maximum 32 GOP length.
  oxcf.gf_cfg.lag_in_frames = 19;

  return oxcf;
}
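
// Worked example for GetEncoderConfig() above (values assumed for
// illustration): a 30000/1001 fps source yields g_timebase = 1001/30000, so
// one timestamp tick corresponds to one frame duration. The second-pass stats
// buffer is sized frame_count + 1 entries because the final slot is expected
// to hold the accumulated total stats (see InitEncoder() below).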

static STATS_BUFFER_CTX *CreateStatsBufferCtx(int frame_count,
                                              FIRSTPASS_STATS **stats_buffer) {
  STATS_BUFFER_CTX *stats_buf_ctx = new STATS_BUFFER_CTX;
  // +2 is for total_stats and total_left_stats
  *stats_buffer = new FIRSTPASS_STATS[frame_count + 2];
  stats_buf_ctx->stats_in_start = *stats_buffer;
  stats_buf_ctx->stats_in_end = stats_buf_ctx->stats_in_start;
  stats_buf_ctx->stats_in_buf_end = stats_buf_ctx->stats_in_start + frame_count;
  stats_buf_ctx->total_stats = stats_buf_ctx->stats_in_buf_end;
  stats_buf_ctx->total_left_stats =
      stats_buf_ctx->stats_in_start + frame_count + 1;
  for (FIRSTPASS_STATS *buffer = stats_buf_ctx->stats_in_start;
       buffer <= stats_buf_ctx->total_left_stats; ++buffer) {
    av1_twopass_zero_stats(buffer);
  }
  return stats_buf_ctx;
}
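
// Layout of the buffer allocated by CreateStatsBufferCtx() (frame_count + 2
// entries):
//
//   index 0 .. frame_count - 1 : per-frame first pass stats
//   index frame_count          : total_stats (also stats_in_buf_end)
//   index frame_count + 1      : total_left_stats
//
// stats_in_end starts at stats_in_start and advances as the encoder appends
// first pass stats.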

static void DestroyStatsBufferCtx(STATS_BUFFER_CTX **stats_buf_context,
                                  FIRSTPASS_STATS **stats_buffer) {
  (*stats_buf_context)->stats_in_start = nullptr;
  (*stats_buf_context)->stats_in_end = nullptr;
  (*stats_buf_context)->stats_in_buf_end = nullptr;
  (*stats_buf_context)->total_stats = nullptr;
  (*stats_buf_context)->total_left_stats = nullptr;
  delete *stats_buf_context;
  *stats_buf_context = nullptr;
  delete[](*stats_buffer);
  *stats_buffer = nullptr;
}

static FIRSTPASS_STATS ComputeTotalStats(
    const std::vector<FIRSTPASS_STATS> &stats_list) {
  FIRSTPASS_STATS total_stats = {};
  for (size_t i = 0; i < stats_list.size(); ++i) {
    av1_accumulate_stats(&total_stats, &stats_list[i]);
  }
  return total_stats;
}

static bool FileIsY4m(const char detect[4]) {
  return memcmp(detect, "YUV4", 4) == 0;
}

static bool FourccIsIvf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0;
}

static void OpenInputFile(struct AvxInputContext *input) {
  input->file = fopen(input->filename, "rb");
  /* For RAW input sources, these bytes will be applied on the first frame
   * in read_frame().
   */
  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
  input->detect.position = 0;
  aom_chroma_sample_position_t const csp = AOM_CSP_UNKNOWN;
  if (input->detect.buf_read == 4 && FileIsY4m(input->detect.buf)) {
    if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
                       input->only_i420) >= 0) {
      input->file_type = FILE_TYPE_Y4M;
      input->width = input->y4m.pic_w;
      input->height = input->y4m.pic_h;
      input->pixel_aspect_ratio.numerator = input->y4m.par_n;
      input->pixel_aspect_ratio.denominator = input->y4m.par_d;
      input->framerate.numerator = input->y4m.fps_n;
      input->framerate.denominator = input->y4m.fps_d;
      input->fmt = input->y4m.aom_fmt;
      input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
      input->color_range = input->y4m.color_range;
    } else
      fatal("Unsupported Y4M stream.");
  } else if (input->detect.buf_read == 4 && FourccIsIvf(input->detect.buf)) {
    fatal("IVF is not supported as input.");
  } else {
    input->file_type = FILE_TYPE_RAW;
  }
}
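
// File-type detection above is driven by the first four bytes of the input:
// "YUV4" selects the Y4M path, the IVF FourCC "DKIF" is rejected, and
// anything else falls through to FILE_TYPE_RAW, where the probed bytes are
// replayed when the first frame is read.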

void DuckyEncode::InitEncoder(aom_enc_pass pass,
                              const std::vector<FIRSTPASS_STATS> *stats_list) {
  EncoderResource enc_resource = {};
  enc_resource.lookahead_push_count = 0;
  OpenInputFile(&impl_ptr_->input);
  if (impl_ptr_->input.file_type != FILE_TYPE_Y4M) {
    aom_img_alloc(&enc_resource.img, impl_ptr_->video_info.img_fmt,
                  impl_ptr_->video_info.frame_width,
                  impl_ptr_->video_info.frame_height, /*align=*/1);
  }
  AV1EncoderConfig oxcf =
      GetEncoderConfig(impl_ptr_->video_info, impl_ptr_->g_usage, pass);
  oxcf.dec_model_cfg.decoder_model_info_present_flag = 0;
  oxcf.dec_model_cfg.display_model_info_present_flag = 0;
  oxcf.ref_frm_cfg.max_reference_frames = impl_ptr_->max_ref_frames;
  oxcf.speed = impl_ptr_->speed;
  if (impl_ptr_->sb_size == BLOCK_64X64)
    oxcf.tool_cfg.superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
  else
    oxcf.tool_cfg.superblock_size = AOM_SUPERBLOCK_SIZE_128X128;

  av1_initialize_enc(impl_ptr_->g_usage, impl_ptr_->rc_end_usage);
  AV1_PRIMARY *ppi =
      av1_create_primary_compressor(nullptr,
                                    /*num_lap_buffers=*/0, &oxcf);
  enc_resource.ppi = ppi;

  assert(ppi != nullptr);
  // Turn off ppi->b_calculate_psnr to avoid calling generate_psnr_packet() in
  // av1_post_encode_updates().
  // TODO(angiebird): Modify generate_psnr_packet() to handle the case that
  // cpi->ppi->output_pkt_list = nullptr.
  ppi->b_calculate_psnr = 0;

  aom_codec_err_t res = AOM_CODEC_OK;
  (void)res;
  enc_resource.stats_buf_ctx = CreateStatsBufferCtx(
      impl_ptr_->video_info.frame_count, &enc_resource.stats_buffer);
  if (pass == AOM_RC_SECOND_PASS) {
    assert(stats_list != nullptr);
    std::copy(stats_list->begin(), stats_list->end(),
              enc_resource.stats_buffer);
    *enc_resource.stats_buf_ctx->total_stats = ComputeTotalStats(*stats_list);
    oxcf.twopass_stats_in.buf = enc_resource.stats_buffer;
    // We need +1 here because the av1 encoder assumes
    // oxcf.twopass_stats_in.buf[video_info.frame_count] holds the total_stats
    oxcf.twopass_stats_in.sz = (impl_ptr_->video_info.frame_count + 1) *
                               sizeof(enc_resource.stats_buffer[0]);
  } else {
    assert(pass == AOM_RC_FIRST_PASS);
    // We don't use stats_list for AOM_RC_FIRST_PASS.
    assert(stats_list == nullptr);
  }
  ppi->twopass.stats_buf_ctx = enc_resource.stats_buf_ctx;
  BufferPool *buffer_pool = nullptr;
  res = av1_create_context_and_bufferpool(ppi, &ppi->cpi, &buffer_pool, &oxcf,
                                          ENCODE_STAGE, -1);
  // TODO(angiebird): Why didn't we set initial_dimensions in
  // av1_create_compressor()?
  ppi->cpi->initial_dimensions.width = oxcf.frm_dim_cfg.width;
  ppi->cpi->initial_dimensions.height = oxcf.frm_dim_cfg.height;
  // use_ducky_encode is the flag we use to change AV1 behavior
  // slightly based on DuckyEncode's needs. We should minimize this kind of
  // change unless it's necessary.
  ppi->cpi->use_ducky_encode = 1;
  assert(res == AOM_CODEC_OK);
  assert(ppi->cpi != nullptr);
  assert(buffer_pool != nullptr);
  const AV1_COMP *cpi = ppi->cpi;
  SequenceHeader *seq_params = ppi->cpi->common.seq_params;
  set_sb_size(seq_params, impl_ptr_->sb_size);
  ppi->seq_params_locked = 1;
  assert(ppi->lookahead == nullptr);

  int lag_in_frames = cpi->oxcf.gf_cfg.lag_in_frames;
  ppi->lookahead = av1_lookahead_init(
      cpi->oxcf.frm_dim_cfg.width, cpi->oxcf.frm_dim_cfg.height,
      seq_params->subsampling_x, seq_params->subsampling_y,
      seq_params->use_highbitdepth, lag_in_frames, cpi->oxcf.border_in_pixels,
      cpi->common.features.byte_alignment,
      /*num_lap_buffers=*/0, /*is_all_intra=*/0,
      cpi->oxcf.tool_cfg.enable_global_motion);

  av1_tf_info_alloc(&cpi->ppi->tf_info, cpi);
  assert(ppi->lookahead != nullptr);

  impl_ptr_->enc_resource = enc_resource;
}

static void CloseInputFile(struct AvxInputContext *input) {
  fclose(input->file);
  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
}

void DuckyEncode::FreeEncoder() {
  EncoderResource *enc_resource = &impl_ptr_->enc_resource;
  CloseInputFile(&impl_ptr_->input);
  aom_img_free(&enc_resource->img);
  DestroyStatsBufferCtx(&enc_resource->stats_buf_ctx,
                        &enc_resource->stats_buffer);
  BufferPool *buffer_pool = enc_resource->ppi->cpi->common.buffer_pool;
  av1_destroy_context_and_bufferpool(enc_resource->ppi->cpi, &buffer_pool);
  av1_remove_primary_compressor(enc_resource->ppi);
  enc_resource->ppi = nullptr;
}

static int ReadFrame(struct AvxInputContext *input_ctx, aom_image_t *img) {
  FILE *f = input_ctx->file;
  y4m_input *y4m = &input_ctx->y4m;
  int shortread = 0;

  if (input_ctx->file_type == FILE_TYPE_Y4M) {
    if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
  } else {
    shortread = read_yuv_frame(input_ctx, img);
  }

  return !shortread;
}

std::vector<FIRSTPASS_STATS> DuckyEncode::ComputeFirstPassStats() {
  aom_enc_pass pass = AOM_RC_FIRST_PASS;
  InitEncoder(pass, nullptr);
  AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
  EncoderResource *enc_resource = &impl_ptr_->enc_resource;
  struct lookahead_ctx *lookahead = ppi->lookahead;
  int frame_count = impl_ptr_->video_info.frame_count;
  aom_rational64_t timestamp_ratio = impl_ptr_->timestamp_ratio;
  // TODO(angiebird): Ideally, ComputeFirstPassStats() doesn't output
  // bitstream. Do we need a bitstream buffer here?
  std::vector<uint8_t> buf(1000);
  std::vector<FIRSTPASS_STATS> stats_list;
  for (int i = 0; i < frame_count; ++i) {
    if (ReadFrame(&impl_ptr_->input, &impl_ptr_->enc_resource.img)) {
      // TODO(angiebird): Set ts_start/ts_end properly
      int64_t ts_start = enc_resource->lookahead_push_count;
      int64_t ts_end = ts_start + 1;
      YV12_BUFFER_CONFIG sd;
      image2yuvconfig(&enc_resource->img, &sd);
      av1_lookahead_push(lookahead, &sd, ts_start, ts_end,
                         /*use_highbitdepth=*/0, /*flags=*/0);
      ++enc_resource->lookahead_push_count;
      AV1_COMP_DATA cpi_data = {};
      cpi_data.cx_data = buf.data();
      cpi_data.cx_data_sz = buf.size();
      cpi_data.frame_size = 0;
      cpi_data.flush = 1;  // Makes av1_get_compressed_data process a frame
      cpi_data.ts_frame_start = ts_start;
      cpi_data.ts_frame_end = ts_end;
      cpi_data.pop_lookahead = 1;
      cpi_data.timestamp_ratio = &timestamp_ratio;
      // In the first pass, av1_get_compressed_data() only generates first
      // pass stats; it does not produce compressed data.
      int res = av1_get_compressed_data(ppi->cpi, &cpi_data);
      (void)res;
      assert(res == static_cast<int>(AOM_CODEC_OK));
      stats_list.push_back(*(ppi->twopass.stats_buf_ctx->stats_in_end - 1));
      av1_post_encode_updates(ppi->cpi, &cpi_data);
    }
  }
  av1_end_first_pass(ppi->cpi);

  FreeEncoder();
  return stats_list;
}

void DuckyEncode::StartEncode(const std::vector<FIRSTPASS_STATS> &stats_list) {
  aom_enc_pass pass = AOM_RC_SECOND_PASS;
  impl_ptr_->stats_list = stats_list;
  InitEncoder(pass, &stats_list);
  write_temp_delimiter_ = true;
}

static void DuckyEncodeInfoSetGopStruct(AV1_PRIMARY *ppi,
                                        const GopStruct &gop_struct,
                                        const GopEncodeInfo &gop_encode_info) {
  GF_GROUP *gf_group = &ppi->gf_group;
  ppi->p_rc.baseline_gf_interval = gop_struct.show_frame_count;
  ppi->internal_altref_allowed = 1;

  gf_group->size = static_cast<int>(gop_struct.gop_frame_list.size());
  gf_group->max_layer_depth = 0;

  int i = 0;
  for (const auto &frame : gop_struct.gop_frame_list) {
    gf_group->update_type[i] = (int)frame.update_type;
    if (frame.update_type == GopFrameType::kRegularArf) gf_group->arf_index = i;

    gf_group->frame_type[i] = !frame.is_key_frame;

    gf_group->q_val[i] = gop_encode_info.param_list[i].q_index;
    gf_group->rdmult_val[i] = gop_encode_info.param_list[i].rdmult;

    gf_group->cur_frame_idx[i] = 0;
    gf_group->arf_src_offset[i] = frame.order_idx - frame.display_idx;
    gf_group->cur_frame_idx[i] = frame.display_idx;
    gf_group->src_offset[i] = 0;

    // TODO(jingning): Placeholder - update the arf boost.
    gf_group->arf_boost[i] = 500;
    gf_group->layer_depth[i] = frame.layer_depth;
    gf_group->max_layer_depth =
        AOMMAX(frame.layer_depth, gf_group->max_layer_depth);
    gf_group->refbuf_state[i] =
        frame.is_key_frame ? REFBUF_RESET : REFBUF_UPDATE;

    std::fill_n(gf_group->ref_frame_list[i], REF_FRAMES, -1);
    gf_group->update_ref_idx[i] = -1;
    for (int ref_idx = 0;
         ref_idx < static_cast<int>(frame.ref_frame_list.size()); ++ref_idx) {
      int ref_frame = static_cast<int>(frame.ref_frame_list[ref_idx].name);
      gf_group->ref_frame_list[i][ref_frame] =
          static_cast<int8_t>(frame.ref_frame_list[ref_idx].index);
    }
    gf_group->update_ref_idx[i] = frame.update_ref_idx;
    gf_group->primary_ref_idx[i] = frame.primary_ref_frame.index;
    ++i;
  }
  ppi->cpi->gf_frame_index = 0;
}

static void DuckyEncodeInfoSetEncodeFrameDecision(
    DuckyEncodeInfo *ducky_encode_info, const EncodeFrameDecision &decision) {
  DuckyEncodeFrameInfo *frame_info = &ducky_encode_info->frame_info;
  *frame_info = {};
  frame_info->qp_mode = static_cast<DUCKY_ENCODE_FRAME_MODE>(decision.qp_mode);
  frame_info->gop_mode = static_cast<DUCKY_ENCODE_GOP_MODE>(decision.gop_mode);
  frame_info->q_index = decision.parameters.q_index;
  frame_info->rdmult = decision.parameters.rdmult;
  const size_t num_superblocks =
      decision.parameters.superblock_encode_params.size();
  frame_info->delta_q_enabled = 0;
  if (num_superblocks > 1) {
    frame_info->delta_q_enabled = 1;
    frame_info->superblock_encode_qindex = new int[num_superblocks];
    frame_info->superblock_encode_rdmult = new int[num_superblocks];
    for (size_t i = 0; i < num_superblocks; ++i) {
      frame_info->superblock_encode_qindex[i] =
          decision.parameters.superblock_encode_params[i].q_index;
      frame_info->superblock_encode_rdmult[i] =
          decision.parameters.superblock_encode_params[i].rdmult;
    }
  }
}

static void DuckyEncodeInfoGetEncodeFrameResult(
    const DuckyEncodeInfo *ducky_encode_info, EncodeFrameResult *result) {
  const DuckyEncodeFrameResult &frame_result = ducky_encode_info->frame_result;
  result->global_order_idx = frame_result.global_order_idx;
  result->q_index = frame_result.q_index;
  result->rdmult = frame_result.rdmult;
  result->rate = frame_result.rate;
  result->dist = frame_result.dist;
  result->psnr = frame_result.psnr;
}

static void WriteObu(AV1_PRIMARY *ppi, AV1_COMP_DATA *cpi_data) {
  AV1_COMP *const cpi = ppi->cpi;
  uint32_t obu_header_size = 1;
  const uint32_t obu_payload_size = 0;
  const size_t length_field_size = aom_uleb_size_in_bytes(obu_payload_size);

  const size_t move_offset = obu_header_size + length_field_size;
  memmove(cpi_data->cx_data + move_offset, cpi_data->cx_data,
          cpi_data->frame_size);
  obu_header_size =
      av1_write_obu_header(&ppi->level_params, &cpi->frame_header_count,
                           OBU_TEMPORAL_DELIMITER, 0, cpi_data->cx_data);

  // OBUs are preceded/succeeded by an unsigned leb128 coded integer.
  if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size,
                              cpi_data->cx_data) != AOM_CODEC_OK) {
    aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
  }

  cpi_data->frame_size +=
      obu_header_size + obu_payload_size + length_field_size;
}
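
// Resulting layout in cx_data after WriteObu() (the temporal delimiter OBU
// carries an empty payload, so its leb128 size field encodes 0 in one byte):
//
//   [ 1-byte OBU header | 1-byte leb128 size = 0 | original frame data ... ]
//
// The memmove above shifts the already-encoded frame forward to make room for
// these two bytes, and frame_size is bumped accordingly.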

TplGopStats DuckyEncode::ObtainTplStats(const GopStruct gop_struct,
                                        bool rate_dist_present) {
  TplGopStats tpl_gop_stats;

  AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
  const uint8_t block_mis_log2 = ppi->tpl_data.tpl_stats_block_mis_log2;

  for (size_t idx = 0; idx < gop_struct.gop_frame_list.size(); ++idx) {
    TplFrameStats tpl_frame_stats = {};
    tpl_frame_stats.rate_dist_present = rate_dist_present;

    TplDepFrame *tpl_frame = &ppi->tpl_data.tpl_frame[idx];
    if (gop_struct.gop_frame_list[idx].update_type == GopFrameType::kOverlay ||
        gop_struct.gop_frame_list[idx].update_type ==
            GopFrameType::kIntermediateOverlay) {
      tpl_gop_stats.frame_stats_list.push_back(tpl_frame_stats);
      continue;
    }

    int ref_frame_index_mapping[REF_FRAMES] = { 0 };
    const GopFrame &gop_frame = gop_struct.gop_frame_list[idx];

    for (auto &rf : gop_frame.ref_frame_list) {
      ref_frame_index_mapping[static_cast<int>(rf.name)] = rf.index;
    }

    const int mi_rows = tpl_frame->mi_rows;
    const int mi_cols = tpl_frame->mi_cols;
    const int tpl_frame_stride = tpl_frame->stride;
    tpl_frame_stats.frame_height = mi_rows * MI_SIZE;
    tpl_frame_stats.frame_width = mi_cols * MI_SIZE;
    tpl_frame_stats.min_block_size = (1 << block_mis_log2) * MI_SIZE;

    const int mi_step = 1 << block_mis_log2;
    for (int mi_row = 0; mi_row < mi_rows; mi_row += mi_step) {
      for (int mi_col = 0; mi_col < mi_cols; mi_col += mi_step) {
        int tpl_blk_pos = (mi_row >> block_mis_log2) * tpl_frame_stride +
                          (mi_col >> block_mis_log2);
        TplDepStats *tpl_stats_ptr = &tpl_frame->tpl_stats_ptr[tpl_blk_pos];

        TplBlockStats block_stats;
        block_stats.row = mi_row * MI_SIZE;
        block_stats.col = mi_col * MI_SIZE;
        block_stats.height = (1 << block_mis_log2) * MI_SIZE;
        block_stats.width = (1 << block_mis_log2) * MI_SIZE;

        block_stats.inter_cost =
            RDCOST(tpl_frame->base_rdmult, tpl_stats_ptr->recrf_rate,
                   tpl_stats_ptr->recrf_dist);
        block_stats.intra_cost =
            RDCOST(tpl_frame->base_rdmult, tpl_stats_ptr->intra_rate,
                   tpl_stats_ptr->intra_dist);

        if (tpl_frame_stats.rate_dist_present) {
          block_stats.recrf_dist = tpl_stats_ptr->recrf_dist;
          block_stats.recrf_rate = tpl_stats_ptr->recrf_rate;
          block_stats.intra_pred_err = tpl_stats_ptr->intra_sse;
          block_stats.inter_pred_err = tpl_stats_ptr->recrf_sse;
        }

        block_stats.ref_frame_index = { -1, -1 };

        for (int i = 0; i < kBlockRefCount; ++i) {
          if (tpl_stats_ptr->ref_frame_index[i] >= 0) {
            block_stats.ref_frame_index[i] =
                ref_frame_index_mapping[tpl_stats_ptr->ref_frame_index[i] + 1];
            block_stats.mv[i] = {
              tpl_stats_ptr->mv[tpl_stats_ptr->ref_frame_index[i]].as_mv.row,
              tpl_stats_ptr->mv[tpl_stats_ptr->ref_frame_index[i]].as_mv.col, 3
            };
          }
        }
        tpl_frame_stats.block_stats_list.push_back(block_stats);
      }
    }

    tpl_gop_stats.frame_stats_list.push_back(tpl_frame_stats);
  }

  return tpl_gop_stats;
}
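
// Units in ObtainTplStats() above: block positions and sizes are converted
// from mode-info units to luma pixels via MI_SIZE (4 pixels per mode-info
// unit). Illustrative example, assuming block_mis_log2 == 2: each TPL block
// spans 4x4 mode-info units, i.e. 16x16 pixels, and the block at mi_row = 8,
// mi_col = 12 is read from tpl_stats_ptr[(8 >> 2) * stride + (12 >> 2)].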

// Obtain TPL stats through ducky_encode.
// TODO(jianj): Populate rate_dist_present flag through qmode_rc_encoder
std::vector<TplGopStats> DuckyEncode::ComputeTplStats(
    const std::vector<FIRSTPASS_STATS> &stats_list,
    const GopStructList &gop_list,
    const GopEncodeInfoList &gop_encode_info_list) {
  StartEncode(stats_list);
  std::vector<TplGopStats> tpl_gop_stats_list;
  AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
  const VideoInfo &video_info = impl_ptr_->video_info;
  write_temp_delimiter_ = true;
  AllocateBitstreamBuffer(video_info);

  // Go through each gop and encode each frame in the gop
  for (size_t i = 0; i < gop_list.size(); ++i) {
    const aom::GopStruct &gop_struct = gop_list[i];
    const aom::GopEncodeInfo &gop_encode_info = gop_encode_info_list[i];

    DuckyEncodeInfoSetGopStruct(ppi, gop_struct, gop_encode_info);

    aom::TplGopStats tpl_gop_stats;
    for (auto &frame_param : gop_encode_info.param_list) {
      // encoding frame frame_number
      aom::EncodeFrameDecision frame_decision = { aom::EncodeFrameMode::kQindex,
                                                  aom::EncodeGopMode::kGopRcl,
                                                  frame_param };
      EncodeFrame(frame_decision);
      if (ppi->cpi->common.show_frame) pending_ctx_size_ = 0;
      write_temp_delimiter_ = ppi->cpi->common.show_frame;
    }
    // The rate_dist_present needs to be populated.
    tpl_gop_stats = ObtainTplStats(gop_struct, 0);
    tpl_gop_stats_list.push_back(tpl_gop_stats);
  }
  EndEncode();
  return tpl_gop_stats_list;
}

std::vector<TplGopStats> DuckyEncode::ComputeTwoPassTplStats(
    const std::vector<FIRSTPASS_STATS> &stats_list,
    const GopStructList &gop_list,
    const GopEncodeInfoList &gop_encode_info_list,
    const GopEncodeInfoList &alt_gop_encode_info_list) {
  std::vector<TplGopStats> first_tpl_gop_stats_list =
      ComputeTplStats(stats_list, gop_list, gop_encode_info_list);
  const std::vector<TplGopStats> second_tpl_gop_stats_list =
      ComputeTplStats(stats_list, gop_list, alt_gop_encode_info_list);
  assert(first_tpl_gop_stats_list.size() == second_tpl_gop_stats_list.size());

  // Set alternate_block_stats_list in first_tpl_gop_stats_list
  // and return first_tpl_gop_stats_list
  for (size_t i = 0; i < first_tpl_gop_stats_list.size(); ++i) {
    for (size_t j = 0; j < first_tpl_gop_stats_list[i].frame_stats_list.size();
         ++j) {
      first_tpl_gop_stats_list[i]
          .frame_stats_list[j]
          .alternate_block_stats_list =
          second_tpl_gop_stats_list[i].frame_stats_list[j].block_stats_list;
    }
  }
  return first_tpl_gop_stats_list;
}

// Conduct final encoding process.
std::vector<EncodeFrameResult> DuckyEncode::EncodeVideo(
    const GopStructList &gop_list,
    const GopEncodeInfoList &gop_encode_info_list) {
  AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
  std::vector<EncodeFrameResult> encoded_frame_list;
  const VideoInfo &video_info = impl_ptr_->video_info;

  write_temp_delimiter_ = true;
  AllocateBitstreamBuffer(video_info);

  // Go through each gop and encode each frame in the gop
  for (size_t i = 0; i < gop_list.size(); ++i) {
    const aom::GopStruct &gop_struct = gop_list[i];
    const aom::GopEncodeInfo &gop_encode_info = gop_encode_info_list[i];
    DuckyEncodeInfoSetGopStruct(ppi, gop_struct, gop_encode_info);

    for (auto &frame_param : gop_encode_info.param_list) {
      aom::EncodeFrameDecision frame_decision = { aom::EncodeFrameMode::kQindex,
                                                  aom::EncodeGopMode::kGopRcl,
                                                  frame_param };
      EncodeFrameResult temp_result = EncodeFrame(frame_decision);
      if (ppi->cpi->common.show_frame) {
        bitstream_buf_.resize(pending_ctx_size_);
        EncodeFrameResult encode_frame_result = temp_result;
        encode_frame_result.bitstream_buf = bitstream_buf_;
        encoded_frame_list.push_back(encode_frame_result);

        AllocateBitstreamBuffer(video_info);
      }
      write_temp_delimiter_ = ppi->cpi->common.show_frame;
    }
  }

  return encoded_frame_list;
}
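
// Bitstream packing in EncodeVideo() above: EncodeFrame() appends each coded
// frame to bitstream_buf_ and grows pending_ctx_size_. Invisible (non-shown)
// frames keep accumulating; when a shown frame completes the temporal unit,
// the buffer is trimmed to pending_ctx_size_, handed out with the frame
// result, and a fresh buffer is allocated. write_temp_delimiter_ then makes
// the next temporal unit start with a temporal delimiter OBU.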

EncodeFrameResult DuckyEncode::EncodeFrame(
    const EncodeFrameDecision &decision) {
  EncodeFrameResult encode_frame_result = {};
  encode_frame_result.bitstream_buf = bitstream_buf_;
  AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
  aom_image_t *img = &impl_ptr_->enc_resource.img;
  AV1_COMP *const cpi = ppi->cpi;
  struct lookahead_ctx *lookahead = ppi->lookahead;

  while (!av1_lookahead_full(lookahead)) {
    if (ReadFrame(&impl_ptr_->input, img)) {
      YV12_BUFFER_CONFIG sd;
      image2yuvconfig(img, &sd);
      int64_t ts_start = impl_ptr_->enc_resource.lookahead_push_count;
      int64_t ts_end = ts_start + 1;
      av1_lookahead_push(lookahead, &sd, ts_start, ts_end,
                         /*use_highbitdepth=*/0, /*flags=*/0);
      ++impl_ptr_->enc_resource.lookahead_push_count;
    } else {
      break;
    }
  }

  AV1_COMP_DATA cpi_data = {};
  cpi_data.cx_data = bitstream_buf_.data() + pending_ctx_size_;
  cpi_data.cx_data_sz = bitstream_buf_.size() - pending_ctx_size_;
  cpi_data.frame_size = 0;
  cpi_data.flush = 1;
  // ts_frame_start and ts_frame_end are not as important since we are focusing
  // on q mode
  cpi_data.ts_frame_start = impl_ptr_->enc_resource.encode_frame_count;
  cpi_data.ts_frame_end = cpi_data.ts_frame_start + 1;
  cpi_data.pop_lookahead = 1;
  cpi_data.timestamp_ratio = &impl_ptr_->timestamp_ratio;
  ++impl_ptr_->enc_resource.encode_frame_count;

  av1_compute_num_workers_for_mt(cpi);
  av1_init_frame_mt(ppi, cpi);

  DuckyEncodeInfoSetEncodeFrameDecision(&cpi->ducky_encode_info, decision);
  const int status = av1_get_compressed_data(cpi, &cpi_data);

  if (write_temp_delimiter_) WriteObu(ppi, &cpi_data);
  (void)status;
  assert(status == static_cast<int>(AOM_CODEC_OK));
  DuckyEncodeInfoGetEncodeFrameResult(&cpi->ducky_encode_info,
                                      &encode_frame_result);
  av1_post_encode_updates(cpi, &cpi_data);
  if (cpi->common.show_frame) {
    // decrement frames_left counter
    ppi->frames_left = AOMMAX(0, ppi->frames_left - 1);
  }

  pending_ctx_size_ += cpi_data.frame_size;

  fprintf(stderr, "frame %d, qp = %d, size %d, PSNR %f\n",
          encode_frame_result.global_order_idx, encode_frame_result.q_index,
          encode_frame_result.rate, encode_frame_result.psnr);
  delete[] cpi->ducky_encode_info.frame_info.superblock_encode_qindex;
  delete[] cpi->ducky_encode_info.frame_info.superblock_encode_rdmult;
  return encode_frame_result;
}

void DuckyEncode::EndEncode() { FreeEncoder(); }

void DuckyEncode::AllocateBitstreamBuffer(const VideoInfo &video_info) {
  pending_ctx_size_ = 0;
  // TODO(angiebird): Set the bitstream_buf_ size to a conservative upper
  // bound.
  bitstream_buf_.assign(
      video_info.frame_width * video_info.frame_height * 3 * 8, 0);
}
}  // namespace aom