• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2019, Alliance for Open Media. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 //  This is an example demonstrating how to implement a multi-layer AOM
12 //  encoding scheme for RTC video applications.
13 
14 #include <assert.h>
15 #include <limits.h>
16 #include <math.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <memory>
22 
23 #include "config/aom_config.h"
24 
25 #if CONFIG_AV1_DECODER
26 #include "aom/aom_decoder.h"
27 #endif
28 #include "aom/aom_encoder.h"
29 #include "aom/aomcx.h"
30 #include "common/args.h"
31 #include "common/tools_common.h"
32 #include "common/video_writer.h"
33 #include "examples/encoder_util.h"
34 #include "aom_ports/aom_timer.h"
35 #include "av1/ratectrl_rtc.h"
36 
37 #define OPTION_BUFFER_SIZE 1024
38 
39 typedef struct {
40   const char *output_filename;
41   char options[OPTION_BUFFER_SIZE];
42   struct AvxInputContext input_ctx;
43   int speed;
44   int aq_mode;
45   int layering_mode;
46   int output_obu;
47   int decode;
48   int tune_content;
49   int show_psnr;
50   bool use_external_rc;
51 } AppInput;
52 
// Kinds of comma-separated per-layer option strings. The enumerator values
// are used as indices into option_min_values[] and option_max_values[];
// ALL_OPTION_TYPES is the number of kinds and sizes those tables.
typedef enum {
  QUANTIZER,        // 0
  BITRATE,          // 1
  SCALE_FACTOR,     // 2: "num/den" pairs
  AUTO_ALT_REF,     // 3
  ALL_OPTION_TYPES  // 4: count, not a real option kind
} LAYER_OPTION_TYPE;
60 
61 static const arg_def_t outputfile =
62     ARG_DEF("o", "output", 1, "Output filename");
63 static const arg_def_t frames_arg =
64     ARG_DEF("f", "frames", 1, "Number of frames to encode");
65 static const arg_def_t threads_arg =
66     ARG_DEF("th", "threads", 1, "Number of threads to use");
67 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
68 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
69 static const arg_def_t timebase_arg =
70     ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
71 static const arg_def_t bitrate_arg = ARG_DEF(
72     "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
73 static const arg_def_t spatial_layers_arg =
74     ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
75 static const arg_def_t temporal_layers_arg =
76     ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
77 static const arg_def_t layering_mode_arg =
78     ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
79 static const arg_def_t kf_dist_arg =
80     ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
81 static const arg_def_t scale_factors_arg =
82     ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
83 static const arg_def_t min_q_arg =
84     ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
85 static const arg_def_t max_q_arg =
86     ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
87 static const arg_def_t speed_arg =
88     ARG_DEF("sp", "speed", 1, "Speed configuration");
89 static const arg_def_t aqmode_arg =
90     ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
91 static const arg_def_t bitrates_arg =
92     ARG_DEF("bl", "bitrates", 1,
93             "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
94 static const arg_def_t dropframe_thresh_arg =
95     ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
96 static const arg_def_t error_resilient_arg =
97     ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
98 static const arg_def_t output_obu_arg =
99     ARG_DEF(NULL, "output-obu", 1,
100             "Write OBUs when set to 1. Otherwise write IVF files.");
101 static const arg_def_t test_decode_arg =
102     ARG_DEF(NULL, "test-decode", 1,
103             "Attempt to test decoding the output when set to 1. Default is 1.");
104 static const arg_def_t psnr_arg =
105     ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
106 static const arg_def_t ext_rc_arg =
107     ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
108 static const struct arg_enum_list tune_content_enum[] = {
109   { "default", AOM_CONTENT_DEFAULT },
110   { "screen", AOM_CONTENT_SCREEN },
111   { "film", AOM_CONTENT_FILM },
112   { NULL, 0 }
113 };
114 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
115     NULL, "tune-content", 1, "Tune content type", tune_content_enum);
116 
#if CONFIG_AV1_HIGHBITDEPTH
// Names accepted by -d / --bit-depth and the values they map to.
static const struct arg_enum_list bitdepth_enum[] = {
  { "8", AOM_BITS_8 },
  { "10", AOM_BITS_10 },
  { NULL, 0 },
};

static const arg_def_t bitdepth_arg =
    ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8 or 10. ",
                 bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
125 
126 static const arg_def_t *svc_args[] = {
127   &frames_arg,          &outputfile,     &width_arg,
128   &height_arg,          &timebase_arg,   &bitrate_arg,
129   &spatial_layers_arg,  &kf_dist_arg,    &scale_factors_arg,
130   &min_q_arg,           &max_q_arg,      &temporal_layers_arg,
131   &layering_mode_arg,   &threads_arg,    &aqmode_arg,
132 #if CONFIG_AV1_HIGHBITDEPTH
133   &bitdepth_arg,
134 #endif
135   &speed_arg,           &bitrates_arg,   &dropframe_thresh_arg,
136   &error_resilient_arg, &output_obu_arg, &test_decode_arg,
137   &tune_content_arg,    &psnr_arg,       NULL,
138 };
139 
// Clears an object in place. Dest must be an lvalue whose full size is
// visible to sizeof (an array or struct object, not a pointer to one).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message by usage_exit().
static const char *exec_name;
143 
usage_exit(void)144 void usage_exit(void) {
145   fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
146           exec_name);
147   fprintf(stderr, "Options:\n");
148   arg_show_usage(stderr, svc_args);
149   exit(EXIT_FAILURE);
150 }
151 
// Returns 1 when the four bytes in detect are the Y4M magic "YUV4",
// 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char y4m_magic[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, y4m_magic, sizeof(y4m_magic)) != 0) {
    return 0;
  }
  return 1;
}
155 
// Returns 1 when the four bytes in detect are the IVF signature "DKIF",
// 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0;
}
162 
163 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
164                                                          1 };
165 
166 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
167 
open_input_file(struct AvxInputContext * input,aom_chroma_sample_position_t csp)168 static void open_input_file(struct AvxInputContext *input,
169                             aom_chroma_sample_position_t csp) {
170   /* Parse certain options from the input file, if possible */
171   input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
172                                              : set_binary_mode(stdin);
173 
174   if (!input->file) fatal("Failed to open input file");
175 
176   if (!fseeko(input->file, 0, SEEK_END)) {
177     /* Input file is seekable. Figure out how long it is, so we can get
178      * progress info.
179      */
180     input->length = ftello(input->file);
181     rewind(input->file);
182   }
183 
184   /* Default to 1:1 pixel aspect ratio. */
185   input->pixel_aspect_ratio.numerator = 1;
186   input->pixel_aspect_ratio.denominator = 1;
187 
188   /* For RAW input sources, these bytes will applied on the first frame
189    *  in read_frame().
190    */
191   input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
192   input->detect.position = 0;
193 
194   if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
195     if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
196                        input->only_i420) >= 0) {
197       input->file_type = FILE_TYPE_Y4M;
198       input->width = input->y4m.pic_w;
199       input->height = input->y4m.pic_h;
200       input->pixel_aspect_ratio.numerator = input->y4m.par_n;
201       input->pixel_aspect_ratio.denominator = input->y4m.par_d;
202       input->framerate.numerator = input->y4m.fps_n;
203       input->framerate.denominator = input->y4m.fps_d;
204       input->fmt = input->y4m.aom_fmt;
205       input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
206     } else {
207       fatal("Unsupported Y4M stream.");
208     }
209   } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
210     fatal("IVF is not supported as input.");
211   } else {
212     input->file_type = FILE_TYPE_RAW;
213   }
214 }
215 
extract_option(LAYER_OPTION_TYPE type,char * input,int * value0,int * value1)216 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
217                                       int *value0, int *value1) {
218   if (type == SCALE_FACTOR) {
219     *value0 = (int)strtol(input, &input, 10);
220     if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
221     *value1 = (int)strtol(input, &input, 10);
222 
223     if (*value0 < option_min_values[SCALE_FACTOR] ||
224         *value1 < option_min_values[SCALE_FACTOR] ||
225         *value0 > option_max_values[SCALE_FACTOR] ||
226         *value1 > option_max_values[SCALE_FACTOR] ||
227         *value0 > *value1)  // num shouldn't be greater than den
228       return AOM_CODEC_INVALID_PARAM;
229   } else {
230     *value0 = atoi(input);
231     if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
232       return AOM_CODEC_INVALID_PARAM;
233   }
234   return AOM_CODEC_OK;
235 }
236 
parse_layer_options_from_string(aom_svc_params_t * svc_params,LAYER_OPTION_TYPE type,const char * input,int * option0,int * option1)237 static aom_codec_err_t parse_layer_options_from_string(
238     aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
239     int *option0, int *option1) {
240   aom_codec_err_t res = AOM_CODEC_OK;
241   char *input_string;
242   char *token;
243   const char *delim = ",";
244   int num_layers = svc_params->number_spatial_layers;
245   int i = 0;
246 
247   if (type == BITRATE)
248     num_layers =
249         svc_params->number_spatial_layers * svc_params->number_temporal_layers;
250 
251   if (input == NULL || option0 == NULL ||
252       (option1 == NULL && type == SCALE_FACTOR))
253     return AOM_CODEC_INVALID_PARAM;
254 
255   const size_t input_length = strlen(input);
256   input_string = reinterpret_cast<char *>(malloc(input_length + 1));
257   if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
258   memcpy(input_string, input, input_length + 1);
259   token = strtok(input_string, delim);  // NOLINT
260   for (i = 0; i < num_layers; ++i) {
261     if (token != NULL) {
262       res = extract_option(type, token, option0 + i, option1 + i);
263       if (res != AOM_CODEC_OK) break;
264       token = strtok(NULL, delim);  // NOLINT
265     } else {
266       res = AOM_CODEC_INVALID_PARAM;
267       break;
268     }
269   }
270   free(input_string);
271   return res;
272 }
273 
parse_command_line(int argc,const char ** argv_,AppInput * app_input,aom_svc_params_t * svc_params,aom_codec_enc_cfg_t * enc_cfg)274 static void parse_command_line(int argc, const char **argv_,
275                                AppInput *app_input,
276                                aom_svc_params_t *svc_params,
277                                aom_codec_enc_cfg_t *enc_cfg) {
278   struct arg arg;
279   char **argv = NULL;
280   char **argi = NULL;
281   char **argj = NULL;
282   char string_options[1024] = { 0 };
283 
284   // Default settings
285   svc_params->number_spatial_layers = 1;
286   svc_params->number_temporal_layers = 1;
287   app_input->layering_mode = 0;
288   app_input->output_obu = 0;
289   app_input->decode = 1;
290   enc_cfg->g_threads = 1;
291   enc_cfg->rc_end_usage = AOM_CBR;
292 
293   // process command line options
294   argv = argv_dup(argc - 1, argv_ + 1);
295   if (!argv) {
296     fprintf(stderr, "Error allocating argument list\n");
297     exit(EXIT_FAILURE);
298   }
299   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
300     arg.argv_step = 1;
301 
302     if (arg_match(&arg, &outputfile, argi)) {
303       app_input->output_filename = arg.val;
304     } else if (arg_match(&arg, &width_arg, argi)) {
305       enc_cfg->g_w = arg_parse_uint(&arg);
306     } else if (arg_match(&arg, &height_arg, argi)) {
307       enc_cfg->g_h = arg_parse_uint(&arg);
308     } else if (arg_match(&arg, &timebase_arg, argi)) {
309       enc_cfg->g_timebase = arg_parse_rational(&arg);
310     } else if (arg_match(&arg, &bitrate_arg, argi)) {
311       enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
312     } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
313       svc_params->number_spatial_layers = arg_parse_uint(&arg);
314     } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
315       svc_params->number_temporal_layers = arg_parse_uint(&arg);
316     } else if (arg_match(&arg, &speed_arg, argi)) {
317       app_input->speed = arg_parse_uint(&arg);
318       if (app_input->speed > 11) {
319         aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
320       }
321     } else if (arg_match(&arg, &aqmode_arg, argi)) {
322       app_input->aq_mode = arg_parse_uint(&arg);
323     } else if (arg_match(&arg, &threads_arg, argi)) {
324       enc_cfg->g_threads = arg_parse_uint(&arg);
325     } else if (arg_match(&arg, &layering_mode_arg, argi)) {
326       app_input->layering_mode = arg_parse_int(&arg);
327     } else if (arg_match(&arg, &kf_dist_arg, argi)) {
328       enc_cfg->kf_min_dist = arg_parse_uint(&arg);
329       enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
330     } else if (arg_match(&arg, &scale_factors_arg, argi)) {
331       aom_codec_err_t res = parse_layer_options_from_string(
332           svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
333           svc_params->scaling_factor_den);
334       if (res != AOM_CODEC_OK) {
335         die("Failed to parse scale factors: %s\n",
336             aom_codec_err_to_string(res));
337       }
338     } else if (arg_match(&arg, &min_q_arg, argi)) {
339       enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
340     } else if (arg_match(&arg, &max_q_arg, argi)) {
341       enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
342 #if CONFIG_AV1_HIGHBITDEPTH
343     } else if (arg_match(&arg, &bitdepth_arg, argi)) {
344       enc_cfg->g_bit_depth =
345           static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
346       switch (enc_cfg->g_bit_depth) {
347         case AOM_BITS_8:
348           enc_cfg->g_input_bit_depth = 8;
349           enc_cfg->g_profile = 0;
350           break;
351         case AOM_BITS_10:
352           enc_cfg->g_input_bit_depth = 10;
353           enc_cfg->g_profile = 0;
354           break;
355         default:
356           die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
357       }
358 #endif  // CONFIG_VP9_HIGHBITDEPTH
359     } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
360       enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
361     } else if (arg_match(&arg, &error_resilient_arg, argi)) {
362       enc_cfg->g_error_resilient = arg_parse_uint(&arg);
363       if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
364         die("Invalid value for error resilient (0, 1): %d.",
365             enc_cfg->g_error_resilient);
366     } else if (arg_match(&arg, &output_obu_arg, argi)) {
367       app_input->output_obu = arg_parse_uint(&arg);
368       if (app_input->output_obu != 0 && app_input->output_obu != 1)
369         die("Invalid value for obu output flag (0, 1): %d.",
370             app_input->output_obu);
371     } else if (arg_match(&arg, &test_decode_arg, argi)) {
372       app_input->decode = arg_parse_uint(&arg);
373       if (app_input->decode != 0 && app_input->decode != 1)
374         die("Invalid value for test decode flag (0, 1): %d.",
375             app_input->decode);
376     } else if (arg_match(&arg, &tune_content_arg, argi)) {
377       app_input->tune_content = arg_parse_enum_or_int(&arg);
378       printf("tune content %d\n", app_input->tune_content);
379     } else if (arg_match(&arg, &psnr_arg, argi)) {
380       app_input->show_psnr = 1;
381     } else if (arg_match(&arg, &ext_rc_arg, argi)) {
382       app_input->use_external_rc = true;
383     } else {
384       ++argj;
385     }
386   }
387 
388   // Total bitrate needs to be parsed after the number of layers.
389   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
390     arg.argv_step = 1;
391     if (arg_match(&arg, &bitrates_arg, argi)) {
392       aom_codec_err_t res = parse_layer_options_from_string(
393           svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
394       if (res != AOM_CODEC_OK) {
395         die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
396       }
397     } else {
398       ++argj;
399     }
400   }
401 
402   // There will be a space in front of the string options
403   if (strlen(string_options) > 0)
404     strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
405 
406   // Check for unrecognized options
407   for (argi = argv; *argi; ++argi)
408     if (argi[0][0] == '-' && strlen(argi[0]) > 1)
409       die("Error: Unrecognized option %s\n", *argi);
410 
411   if (argv[0] == NULL) {
412     usage_exit();
413   }
414 
415   app_input->input_ctx.filename = argv[0];
416   free(argv);
417 
418   open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
419   if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
420     enc_cfg->g_w = app_input->input_ctx.width;
421     enc_cfg->g_h = app_input->input_ctx.height;
422   }
423 
424   if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
425       enc_cfg->g_h % 2)
426     die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
427 
428   printf(
429       "Codec %s\n"
430       "layers: %d\n"
431       "width %u, height: %u\n"
432       "num: %d, den: %d, bitrate: %u\n"
433       "gop size: %u\n",
434       aom_codec_iface_name(aom_codec_av1_cx()),
435       svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
436       enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
437       enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
438 }
439 
// Number of temporal layers for each of the 12 table entries.
// NOTE(review): presumably indexed by the layering mode (0..11) -- confirm
// against the code that reads these tables.
static int mode_to_num_temporal_layers[12] = {
  /* 0..3  */ 1, 2, 3, 3,
  /* 4..7  */ 2, 1, 1, 3,
  /* 8..11 */ 3, 3, 3, 3,
};

// Number of spatial layers for each of the 12 table entries (same indexing
// as the temporal-layer table).
static int mode_to_num_spatial_layers[12] = {
  /* 0..3  */ 1, 1, 1, 1,
  /* 4..7  */ 1, 2, 3, 2,
  /* 8..11 */ 3, 3, 3, 3,
};
446 
447 // For rate control encoding stats.
448 struct RateControlMetrics {
449   // Number of input frames per layer.
450   int layer_input_frames[AOM_MAX_TS_LAYERS];
451   // Number of encoded non-key frames per layer.
452   int layer_enc_frames[AOM_MAX_TS_LAYERS];
453   // Framerate per layer layer (cumulative).
454   double layer_framerate[AOM_MAX_TS_LAYERS];
455   // Target average frame size per layer (per-frame-bandwidth per layer).
456   double layer_pfb[AOM_MAX_LAYERS];
457   // Actual average frame size per layer.
458   double layer_avg_frame_size[AOM_MAX_LAYERS];
459   // Average rate mismatch per layer (|target - actual| / target).
460   double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
461   // Actual encoding bitrate per layer (cumulative across temporal layers).
462   double layer_encoding_bitrate[AOM_MAX_LAYERS];
463   // Average of the short-time encoder actual bitrate.
464   // TODO(marpan): Should we add these short-time stats for each layer?
465   double avg_st_encoding_bitrate;
466   // Variance of the short-time encoder actual bitrate.
467   double variance_st_encoding_bitrate;
468   // Window (number of frames) for computing short-timee encoding bitrate.
469   int window_size;
470   // Number of window measurements.
471   int window_count;
472   int layer_target_bitrate[AOM_MAX_LAYERS];
473 };
474 
// Number of entries cleared in ref_frame_config->refresh[] by
// set_layer_pattern().
static const int REF_FRAMES = 8;

// Number of entries in ref_frame_config->ref_idx[] and ->reference[], one
// per reference named in the enum below.
static const int INTER_REFS_PER_FRAME = 7;

// Names for the seven references used in this example encoder. The values
// are indices into ref_frame_config->ref_idx[] and ->reference[].
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME = 1,
  SVC_LAST3_FRAME = 2,
  SVC_GOLDEN_FRAME = 3,
  SVC_BWDREF_FRAME = 4,
  SVC_ALTREF2_FRAME = 5,
  SVC_ALTREF_FRAME = 6
};
489 
read_frame(struct AvxInputContext * input_ctx,aom_image_t * img)490 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
491   FILE *f = input_ctx->file;
492   y4m_input *y4m = &input_ctx->y4m;
493   int shortread = 0;
494 
495   if (input_ctx->file_type == FILE_TYPE_Y4M) {
496     if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
497   } else {
498     shortread = read_yuv_frame(input_ctx, img);
499   }
500 
501   return !shortread;
502 }
503 
close_input_file(struct AvxInputContext * input)504 static void close_input_file(struct AvxInputContext *input) {
505   fclose(input->file);
506   if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
507 }
508 
509 // Note: these rate control metrics assume only 1 key frame in the
510 // sequence (i.e., first frame only). So for temporal pattern# 7
511 // (which has key frame for every frame on base layer), the metrics
512 // computation will be off/wrong.
513 // TODO(marpan): Update these metrics to account for multiple key frames
514 // in the stream.
set_rate_control_metrics(struct RateControlMetrics * rc,double framerate,int ss_number_layers,int ts_number_layers)515 static void set_rate_control_metrics(struct RateControlMetrics *rc,
516                                      double framerate, int ss_number_layers,
517                                      int ts_number_layers) {
518   int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
519   ts_rate_decimator[0] = 1;
520   if (ts_number_layers == 2) {
521     ts_rate_decimator[0] = 2;
522     ts_rate_decimator[1] = 1;
523   }
524   if (ts_number_layers == 3) {
525     ts_rate_decimator[0] = 4;
526     ts_rate_decimator[1] = 2;
527     ts_rate_decimator[2] = 1;
528   }
529   // Set the layer (cumulative) framerate and the target layer (non-cumulative)
530   // per-frame-bandwidth, for the rate control encoding stats below.
531   for (int sl = 0; sl < ss_number_layers; ++sl) {
532     int i = sl * ts_number_layers;
533     rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
534     rc->layer_pfb[i] =
535         1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
536     for (int tl = 0; tl < ts_number_layers; ++tl) {
537       i = sl * ts_number_layers + tl;
538       if (tl > 0) {
539         rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
540         rc->layer_pfb[i] =
541             1000.0 *
542             (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
543             (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
544       }
545       rc->layer_input_frames[tl] = 0;
546       rc->layer_enc_frames[tl] = 0;
547       rc->layer_encoding_bitrate[i] = 0.0;
548       rc->layer_avg_frame_size[i] = 0.0;
549       rc->layer_avg_rate_mismatch[i] = 0.0;
550     }
551   }
552   rc->window_count = 0;
553   rc->window_size = 15;
554   rc->avg_st_encoding_bitrate = 0.0;
555   rc->variance_st_encoding_bitrate = 0.0;
556 }
557 
printout_rate_control_summary(struct RateControlMetrics * rc,int frame_cnt,int ss_number_layers,int ts_number_layers)558 static void printout_rate_control_summary(struct RateControlMetrics *rc,
559                                           int frame_cnt, int ss_number_layers,
560                                           int ts_number_layers) {
561   int tot_num_frames = 0;
562   double perc_fluctuation = 0.0;
563   printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
564   printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
565   for (int sl = 0; sl < ss_number_layers; ++sl) {
566     tot_num_frames = 0;
567     for (int tl = 0; tl < ts_number_layers; ++tl) {
568       int i = sl * ts_number_layers + tl;
569       const int num_dropped =
570           tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
571                  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
572       tot_num_frames += rc->layer_input_frames[tl];
573       rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
574                                       rc->layer_encoding_bitrate[i] /
575                                       tot_num_frames;
576       rc->layer_avg_frame_size[i] =
577           rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
578       rc->layer_avg_rate_mismatch[i] =
579           100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
580       printf("For layer#: %d %d \n", sl, tl);
581       printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
582              rc->layer_encoding_bitrate[i]);
583       printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
584              rc->layer_avg_frame_size[i]);
585       printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
586       printf(
587           "Number of input frames, encoded (non-key) frames, "
588           "and perc dropped frames: %d %d %f\n",
589           rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
590           100.0 * num_dropped / rc->layer_input_frames[tl]);
591       printf("\n");
592     }
593   }
594   rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
595   rc->variance_st_encoding_bitrate =
596       rc->variance_st_encoding_bitrate / rc->window_count -
597       (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
598   perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
599                      rc->avg_st_encoding_bitrate;
600   printf("Short-time stats, for window of %d frames:\n", rc->window_size);
601   printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
602          rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
603          perc_fluctuation);
604   if (frame_cnt - 1 != tot_num_frames)
605     die("Error: Number of input frames not equal to output!\n");
606 }
607 
608 // Layer pattern configuration.
set_layer_pattern(int layering_mode,int superframe_cnt,aom_svc_layer_id_t * layer_id,aom_svc_ref_frame_config_t * ref_frame_config,aom_svc_ref_frame_comp_pred_t * ref_frame_comp_pred,int * use_svc_control,int spatial_layer_id,int is_key_frame,int ksvc_mode,int speed)609 static void set_layer_pattern(
610     int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
611     aom_svc_ref_frame_config_t *ref_frame_config,
612     aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
613     int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
614   // Setting this flag to 1 enables simplex example of
615   // RPS (Reference Picture Selection) for 1 layer.
616   int use_rps_example = 0;
617   int i;
618   int enable_longterm_temporal_ref = 1;
619   int shift = (layering_mode == 8) ? 2 : 0;
620   int simulcast_mode = (layering_mode == 11);
621   *use_svc_control = 1;
622   layer_id->spatial_layer_id = spatial_layer_id;
623   int lag_index = 0;
624   int base_count = superframe_cnt >> 2;
625   ref_frame_comp_pred->use_comp_pred[0] = 0;  // GOLDEN_LAST
626   ref_frame_comp_pred->use_comp_pred[1] = 0;  // LAST2_LAST
627   ref_frame_comp_pred->use_comp_pred[2] = 0;  // ALTREF_LAST
628   // Set the reference map buffer idx for the 7 references:
629   // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
630   // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
631   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
632   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
633   for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
634 
635   if (ksvc_mode) {
636     // Same pattern as case 9, but the reference strucutre will be constrained
637     // below.
638     layering_mode = 9;
639   }
640   switch (layering_mode) {
641     case 0:
642       if (use_rps_example == 0) {
643         // 1-layer: update LAST on every frame, reference LAST.
644         layer_id->temporal_layer_id = 0;
645         layer_id->spatial_layer_id = 0;
646         ref_frame_config->refresh[0] = 1;
647         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
648       } else {
649         // Pattern of 2 references (ALTREF and GOLDEN) trailing
650         // LAST by 4 and 8 frames, with some switching logic to
651         // sometimes only predict from the longer-term reference
652         //(golden here). This is simple example to test RPS
653         // (reference picture selection).
654         int last_idx = 0;
655         int last_idx_refresh = 0;
656         int gld_idx = 0;
657         int alt_ref_idx = 0;
658         int lag_alt = 4;
659         int lag_gld = 8;
660         layer_id->temporal_layer_id = 0;
661         layer_id->spatial_layer_id = 0;
662         int sh = 8;  // slots 0 - 7.
663         // Moving index slot for last: 0 - (sh - 1)
664         if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
665         // Moving index for refresh of last: one ahead for next frame.
666         last_idx_refresh = superframe_cnt % sh;
667         // Moving index for gld_ref, lag behind current by lag_gld
668         if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
669         // Moving index for alt_ref, lag behind LAST by lag_alt frames.
670         if (superframe_cnt > lag_alt)
671           alt_ref_idx = (superframe_cnt - lag_alt) % sh;
672         // Set the ref_idx.
673         // Default all references to slot for last.
674         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
675           ref_frame_config->ref_idx[i] = last_idx;
676         // Set the ref_idx for the relevant references.
677         ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
678         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
679         ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
680         ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
681         // Refresh this slot, which will become LAST on next frame.
682         ref_frame_config->refresh[last_idx_refresh] = 1;
683         // Reference LAST, ALTREF, and GOLDEN
684         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
685         ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
686         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
687         // Switch to only GOLDEN every 300 frames.
688         if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
689           ref_frame_config->reference[SVC_LAST_FRAME] = 0;
690           ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
691           ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
692           // Test if the long-term is LAST instead, this is just a renaming
693           // but its tests if encoder behaves the same, whether its
694           // LAST or GOLDEN.
695           if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
696             ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
697             ref_frame_config->reference[SVC_LAST_FRAME] = 1;
698             ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
699             ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
700           }
701         }
702       }
703       break;
704     case 1:
705       // 2-temporal layer.
706       //    1    3    5
707       //  0    2    4
708       // Keep golden fixed at slot 3.
709       base_count = superframe_cnt >> 1;
710       ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
711       // Cyclically refresh slots 5, 6, 7, for lag alt ref.
712       lag_index = 5;
713       if (base_count > 0) {
714         lag_index = 5 + (base_count % 3);
715         if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
716       }
717       // Set the altref slot to lag_index.
718       ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
719       if (superframe_cnt % 2 == 0) {
720         layer_id->temporal_layer_id = 0;
721         // Update LAST on layer 0, reference LAST.
722         ref_frame_config->refresh[0] = 1;
723         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
724         // Refresh lag_index slot, needed for lagging golen.
725         ref_frame_config->refresh[lag_index] = 1;
726         // Refresh GOLDEN every x base layer frames.
727         if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
728       } else {
729         layer_id->temporal_layer_id = 1;
730         // No updates on layer 1, reference LAST (TL0).
731         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
732       }
733       // Always reference golden and altref on TL0.
734       if (layer_id->temporal_layer_id == 0) {
735         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
736         ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
737       }
738       break;
739     case 2:
740       // 3-temporal layer:
741       //   1    3   5    7
742       //     2        6
743       // 0        4        8
744       if (superframe_cnt % 4 == 0) {
745         // Base layer.
746         layer_id->temporal_layer_id = 0;
747         // Update LAST on layer 0, reference LAST.
748         ref_frame_config->refresh[0] = 1;
749         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
750       } else if ((superframe_cnt - 1) % 4 == 0) {
751         layer_id->temporal_layer_id = 2;
752         // First top layer: no updates, only reference LAST (TL0).
753         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
754       } else if ((superframe_cnt - 2) % 4 == 0) {
755         layer_id->temporal_layer_id = 1;
756         // Middle layer (TL1): update LAST2, only reference LAST (TL0).
757         ref_frame_config->refresh[1] = 1;
758         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
759       } else if ((superframe_cnt - 3) % 4 == 0) {
760         layer_id->temporal_layer_id = 2;
761         // Second top layer: no updates, only reference LAST.
762         // Set buffer idx for LAST to slot 1, since that was the slot
763         // updated in previous frame. So LAST is TL1 frame.
764         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
765         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
766         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
767       }
768       break;
769     case 3:
770       // 3 TL, same as above, except allow for predicting
771       // off 2 more references (GOLDEN and ALTREF), with
772       // GOLDEN updated periodically, and ALTREF lagging from
773       // LAST from ~4 frames. Both GOLDEN and ALTREF
774       // can only be updated on base temporal layer.
775 
776       // Keep golden fixed at slot 3.
777       ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
778       // Cyclically refresh slots 5, 6, 7, for lag altref.
779       lag_index = 5;
780       if (base_count > 0) {
781         lag_index = 5 + (base_count % 3);
782         if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
783       }
784       // Set the altref slot to lag_index.
785       ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
786       if (superframe_cnt % 4 == 0) {
787         // Base layer.
788         layer_id->temporal_layer_id = 0;
789         // Update LAST on layer 0, reference LAST.
790         ref_frame_config->refresh[0] = 1;
791         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
792         // Refresh GOLDEN every x ~10 base layer frames.
793         if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
794         // Refresh lag_index slot, needed for lagging altref.
795         ref_frame_config->refresh[lag_index] = 1;
796       } else if ((superframe_cnt - 1) % 4 == 0) {
797         layer_id->temporal_layer_id = 2;
798         // First top layer: no updates, only reference LAST (TL0).
799         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
800       } else if ((superframe_cnt - 2) % 4 == 0) {
801         layer_id->temporal_layer_id = 1;
802         // Middle layer (TL1): update LAST2, only reference LAST (TL0).
803         ref_frame_config->refresh[1] = 1;
804         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
805       } else if ((superframe_cnt - 3) % 4 == 0) {
806         layer_id->temporal_layer_id = 2;
807         // Second top layer: no updates, only reference LAST.
808         // Set buffer idx for LAST to slot 1, since that was the slot
809         // updated in previous frame. So LAST is TL1 frame.
810         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
811         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
812         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
813       }
814       // Every frame can reference GOLDEN AND ALTREF.
815       ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
816       ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
817       // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
818       if (speed >= 7) {
819         ref_frame_comp_pred->use_comp_pred[2] = 1;
820         ref_frame_comp_pred->use_comp_pred[0] = 1;
821       }
822       break;
823     case 4:
824       // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
825       // only reference GF (not LAST). Other frames only reference LAST.
826       //   1    3   5    7
827       //     2        6
828       // 0        4        8
829       if (superframe_cnt % 4 == 0) {
830         // Base layer.
831         layer_id->temporal_layer_id = 0;
832         // Update LAST on layer 0, only reference LAST.
833         ref_frame_config->refresh[0] = 1;
834         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
835       } else if ((superframe_cnt - 1) % 4 == 0) {
836         layer_id->temporal_layer_id = 2;
837         // First top layer: no updates, only reference LAST (TL0).
838         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
839       } else if ((superframe_cnt - 2) % 4 == 0) {
840         layer_id->temporal_layer_id = 1;
841         // Middle layer (TL1): update GF, only reference LAST (TL0).
842         ref_frame_config->refresh[3] = 1;
843         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
844       } else if ((superframe_cnt - 3) % 4 == 0) {
845         layer_id->temporal_layer_id = 2;
846         // Second top layer: no updates, only reference GF.
847         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
848       }
849       break;
850     case 5:
851       // 2 spatial layers, 1 temporal.
852       layer_id->temporal_layer_id = 0;
853       if (layer_id->spatial_layer_id == 0) {
854         // Reference LAST, update LAST.
855         ref_frame_config->refresh[0] = 1;
856         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
857       } else if (layer_id->spatial_layer_id == 1) {
858         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
859         // and GOLDEN to slot 0. Update slot 1 (LAST).
860         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
861         ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
862         ref_frame_config->refresh[1] = 1;
863         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
864         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
865       }
866       break;
867     case 6:
868       // 3 spatial layers, 1 temporal.
869       // Note for this case, we set the buffer idx for all references to be
870       // either LAST or GOLDEN, which are always valid references, since decoder
871       // will check if any of the 7 references is valid scale in
872       // valid_ref_frame_size().
873       layer_id->temporal_layer_id = 0;
874       if (layer_id->spatial_layer_id == 0) {
875         // Reference LAST, update LAST. Set all buffer_idx to 0.
876         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
877           ref_frame_config->ref_idx[i] = 0;
878         ref_frame_config->refresh[0] = 1;
879         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
880       } else if (layer_id->spatial_layer_id == 1) {
881         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
882         // and GOLDEN (and all other refs) to slot 0.
883         // Update slot 1 (LAST).
884         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
885           ref_frame_config->ref_idx[i] = 0;
886         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
887         ref_frame_config->refresh[1] = 1;
888         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
889         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
890       } else if (layer_id->spatial_layer_id == 2) {
891         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
892         // and GOLDEN (and all other refs) to slot 1.
893         // Update slot 2 (LAST).
894         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
895           ref_frame_config->ref_idx[i] = 1;
896         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
897         ref_frame_config->refresh[2] = 1;
898         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
899         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
900         // For 3 spatial layer case: allow for top spatial layer to use
901         // additional temporal reference. Update every 10 frames.
902         if (enable_longterm_temporal_ref) {
903           ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
904           ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
905           if (base_count % 10 == 0)
906             ref_frame_config->refresh[REF_FRAMES - 1] = 1;
907         }
908       }
909       break;
910     case 7:
911       // 2 spatial and 3 temporal layer.
912       ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913       if (superframe_cnt % 4 == 0) {
914         // Base temporal layer
915         layer_id->temporal_layer_id = 0;
916         if (layer_id->spatial_layer_id == 0) {
917           // Reference LAST, update LAST
918           // Set all buffer_idx to 0
919           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
920             ref_frame_config->ref_idx[i] = 0;
921           ref_frame_config->refresh[0] = 1;
922         } else if (layer_id->spatial_layer_id == 1) {
923           // Reference LAST and GOLDEN.
924           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
925             ref_frame_config->ref_idx[i] = 0;
926           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
927           ref_frame_config->refresh[1] = 1;
928         }
929       } else if ((superframe_cnt - 1) % 4 == 0) {
930         // First top temporal enhancement layer.
931         layer_id->temporal_layer_id = 2;
932         if (layer_id->spatial_layer_id == 0) {
933           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
934             ref_frame_config->ref_idx[i] = 0;
935           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
936           ref_frame_config->refresh[3] = 1;
937         } else if (layer_id->spatial_layer_id == 1) {
938           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
939           // GOLDEN (and all other refs) to slot 3.
940           // No update.
941           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
942             ref_frame_config->ref_idx[i] = 3;
943           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
944         }
945       } else if ((superframe_cnt - 2) % 4 == 0) {
946         // Middle temporal enhancement layer.
947         layer_id->temporal_layer_id = 1;
948         if (layer_id->spatial_layer_id == 0) {
949           // Reference LAST.
950           // Set all buffer_idx to 0.
951           // Set GOLDEN to slot 5 and update slot 5.
952           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
953             ref_frame_config->ref_idx[i] = 0;
954           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
955           ref_frame_config->refresh[5 - shift] = 1;
956         } else if (layer_id->spatial_layer_id == 1) {
957           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
958           // GOLDEN (and all other refs) to slot 5.
959           // Set LAST3 to slot 6 and update slot 6.
960           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
961             ref_frame_config->ref_idx[i] = 5 - shift;
962           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
963           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
964           ref_frame_config->refresh[6 - shift] = 1;
965         }
966       } else if ((superframe_cnt - 3) % 4 == 0) {
967         // Second top temporal enhancement layer.
968         layer_id->temporal_layer_id = 2;
969         if (layer_id->spatial_layer_id == 0) {
970           // Set LAST to slot 5 and reference LAST.
971           // Set GOLDEN to slot 3 and update slot 3.
972           // Set all other buffer_idx to 0.
973           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
974             ref_frame_config->ref_idx[i] = 0;
975           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
976           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
977           ref_frame_config->refresh[3] = 1;
978         } else if (layer_id->spatial_layer_id == 1) {
979           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
980           // GOLDEN to slot 3. No update.
981           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
982             ref_frame_config->ref_idx[i] = 0;
983           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
984           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
985         }
986       }
987       break;
988     case 8:
989       // 3 spatial and 3 temporal layer.
990       // Same as case 9 but overalap in the buffer slot updates.
991       // (shift = 2). The slots 3 and 4 updated by first TL2 are
992       // reused for update in TL1 superframe.
993       // Note for this case, frame order hint must be disabled for
994       // lower resolutios (operating points > 0) to be decoedable.
995     case 9:
996       // 3 spatial and 3 temporal layer.
997       // No overlap in buffer updates between TL2 and TL1.
998       // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
999       // Set the references via the svc_ref_frame_config control.
1000       // Always reference LAST.
1001       ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1002       if (superframe_cnt % 4 == 0) {
1003         // Base temporal layer.
1004         layer_id->temporal_layer_id = 0;
1005         if (layer_id->spatial_layer_id == 0) {
1006           // Reference LAST, update LAST.
1007           // Set all buffer_idx to 0.
1008           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1009             ref_frame_config->ref_idx[i] = 0;
1010           ref_frame_config->refresh[0] = 1;
1011         } else if (layer_id->spatial_layer_id == 1) {
1012           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1013           // GOLDEN (and all other refs) to slot 0.
1014           // Update slot 1 (LAST).
1015           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1016             ref_frame_config->ref_idx[i] = 0;
1017           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1018           ref_frame_config->refresh[1] = 1;
1019         } else if (layer_id->spatial_layer_id == 2) {
1020           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1021           // GOLDEN (and all other refs) to slot 1.
1022           // Update slot 2 (LAST).
1023           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1024             ref_frame_config->ref_idx[i] = 1;
1025           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1026           ref_frame_config->refresh[2] = 1;
1027         }
1028       } else if ((superframe_cnt - 1) % 4 == 0) {
1029         // First top temporal enhancement layer.
1030         layer_id->temporal_layer_id = 2;
1031         if (layer_id->spatial_layer_id == 0) {
1032           // Reference LAST (slot 0).
1033           // Set GOLDEN to slot 3 and update slot 3.
1034           // Set all other buffer_idx to slot 0.
1035           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1036             ref_frame_config->ref_idx[i] = 0;
1037           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1038           ref_frame_config->refresh[3] = 1;
1039         } else if (layer_id->spatial_layer_id == 1) {
1040           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1041           // GOLDEN (and all other refs) to slot 3.
1042           // Set LAST2 to slot 4 and Update slot 4.
1043           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1044             ref_frame_config->ref_idx[i] = 3;
1045           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1046           ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1047           ref_frame_config->refresh[4] = 1;
1048         } else if (layer_id->spatial_layer_id == 2) {
1049           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1050           // GOLDEN (and all other refs) to slot 4.
1051           // No update.
1052           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1053             ref_frame_config->ref_idx[i] = 4;
1054           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1055         }
1056       } else if ((superframe_cnt - 2) % 4 == 0) {
1057         // Middle temporal enhancement layer.
1058         layer_id->temporal_layer_id = 1;
1059         if (layer_id->spatial_layer_id == 0) {
1060           // Reference LAST.
1061           // Set all buffer_idx to 0.
1062           // Set GOLDEN to slot 5 and update slot 5.
1063           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1064             ref_frame_config->ref_idx[i] = 0;
1065           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1066           ref_frame_config->refresh[5 - shift] = 1;
1067         } else if (layer_id->spatial_layer_id == 1) {
1068           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1069           // GOLDEN (and all other refs) to slot 5.
1070           // Set LAST3 to slot 6 and update slot 6.
1071           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1072             ref_frame_config->ref_idx[i] = 5 - shift;
1073           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1074           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1075           ref_frame_config->refresh[6 - shift] = 1;
1076         } else if (layer_id->spatial_layer_id == 2) {
1077           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1078           // GOLDEN (and all other refs) to slot 6.
1079           // Set LAST3 to slot 7 and update slot 7.
1080           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1081             ref_frame_config->ref_idx[i] = 6 - shift;
1082           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1083           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1084           ref_frame_config->refresh[7 - shift] = 1;
1085         }
1086       } else if ((superframe_cnt - 3) % 4 == 0) {
1087         // Second top temporal enhancement layer.
1088         layer_id->temporal_layer_id = 2;
1089         if (layer_id->spatial_layer_id == 0) {
1090           // Set LAST to slot 5 and reference LAST.
1091           // Set GOLDEN to slot 3 and update slot 3.
1092           // Set all other buffer_idx to 0.
1093           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094             ref_frame_config->ref_idx[i] = 0;
1095           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1096           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1097           ref_frame_config->refresh[3] = 1;
1098         } else if (layer_id->spatial_layer_id == 1) {
1099           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1100           // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1101           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102             ref_frame_config->ref_idx[i] = 0;
1103           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1104           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1105           ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1106           ref_frame_config->refresh[4] = 1;
1107         } else if (layer_id->spatial_layer_id == 2) {
1108           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1109           // GOLDEN to slot 4. No update.
1110           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1111             ref_frame_config->ref_idx[i] = 0;
1112           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1113           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1114         }
1115       }
1116       break;
1117     case 11:
1118       // Simulcast mode for 3 spatial and 3 temporal layers.
1119       // No inter-layer predicton, only prediction is temporal and single
1120       // reference (LAST).
1121       // No overlap in buffer slots between spatial layers. So for example,
1122       // SL0 only uses slots 0 and 1.
1123       // SL1 only uses slots 2 and 3.
1124       // SL2 only uses slots 4 and 5.
1125       // All 7 references for each inter-frame must only access buffer slots
1126       // for that spatial layer.
1127       // On key (super)frames: SL1 and SL2 must have no references set
1128       // and must refresh all the slots for that layer only (so 2 and 3
1129       // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1130       // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1131       // internally as Intra-only frames that allow that stream to be decoded.
1132       // These conditions will allow for each spatial stream to be
1133       // independently decodeable.
1134 
1135       // Initialize all references to 0 (don't use reference).
1136       for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1137         ref_frame_config->reference[i] = 0;
1138       // Initialize as no refresh/update for all slots.
1139       for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1140       for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141         ref_frame_config->ref_idx[i] = 0;
1142 
1143       if (is_key_frame) {
1144         if (layer_id->spatial_layer_id == 0) {
1145           // Assign LAST/GOLDEN to slot 0/1.
1146           // Refesh slots 0 and 1 for SL0.
1147           // SL0: this will get set to KEY frame internally.
1148           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1149           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1150           ref_frame_config->refresh[0] = 1;
1151           ref_frame_config->refresh[1] = 1;
1152         } else if (layer_id->spatial_layer_id == 1) {
1153           // Assign LAST/GOLDEN to slot 2/3.
1154           // Refesh slots 2 and 3 for SL1.
1155           // This will get set to Intra-only frame internally.
1156           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1157           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1158           ref_frame_config->refresh[2] = 1;
1159           ref_frame_config->refresh[3] = 1;
1160         } else if (layer_id->spatial_layer_id == 2) {
1161           // Assign LAST/GOLDEN to slot 4/5.
1162           // Refresh slots 4 and 5 for SL2.
1163           // This will get set to Intra-only frame internally.
1164           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1165           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1166           ref_frame_config->refresh[4] = 1;
1167           ref_frame_config->refresh[5] = 1;
1168         }
1169       } else if (superframe_cnt % 4 == 0) {
1170         // Base temporal layer: TL0
1171         layer_id->temporal_layer_id = 0;
1172         if (layer_id->spatial_layer_id == 0) {  // SL0
1173           // Reference LAST. Assign all references to either slot
1174           // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1175           // Update slot 0 (LAST).
1176           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1177           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1178             ref_frame_config->ref_idx[i] = 1;
1179           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1180           ref_frame_config->refresh[0] = 1;
1181         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1182           // Reference LAST. Assign all references to either slot
1183           // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1184           // Update slot 2 (LAST).
1185           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1186           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1187             ref_frame_config->ref_idx[i] = 3;
1188           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1189           ref_frame_config->refresh[2] = 1;
1190         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1191           // Reference LAST. Assign all references to either slot
1192           // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1193           // Update slot 4 (LAST).
1194           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1195           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1196             ref_frame_config->ref_idx[i] = 5;
1197           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1198           ref_frame_config->refresh[4] = 1;
1199         }
1200       } else if ((superframe_cnt - 1) % 4 == 0) {
1201         // First top temporal enhancement layer: TL2
1202         layer_id->temporal_layer_id = 2;
1203         if (layer_id->spatial_layer_id == 0) {  // SL0
1204           // Reference LAST (slot 0). Assign other references to slot 1.
1205           // No update/refresh on any slots.
1206           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1207           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1208             ref_frame_config->ref_idx[i] = 1;
1209           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1210         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1211           // Reference LAST (slot 2). Assign other references to slot 3.
1212           // No update/refresh on any slots.
1213           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1214           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215             ref_frame_config->ref_idx[i] = 3;
1216           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1217         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1218           // Reference LAST (slot 4). Assign other references to slot 4.
1219           // No update/refresh on any slots.
1220           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1221           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1222             ref_frame_config->ref_idx[i] = 5;
1223           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1224         }
1225       } else if ((superframe_cnt - 2) % 4 == 0) {
1226         // Middle temporal enhancement layer: TL1
1227         layer_id->temporal_layer_id = 1;
1228         if (layer_id->spatial_layer_id == 0) {  // SL0
1229           // Reference LAST (slot 0).
1230           // Set GOLDEN to slot 1 and update slot 1.
1231           // This will be used as reference for next TL2.
1232           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1233           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1234             ref_frame_config->ref_idx[i] = 1;
1235           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1236           ref_frame_config->refresh[1] = 1;
1237         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1238           // Reference LAST (slot 2).
1239           // Set GOLDEN to slot 3 and update slot 3.
1240           // This will be used as reference for next TL2.
1241           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1242           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1243             ref_frame_config->ref_idx[i] = 3;
1244           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1245           ref_frame_config->refresh[3] = 1;
1246         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1247           // Reference LAST (slot 4).
1248           // Set GOLDEN to slot 5 and update slot 5.
1249           // This will be used as reference for next TL2.
1250           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1251           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1252             ref_frame_config->ref_idx[i] = 5;
1253           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1254           ref_frame_config->refresh[5] = 1;
1255         }
1256       } else if ((superframe_cnt - 3) % 4 == 0) {
1257         // Second top temporal enhancement layer: TL2
1258         layer_id->temporal_layer_id = 2;
1259         if (layer_id->spatial_layer_id == 0) {  // SL0
1260           // Reference LAST (slot 1). Assign other references to slot 0.
1261           // No update/refresh on any slots.
1262           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1263           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1264             ref_frame_config->ref_idx[i] = 0;
1265           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1266         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1267           // Reference LAST (slot 3). Assign other references to slot 2.
1268           // No update/refresh on any slots.
1269           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1270           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1271             ref_frame_config->ref_idx[i] = 2;
1272           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1273         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1274           // Reference LAST (slot 5). Assign other references to slot 4.
1275           // No update/refresh on any slots.
1276           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1277           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1278             ref_frame_config->ref_idx[i] = 4;
1279           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1280         }
1281       }
1282       if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1283         // Always reference GOLDEN (inter-layer prediction).
1284         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1285         if (ksvc_mode) {
1286           // KSVC: only keep the inter-layer reference (GOLDEN) for
1287           // superframes whose base is key.
1288           if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1289         }
1290         if (is_key_frame && layer_id->spatial_layer_id > 1) {
1291           // On superframes whose base is key: remove LAST to avoid prediction
1292           // off layer two levels below.
1293           ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1294         }
1295       }
1296       // For 3 spatial layer case 8 (where there is free buffer slot):
1297       // allow for top spatial layer to use additional temporal reference.
1298       // Additional reference is only updated on base temporal layer, every
1299       // 10 TL0 frames here.
1300       if (!simulcast_mode && enable_longterm_temporal_ref &&
1301           layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1302         ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1303         if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1304         if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1305           ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1306       }
1307       break;
1308     default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1309   }
1310 }
1311 
1312 #if CONFIG_AV1_DECODER
1313 // Returns whether there is a mismatch between the encoder's new frame and the
1314 // decoder's new frame.
test_decode(aom_codec_ctx_t * encoder,aom_codec_ctx_t * decoder,const int frames_out)1315 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1316                        const int frames_out) {
1317   aom_image_t enc_img, dec_img;
1318   int mismatch = 0;
1319 
1320   /* Get the internal new frame */
1321   AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
1322   AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
1323 
1324 #if CONFIG_AV1_HIGHBITDEPTH
1325   if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1326       (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1327     if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1328       aom_image_t enc_hbd_img;
1329       aom_img_alloc(
1330           &enc_hbd_img,
1331           static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1332           enc_img.d_w, enc_img.d_h, 16);
1333       aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1334       enc_img = enc_hbd_img;
1335     }
1336     if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1337       aom_image_t dec_hbd_img;
1338       aom_img_alloc(
1339           &dec_hbd_img,
1340           static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1341           dec_img.d_w, dec_img.d_h, 16);
1342       aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1343       dec_img = dec_hbd_img;
1344     }
1345   }
1346 #endif
1347 
1348   if (!aom_compare_img(&enc_img, &dec_img)) {
1349     int y[4], u[4], v[4];
1350 #if CONFIG_AV1_HIGHBITDEPTH
1351     if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1352       aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1353     } else {
1354       aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1355     }
1356 #else
1357     aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1358 #endif
1359     fprintf(stderr,
1360             "Encode/decode mismatch on frame %d at"
1361             " Y[%d, %d] {%d/%d},"
1362             " U[%d, %d] {%d/%d},"
1363             " V[%d, %d] {%d/%d}\n",
1364             frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1365             v[1], v[2], v[3]);
1366     mismatch = 1;
1367   }
1368 
1369   aom_img_free(&enc_img);
1370   aom_img_free(&dec_img);
1371   return mismatch;
1372 }
1373 #endif  // CONFIG_AV1_DECODER
1374 
// Running PSNR totals for the encoded stream. Index [0] of every array is the
// one accumulated from PSNR packets and reported by show_psnr(); index [1] is
// reserved for high bitdepth.
struct psnr_stats {
  // Sum of the sse[0] values from all PSNR packets.
  uint64_t psnr_sse_total[2];
  // Sum of the samples[0] values from all PSNR packets.
  uint64_t psnr_samples_total[2];
  // Per-entry sums of the four psnr[] values from each PSNR packet; printed
  // as Avg/Y/U/V after dividing by psnr_count.
  double psnr_totals[2][4];
  // Number of PSNR packets accumulated.
  int psnr_count[2];
};
1382 
show_psnr(struct psnr_stats * psnr_stream,double peak)1383 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1384   double ovpsnr;
1385 
1386   if (!psnr_stream->psnr_count[0]) return;
1387 
1388   fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1389   ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1390                        (double)psnr_stream->psnr_sse_total[0]);
1391   fprintf(stderr, " %.3f", ovpsnr);
1392 
1393   for (int i = 0; i < 4; i++) {
1394     fprintf(stderr, " %.3f",
1395             psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1396   }
1397   fprintf(stderr, "\n");
1398 }
1399 
create_rtc_rc_config(const aom_codec_enc_cfg_t & cfg,const AppInput & app_input)1400 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1401     const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1402   aom::AV1RateControlRtcConfig rc_cfg;
1403   rc_cfg.width = cfg.g_w;
1404   rc_cfg.height = cfg.g_h;
1405   rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1406   rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1407   rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1408   rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1409   rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1410   rc_cfg.buf_sz = cfg.rc_buf_sz;
1411   rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1412   rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1413   // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1414   rc_cfg.max_intra_bitrate_pct = 300;
1415   rc_cfg.framerate = cfg.g_timebase.den;
1416   // TODO(jianj): Add suppor for SVC.
1417   rc_cfg.ss_number_layers = 1;
1418   rc_cfg.ts_number_layers = 1;
1419   rc_cfg.scaling_factor_num[0] = 1;
1420   rc_cfg.scaling_factor_den[0] = 1;
1421   rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1422   rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1423   rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1424   rc_cfg.aq_mode = app_input.aq_mode;
1425 
1426   return rc_cfg;
1427 }
1428 
// Maps an internal qindex (0-255 range) back to the external 0-63 quantizer
// scale. Returns the smallest quantizer whose table entry is at least
// `qindex`; values above the last table entry saturate to 63.
static int qindex_to_quantizer(int qindex) {
  // Entry q is the internal qindex that external quantizer q stands for.
  static const int kQindexOfQuantizer[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int num_levels =
      (int)(sizeof(kQindexOfQuantizer) / sizeof(kQindexOfQuantizer[0]));

  // Advance past every level that is still below the requested qindex.
  int level = 0;
  while (level < num_levels && kQindexOfQuantizer[level] < qindex) {
    ++level;
  }

  // Ran off the end of the table: clamp to the top level.
  return level < num_levels ? level : num_levels - 1;
}
1444 
set_active_map(const aom_codec_enc_cfg_t * cfg,aom_codec_ctx_t * codec,int frame_cnt)1445 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1446                            aom_codec_ctx_t *codec, int frame_cnt) {
1447   aom_active_map_t map = { 0, 0, 0 };
1448 
1449   map.rows = (cfg->g_h + 15) / 16;
1450   map.cols = (cfg->g_w + 15) / 16;
1451 
1452   map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1453   if (!map.active_map) die("Failed to allocate active map");
1454 
1455   // Example map for testing.
1456   for (unsigned int i = 0; i < map.rows; ++i) {
1457     for (unsigned int j = 0; j < map.cols; ++j) {
1458       int index = map.cols * i + j;
1459       map.active_map[index] = 1;
1460       if (frame_cnt < 300) {
1461         if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1462       } else if (frame_cnt >= 300) {
1463         if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1464       }
1465     }
1466   }
1467 
1468   if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1469     die_codec(codec, "Failed to set active map");
1470 
1471   free(map.active_map);
1472 }
1473 
main(int argc,const char ** argv)1474 int main(int argc, const char **argv) {
1475   AppInput app_input;
1476   AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1477   FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1478   AvxVideoWriter *total_layer_file = NULL;
1479   FILE *total_layer_obu_file = NULL;
1480   aom_codec_enc_cfg_t cfg;
1481   int frame_cnt = 0;
1482   aom_image_t raw;
1483   int frame_avail;
1484   int got_data = 0;
1485   int flags = 0;
1486   int i;
1487   int pts = 0;             // PTS starts at 0.
1488   int frame_duration = 1;  // 1 timebase tick per frame.
1489   aom_svc_layer_id_t layer_id;
1490   aom_svc_params_t svc_params;
1491   aom_svc_ref_frame_config_t ref_frame_config;
1492   aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1493 
1494 #if CONFIG_INTERNAL_STATS
1495   FILE *stats_file = fopen("opsnr.stt", "a");
1496   if (stats_file == NULL) {
1497     die("Cannot open opsnr.stt\n");
1498   }
1499 #endif
1500 #if CONFIG_AV1_DECODER
1501   aom_codec_ctx_t decoder;
1502 #endif
1503 
1504   struct RateControlMetrics rc;
1505   int64_t cx_time = 0;
1506   int64_t cx_time_layer[AOM_MAX_LAYERS];  // max number of layers.
1507   int frame_cnt_layer[AOM_MAX_LAYERS];
1508   double sum_bitrate = 0.0;
1509   double sum_bitrate2 = 0.0;
1510   double framerate = 30.0;
1511   int use_svc_control = 1;
1512   int set_err_resil_frame = 0;
1513   int test_changing_bitrate = 0;
1514   zero(rc.layer_target_bitrate);
1515   memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1516   memset(&app_input, 0, sizeof(AppInput));
1517   memset(&svc_params, 0, sizeof(svc_params));
1518 
1519   // Flag to test dynamic scaling of source frames for single
1520   // spatial stream, using the scaling_mode control.
1521   const int test_dynamic_scaling_single_layer = 0;
1522 
1523   // Flag to test setting speed per layer.
1524   const int test_speed_per_layer = 0;
1525 
1526   // Flag for testing active maps.
1527   const int test_active_maps = 0;
1528 
1529   /* Setup default input stream settings */
1530   app_input.input_ctx.framerate.numerator = 30;
1531   app_input.input_ctx.framerate.denominator = 1;
1532   app_input.input_ctx.only_i420 = 0;
1533   app_input.input_ctx.bit_depth = AOM_BITS_8;
1534   app_input.speed = 7;
1535   exec_name = argv[0];
1536 
1537   // start with default encoder configuration
1538   aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
1539                                                      AOM_USAGE_REALTIME);
1540   if (res != AOM_CODEC_OK) {
1541     die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1542   }
1543 
1544   // Real time parameters.
1545   cfg.g_usage = AOM_USAGE_REALTIME;
1546 
1547   cfg.rc_end_usage = AOM_CBR;
1548   cfg.rc_min_quantizer = 2;
1549   cfg.rc_max_quantizer = 52;
1550   cfg.rc_undershoot_pct = 50;
1551   cfg.rc_overshoot_pct = 50;
1552   cfg.rc_buf_initial_sz = 600;
1553   cfg.rc_buf_optimal_sz = 600;
1554   cfg.rc_buf_sz = 1000;
1555   cfg.rc_resize_mode = 0;  // Set to RESIZE_DYNAMIC for dynamic resize.
1556   cfg.g_lag_in_frames = 0;
1557   cfg.kf_mode = AOM_KF_AUTO;
1558 
1559   parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1560 
1561   int ts_number_layers = svc_params.number_temporal_layers;
1562   int ss_number_layers = svc_params.number_spatial_layers;
1563 
1564   unsigned int width = cfg.g_w;
1565   unsigned int height = cfg.g_h;
1566 
1567   if (app_input.layering_mode >= 0) {
1568     if (ts_number_layers !=
1569             mode_to_num_temporal_layers[app_input.layering_mode] ||
1570         ss_number_layers !=
1571             mode_to_num_spatial_layers[app_input.layering_mode]) {
1572       die("Number of layers doesn't match layering mode.");
1573     }
1574   }
1575 
1576   // Y4M reader has its own allocation.
1577   if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1578     if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1579       die("Failed to allocate image (%dx%d)", width, height);
1580     }
1581   }
1582 
1583   aom_codec_iface_t *encoder = aom_codec_av1_cx();
1584 
1585   memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1586          sizeof(svc_params.layer_target_bitrate));
1587 
1588   unsigned int total_rate = 0;
1589   for (i = 0; i < ss_number_layers; i++) {
1590     total_rate +=
1591         svc_params
1592             .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1593   }
1594   if (total_rate != cfg.rc_target_bitrate) {
1595     die("Incorrect total target bitrate");
1596   }
1597 
1598   svc_params.framerate_factor[0] = 1;
1599   if (ts_number_layers == 2) {
1600     svc_params.framerate_factor[0] = 2;
1601     svc_params.framerate_factor[1] = 1;
1602   } else if (ts_number_layers == 3) {
1603     svc_params.framerate_factor[0] = 4;
1604     svc_params.framerate_factor[1] = 2;
1605     svc_params.framerate_factor[2] = 1;
1606   }
1607 
1608   if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1609     // Override these settings with the info from Y4M file.
1610     cfg.g_w = app_input.input_ctx.width;
1611     cfg.g_h = app_input.input_ctx.height;
1612     // g_timebase is the reciprocal of frame rate.
1613     cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1614     cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1615   }
1616   framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1617   set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1618 
1619   AvxVideoInfo info;
1620   info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1621   info.frame_width = cfg.g_w;
1622   info.frame_height = cfg.g_h;
1623   info.time_base.numerator = cfg.g_timebase.num;
1624   info.time_base.denominator = cfg.g_timebase.den;
1625   // Open an output file for each stream.
1626   for (int sl = 0; sl < ss_number_layers; ++sl) {
1627     for (int tl = 0; tl < ts_number_layers; ++tl) {
1628       i = sl * ts_number_layers + tl;
1629       char file_name[PATH_MAX];
1630       snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1631                app_input.output_filename, i);
1632       if (app_input.output_obu) {
1633         obu_files[i] = fopen(file_name, "wb");
1634         if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1635       } else {
1636         outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1637         if (!outfile[i]) die("Failed to open %s for writing", file_name);
1638       }
1639     }
1640   }
1641   if (app_input.output_obu) {
1642     total_layer_obu_file = fopen(app_input.output_filename, "wb");
1643     if (!total_layer_obu_file)
1644       die("Failed to open %s for writing", app_input.output_filename);
1645   } else {
1646     total_layer_file =
1647         aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1648     if (!total_layer_file)
1649       die("Failed to open %s for writing", app_input.output_filename);
1650   }
1651 
1652   // Initialize codec.
1653   aom_codec_ctx_t codec;
1654   aom_codec_flags_t flag = 0;
1655   flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH;
1656   flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1657   if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1658     die_codec(&codec, "Failed to initialize encoder");
1659 
1660 #if CONFIG_AV1_DECODER
1661   if (app_input.decode) {
1662     if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1663       die_codec(&decoder, "Failed to initialize decoder");
1664   }
1665 #endif
1666 
1667   aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1668   aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1669   aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
1670   aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
1671   aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
1672   aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
1673   aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
1674   aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
1675   aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0);
1676   aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0);
1677   aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0);
1678   aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
1679   aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
1680   aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
1681   aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
1682   aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
1683 
1684   // Settings to reduce key frame encoding time.
1685   aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0);
1686   aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
1687   aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0);
1688   aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
1689   aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
1690 
1691   if (cfg.g_threads > 1) {
1692     aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS,
1693                       (unsigned int)log2(cfg.g_threads));
1694   }
1695 
1696   aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1697   if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1698     aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
1699     aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 1);
1700     // INTRABC is currently disabled for rt mode, as it's too slow.
1701     aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
1702   }
1703 
1704   if (app_input.use_external_rc) {
1705     aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1);
1706   }
1707 
1708   aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_CBR, INT_MAX);
1709 
1710   aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE,
1711                     AOM_FULL_SUPERFRAME_DROP);
1712 
1713   svc_params.number_spatial_layers = ss_number_layers;
1714   svc_params.number_temporal_layers = ts_number_layers;
1715   for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1716     svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1717     svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1718   }
1719   for (i = 0; i < ss_number_layers; ++i) {
1720     svc_params.scaling_factor_num[i] = 1;
1721     svc_params.scaling_factor_den[i] = 1;
1722   }
1723   if (ss_number_layers == 2) {
1724     svc_params.scaling_factor_num[0] = 1;
1725     svc_params.scaling_factor_den[0] = 2;
1726   } else if (ss_number_layers == 3) {
1727     svc_params.scaling_factor_num[0] = 1;
1728     svc_params.scaling_factor_den[0] = 4;
1729     svc_params.scaling_factor_num[1] = 1;
1730     svc_params.scaling_factor_den[1] = 2;
1731   }
1732   aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1733   // TODO(aomedia:3032): Configure KSVC in fixed mode.
1734 
1735   // This controls the maximum target size of the key frame.
1736   // For generating smaller key frames, use a smaller max_intra_size_pct
1737   // value, like 100 or 200.
1738   {
1739     const int max_intra_size_pct = 300;
1740     aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT,
1741                       max_intra_size_pct);
1742   }
1743 
1744   for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1745     cx_time_layer[lx] = 0;
1746     frame_cnt_layer[lx] = 0;
1747   }
1748 
1749   std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1750   if (app_input.use_external_rc) {
1751     const aom::AV1RateControlRtcConfig rc_cfg =
1752         create_rtc_rc_config(cfg, app_input);
1753     rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1754   }
1755 
1756   frame_avail = 1;
1757   struct psnr_stats psnr_stream;
1758   memset(&psnr_stream, 0, sizeof(psnr_stream));
1759   while (frame_avail || got_data) {
1760     struct aom_usec_timer timer;
1761     frame_avail = read_frame(&(app_input.input_ctx), &raw);
1762     // Loop over spatial layers.
1763     for (int slx = 0; slx < ss_number_layers; slx++) {
1764       aom_codec_iter_t iter = NULL;
1765       const aom_codec_cx_pkt_t *pkt;
1766       int layer = 0;
1767       // Flag for superframe whose base is key.
1768       int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1769       // For flexible mode:
1770       if (app_input.layering_mode >= 0) {
1771         // Set the reference/update flags, layer_id, and reference_map
1772         // buffer index.
1773         set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1774                           &ref_frame_config, &ref_frame_comp_pred,
1775                           &use_svc_control, slx, is_key_frame,
1776                           (app_input.layering_mode == 10), app_input.speed);
1777         aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1778         if (use_svc_control) {
1779           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1780                             &ref_frame_config);
1781           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1782                             &ref_frame_comp_pred);
1783         }
1784         // Set the speed per layer.
1785         if (test_speed_per_layer) {
1786           int speed_per_layer = 10;
1787           if (layer_id.spatial_layer_id == 0) {
1788             if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1789             if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1790             if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1791           } else if (layer_id.spatial_layer_id == 1) {
1792             if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1793             if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1794             if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1795           } else if (layer_id.spatial_layer_id == 2) {
1796             if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1797             if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1798             if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1799           }
1800           aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1801         }
1802       } else {
1803         // Only up to 3 temporal layers supported in fixed mode.
1804         // Only need to set spatial and temporal layer_id: reference
1805         // prediction, refresh, and buffer_idx are set internally.
1806         layer_id.spatial_layer_id = slx;
1807         layer_id.temporal_layer_id = 0;
1808         if (ts_number_layers == 2) {
1809           layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1810         } else if (ts_number_layers == 3) {
1811           if (frame_cnt % 2 != 0)
1812             layer_id.temporal_layer_id = 2;
1813           else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1814             layer_id.temporal_layer_id = 1;
1815         }
1816         aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1817       }
1818 
1819       if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1820         // Set error_resilient per frame: off/0 for base layer and
1821         // on/1 for enhancement layer frames.
1822         // Note that this is can only be done on the fly/per-frame/layer
1823         // if the config error_resilience is off/0. See the logic for updating
1824         // in set_encoder_config():
1825         // tool_cfg->error_resilient_mode =
1826         //     cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1827         const int err_resil_mode =
1828             layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1829         aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE,
1830                           err_resil_mode);
1831       }
1832 
1833       layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1834       if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1835 
1836       if (test_dynamic_scaling_single_layer) {
1837         // Example to scale source down by 2x2, then 4x4, and then back up to
1838         // 2x2, and then back to original.
1839         int frame_2x2 = 200;
1840         int frame_4x4 = 400;
1841         int frame_2x2up = 600;
1842         int frame_orig = 800;
1843         if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1844           // Scale source down by 2x2.
1845           struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1846           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1847         } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1848           // Scale source down by 4x4.
1849           struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1850           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1851         } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1852           // Source back up to 2x2.
1853           struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1854           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1855         } else if (frame_cnt >= frame_orig) {
1856           // Source back up to original resolution (no scaling).
1857           struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1858           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1859         }
1860         if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1861             frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1862           // For dynamic resize testing on single layer: refresh all references
1863           // on the resized frame: this is to avoid decode error:
1864           // if resize goes down by >= 4x4 then libaom decoder will throw an
1865           // error that some reference (even though not used) is beyond the
1866           // limit size (must be smaller than 4x4).
1867           for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1868           if (use_svc_control) {
1869             aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
1870                               &ref_frame_config);
1871             aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
1872                               &ref_frame_comp_pred);
1873           }
1874         }
1875       }
1876 
1877       // Change target_bitrate every other frame.
1878       if (test_changing_bitrate && frame_cnt % 2 == 0) {
1879         if (frame_cnt < 500)
1880           cfg.rc_target_bitrate += 10;
1881         else
1882           cfg.rc_target_bitrate -= 10;
1883         // Do big increase and decrease.
1884         if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1885         if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1886         if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1887         // Call change_config, or bypass with new control.
1888         // res = aom_codec_enc_config_set(&codec, &cfg);
1889         if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR,
1890                               cfg.rc_target_bitrate))
1891           die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1892       }
1893 
1894       if (rc_api) {
1895         aom::AV1FrameParamsRTC frame_params;
1896         // TODO(jianj): Add support for SVC.
1897         frame_params.spatial_layer_id = 0;
1898         frame_params.temporal_layer_id = 0;
1899         frame_params.frame_type =
1900             is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1901         rc_api->ComputeQP(frame_params);
1902         const int current_qp = rc_api->GetQP();
1903         if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS,
1904                               qindex_to_quantizer(current_qp))) {
1905           die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1906         }
1907       }
1908 
1909       if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1910 
1911       // Do the layer encode.
1912       aom_usec_timer_start(&timer);
1913       if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1914         die_codec(&codec, "Failed to encode frame");
1915       aom_usec_timer_mark(&timer);
1916       cx_time += aom_usec_timer_elapsed(&timer);
1917       cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1918       frame_cnt_layer[layer] += 1;
1919 
1920       got_data = 0;
1921       // For simulcast (mode 11): write out each spatial layer to the file.
1922       int ss_layers_write = (app_input.layering_mode == 11)
1923                                 ? layer_id.spatial_layer_id + 1
1924                                 : ss_number_layers;
1925       while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1926         switch (pkt->kind) {
1927           case AOM_CODEC_CX_FRAME_PKT:
1928             for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1929                  ++sl) {
1930               for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1931                    ++tl) {
1932                 int j = sl * ts_number_layers + tl;
1933                 if (app_input.output_obu) {
1934                   fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1935                          obu_files[j]);
1936                 } else {
1937                   aom_video_writer_write_frame(
1938                       outfile[j],
1939                       reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1940                       pkt->data.frame.sz, pts);
1941                 }
1942                 if (sl == layer_id.spatial_layer_id)
1943                   rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1944               }
1945             }
1946             got_data = 1;
1947             // Write everything into the top layer.
1948             if (app_input.output_obu) {
1949               fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1950                      total_layer_obu_file);
1951             } else {
1952               aom_video_writer_write_frame(
1953                   total_layer_file,
1954                   reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1955                   pkt->data.frame.sz, pts);
1956             }
1957             // Keep count of rate control stats per layer (for non-key).
1958             if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1959               int j = layer_id.spatial_layer_id * ts_number_layers +
1960                       layer_id.temporal_layer_id;
1961               assert(j >= 0);
1962               rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1963               rc.layer_avg_rate_mismatch[j] +=
1964                   fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1965                   rc.layer_pfb[j];
1966               if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1967             }
1968 
1969             if (rc_api) {
1970               rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1971             }
1972             // Update for short-time encoding bitrate states, for moving window
1973             // of size rc->window, shifted by rc->window / 2.
1974             // Ignore first window segment, due to key frame.
1975             // For spatial layers: only do this for top/highest SL.
1976             if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1977               sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1978               rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1979               if (frame_cnt % rc.window_size == 0) {
1980                 rc.window_count += 1;
1981                 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1982                 rc.variance_st_encoding_bitrate +=
1983                     (sum_bitrate / rc.window_size) *
1984                     (sum_bitrate / rc.window_size);
1985                 sum_bitrate = 0.0;
1986               }
1987             }
1988             // Second shifted window.
1989             if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1990                 slx == ss_number_layers - 1) {
1991               sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1992               if (frame_cnt > 2 * rc.window_size &&
1993                   frame_cnt % rc.window_size == 0) {
1994                 rc.window_count += 1;
1995                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
1996                 rc.variance_st_encoding_bitrate +=
1997                     (sum_bitrate2 / rc.window_size) *
1998                     (sum_bitrate2 / rc.window_size);
1999                 sum_bitrate2 = 0.0;
2000               }
2001             }
2002 
2003 #if CONFIG_AV1_DECODER
2004             if (app_input.decode) {
2005               if (aom_codec_decode(
2006                       &decoder,
2007                       reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2008                       pkt->data.frame.sz, NULL))
2009                 die_codec(&decoder, "Failed to decode frame");
2010             }
2011 #endif
2012 
2013             break;
2014           case AOM_CODEC_PSNR_PKT:
2015             if (app_input.show_psnr) {
2016               psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2017               psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2018               for (int plane = 0; plane < 4; plane++) {
2019                 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2020               }
2021               psnr_stream.psnr_count[0]++;
2022             }
2023             break;
2024           default: break;
2025         }
2026       }
2027 #if CONFIG_AV1_DECODER
2028       if (got_data && app_input.decode) {
2029         // Don't look for mismatch on top spatial and top temporal layers as
2030         // they are non reference frames.
2031         if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2032             !(layer_id.temporal_layer_id > 0 &&
2033               layer_id.temporal_layer_id == ts_number_layers - 1)) {
2034           if (test_decode(&codec, &decoder, frame_cnt)) {
2035 #if CONFIG_INTERNAL_STATS
2036             fprintf(stats_file, "First mismatch occurred in frame %d\n",
2037                     frame_cnt);
2038             fclose(stats_file);
2039 #endif
2040             fatal("Mismatch seen");
2041           }
2042         }
2043       }
2044 #endif
2045     }  // loop over spatial layers
2046     ++frame_cnt;
2047     pts += frame_duration;
2048   }
2049 
2050   close_input_file(&(app_input.input_ctx));
2051   printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2052                                 ts_number_layers);
2053 
2054   printf("\n");
2055   for (int slx = 0; slx < ss_number_layers; slx++)
2056     for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2057       int lx = slx * ts_number_layers + tlx;
2058       printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2059              slx, tlx, frame_cnt_layer[lx],
2060              (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2061              1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2062     }
2063 
2064   printf("\n");
2065   printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2066          frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2067          1000000 * (double)frame_cnt / (double)cx_time);
2068 
2069   if (app_input.show_psnr) {
2070     show_psnr(&psnr_stream, 255.0);
2071   }
2072 
2073   if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2074 
2075 #if CONFIG_AV1_DECODER
2076   if (app_input.decode) {
2077     if (aom_codec_destroy(&decoder))
2078       die_codec(&decoder, "Failed to destroy decoder");
2079   }
2080 #endif
2081 
2082 #if CONFIG_INTERNAL_STATS
2083   fprintf(stats_file, "No mismatch detected in recon buffers\n");
2084   fclose(stats_file);
2085 #endif
2086 
2087   // Try to rewrite the output file headers with the actual frame count.
2088   for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2089     aom_video_writer_close(outfile[i]);
2090   aom_video_writer_close(total_layer_file);
2091 
2092   if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2093     aom_img_free(&raw);
2094   }
2095   return EXIT_SUCCESS;
2096 }
2097