• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 //  This is an example demonstrating how to implement a multi-layer AOM
13 //  encoding scheme for RTC video applications.
14 
15 #include <assert.h>
16 #include <inttypes.h>
17 #include <limits.h>
18 #include <math.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include <memory>
24 
25 #include "config/aom_config.h"
26 
27 #if CONFIG_AV1_DECODER
28 #include "aom/aom_decoder.h"
29 #endif
30 #include "aom/aom_encoder.h"
31 #include "aom/aom_image.h"
32 #include "aom/aom_integer.h"
33 #include "aom/aomcx.h"
34 #include "aom_dsp/bitwriter_buffer.h"
35 #include "aom_ports/aom_timer.h"
36 #include "av1/ratectrl_rtc.h"
37 #include "common/args.h"
38 #include "common/tools_common.h"
39 #include "common/video_writer.h"
40 #include "examples/encoder_util.h"
41 #include "examples/multilayer_metadata.h"
42 
43 #define OPTION_BUFFER_SIZE 1024
44 #define MAX_NUM_SPATIAL_LAYERS 4
45 
// Application-level settings gathered by parse_command_line() that are not
// stored in the encoder configuration or the SVC parameters.
typedef struct {
  // Output file name (-o / --output).
  const char *output_filename;
  // Buffer for additional string options.
  char options[OPTION_BUFFER_SIZE];
  // One input context per input file named on the command line.
  struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
  // Value of --speed.
  int speed;
  // Value of --aqmode.
  int aq_mode;
  // Value of --layering-mode (defaults to 0).
  int layering_mode;
  // 1 to write OBUs, 0 to write IVF (--output-obu, defaults to 0).
  int output_obu;
  // 1 to test-decode the output (--test-decode, defaults to 1).
  int decode;
  // Value of --tune-content.
  int tune_content;
  // Set to 1 when --psnr is given.
  int show_psnr;
  // Set when --use-ext-rc is given.
  bool use_external_rc;
  // Set when --scale-factors is given.
  bool scale_factors_explicitly_set;
  // Value of --multilayer_metadata_file (only parsed when CONFIG_CWG_E050).
  const char *multilayer_metadata_file;
} AppInput;
61 
// Kinds of per-layer option strings handled by extract_option() and
// parse_layer_options_from_string(). The enumerator values are used as
// indices into option_min_values[] and option_max_values[].
typedef enum {
  QUANTIZER = 0,
  BITRATE,
  SCALE_FACTOR,  // Parsed as "num/den".
  AUTO_ALT_REF,
  ALL_OPTION_TYPES  // Number of option types; sizes the min/max tables.
} LAYER_OPTION_TYPE;
69 
// Command-line option definitions. parse_command_line() matches these with
// arg_match(); the ones listed in svc_args[] are printed by usage_exit().
// NOTE(review): the ARG_DEF arguments appear to be (short name, long name,
// value count, help text) -- confirm against common/args.h.
static const arg_def_t outputfile =
    ARG_DEF("o", "output", 1, "Output filename");
// NOTE(review): no arg_match() for frames_arg is visible in
// parse_command_line(), so -f/--frames looks like it would be reported as an
// unrecognized option -- confirm.
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "Number of frames to encode");
static const arg_def_t threads_arg =
    ARG_DEF("th", "threads", 1, "Number of threads to use");
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
static const arg_def_t bitrate_arg = ARG_DEF(
    "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
static const arg_def_t layering_mode_arg =
    ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
// A single value is used for both the minimum and maximum keyframe distance.
static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
// Comma-separated "num/den" pairs, one per spatial layer.
static const arg_def_t scale_factors_arg =
    ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
static const arg_def_t min_q_arg =
    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
static const arg_def_t max_q_arg =
    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
static const arg_def_t speed_arg =
    ARG_DEF("sp", "speed", 1, "Speed configuration");
static const arg_def_t aqmode_arg =
    ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
// Comma-separated list with one entry per (spatial, temporal) layer pair.
static const arg_def_t bitrates_arg =
    ARG_DEF("bl", "bitrates", 1,
            "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
static const arg_def_t dropframe_thresh_arg =
    ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
static const arg_def_t error_resilient_arg =
    ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
static const arg_def_t output_obu_arg =
    ARG_DEF(NULL, "output-obu", 1,
            "Write OBUs when set to 1. Otherwise write IVF files.");
static const arg_def_t test_decode_arg =
    ARG_DEF(NULL, "test-decode", 1,
            "Attempt to test decoding the output when set to 1. Default is 1.");
static const arg_def_t psnr_arg =
    ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
static const arg_def_t ext_rc_arg =
    ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
// Names accepted by --tune-content and the values they map to.
static const struct arg_enum_list tune_content_enum[] = {
  { "default", AOM_CONTENT_DEFAULT },
  { "screen", AOM_CONTENT_SCREEN },
  { "film", AOM_CONTENT_FILM },
  { NULL, 0 }
};
static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
    NULL, "tune-content", 1, "Tune content type", tune_content_enum);
#if CONFIG_CWG_E050
static const arg_def_t multilayer_metadata_file_arg =
    ARG_DEF("ml", "multilayer_metadata_file", 1,
            "Experimental: path to multilayer metadata file");
#endif

#if CONFIG_AV1_HIGHBITDEPTH
// Names accepted by --bit-depth and the values they map to.
static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
                                                      { "10", AOM_BITS_10 },
                                                      { NULL, 0 } };

static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH
139 
140 static const arg_def_t *svc_args[] = {
141   &frames_arg,
142   &outputfile,
143   &width_arg,
144   &height_arg,
145   &timebase_arg,
146   &bitrate_arg,
147   &spatial_layers_arg,
148   &kf_dist_arg,
149   &scale_factors_arg,
150   &min_q_arg,
151   &max_q_arg,
152   &temporal_layers_arg,
153   &layering_mode_arg,
154   &threads_arg,
155   &aqmode_arg,
156 #if CONFIG_AV1_HIGHBITDEPTH
157   &bitdepth_arg,
158 #endif
159   &speed_arg,
160   &bitrates_arg,
161   &dropframe_thresh_arg,
162   &error_resilient_arg,
163   &output_obu_arg,
164   &test_decode_arg,
165   &tune_content_arg,
166   &psnr_arg,
167 #if CONFIG_CWG_E050
168   &multilayer_metadata_file_arg,
169 #endif
170   NULL,
171 };
172 
// Fills the object Dest with zero bytes in place. Dest must be an lvalue
// naming the object itself: sizeof(Dest) is what gets cleared, so passing a
// pointer would only clear the pointer.
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed by usage_exit(). No assignment to it is visible in
// this part of the file.
static const char *exec_name;
176 
usage_exit(void)177 void usage_exit(void) {
178   fprintf(stderr,
179           "Usage: %s <options> input_filename [input_filename ...] -o "
180           "output_filename\n",
181           exec_name);
182   fprintf(stderr, "Options:\n");
183   arg_show_usage(stderr, svc_args);
184   fprintf(
185       stderr,
186       "Input files must be y4m or yuv.\n"
187       "If multiple input files are specified, they correspond to spatial "
188       "layers, and there should be as many as there are spatial layers.\n"
189       "All input files must have the same width, height, frame rate and number "
190       "of frames.\n"
191       "If only one file is specified, it is used for all spatial layers.\n");
192   exit(EXIT_FAILURE);
193 }
194 
// Returns 1 if the four bytes at `detect` are the Y4M signature "YUV4",
// otherwise 0.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mMagic[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, kY4mMagic, sizeof(kY4mMagic)) != 0) {
    return 0;
  }
  return 1;
}
198 
// Returns 1 if the four bytes at `detect` are the IVF signature "DKIF",
// otherwise 0.
static int fourcc_is_ivf(const char detect[4]) {
  static const char kIvfMagic[4] = { 'D', 'K', 'I', 'F' };
  return memcmp(detect, kIvfMagic, sizeof(kIvfMagic)) == 0 ? 1 : 0;
}
205 
// Inclusive upper bound accepted by extract_option() for each
// LAYER_OPTION_TYPE, indexed by the enumerator value
// (QUANTIZER, BITRATE, SCALE_FACTOR, AUTO_ALT_REF).
static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
                                                         1 };

// Inclusive lower bound accepted by extract_option() for each
// LAYER_OPTION_TYPE, indexed the same way as option_max_values.
static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
210 
open_input_file(struct AvxInputContext * input,aom_chroma_sample_position_t csp)211 static void open_input_file(struct AvxInputContext *input,
212                             aom_chroma_sample_position_t csp) {
213   /* Parse certain options from the input file, if possible */
214   input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
215                                              : set_binary_mode(stdin);
216 
217   if (!input->file) fatal("Failed to open input file");
218 
219   if (!fseeko(input->file, 0, SEEK_END)) {
220     /* Input file is seekable. Figure out how long it is, so we can get
221      * progress info.
222      */
223     input->length = ftello(input->file);
224     rewind(input->file);
225   }
226 
227   /* Default to 1:1 pixel aspect ratio. */
228   input->pixel_aspect_ratio.numerator = 1;
229   input->pixel_aspect_ratio.denominator = 1;
230 
231   /* For RAW input sources, these bytes will applied on the first frame
232    *  in read_frame().
233    */
234   input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
235   input->detect.position = 0;
236 
237   if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
238     if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
239                        input->only_i420) >= 0) {
240       input->file_type = FILE_TYPE_Y4M;
241       input->width = input->y4m.pic_w;
242       input->height = input->y4m.pic_h;
243       input->pixel_aspect_ratio.numerator = input->y4m.par_n;
244       input->pixel_aspect_ratio.denominator = input->y4m.par_d;
245       input->framerate.numerator = input->y4m.fps_n;
246       input->framerate.denominator = input->y4m.fps_d;
247       input->fmt = input->y4m.aom_fmt;
248       input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
249     } else {
250       fatal("Unsupported Y4M stream.");
251     }
252   } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
253     fatal("IVF is not supported as input.");
254   } else {
255     input->file_type = FILE_TYPE_RAW;
256   }
257 }
258 
extract_option(LAYER_OPTION_TYPE type,char * input,int * value0,int * value1)259 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
260                                       int *value0, int *value1) {
261   if (type == SCALE_FACTOR) {
262     *value0 = (int)strtol(input, &input, 10);
263     if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
264     *value1 = (int)strtol(input, &input, 10);
265 
266     if (*value0 < option_min_values[SCALE_FACTOR] ||
267         *value1 < option_min_values[SCALE_FACTOR] ||
268         *value0 > option_max_values[SCALE_FACTOR] ||
269         *value1 > option_max_values[SCALE_FACTOR] ||
270         *value0 > *value1)  // num shouldn't be greater than den
271       return AOM_CODEC_INVALID_PARAM;
272   } else {
273     *value0 = atoi(input);
274     if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
275       return AOM_CODEC_INVALID_PARAM;
276   }
277   return AOM_CODEC_OK;
278 }
279 
parse_layer_options_from_string(aom_svc_params_t * svc_params,LAYER_OPTION_TYPE type,const char * input,int * option0,int * option1)280 static aom_codec_err_t parse_layer_options_from_string(
281     aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
282     int *option0, int *option1) {
283   aom_codec_err_t res = AOM_CODEC_OK;
284   char *input_string;
285   char *token;
286   const char *delim = ",";
287   int num_layers = svc_params->number_spatial_layers;
288   int i = 0;
289 
290   if (type == BITRATE)
291     num_layers =
292         svc_params->number_spatial_layers * svc_params->number_temporal_layers;
293 
294   if (input == NULL || option0 == NULL ||
295       (option1 == NULL && type == SCALE_FACTOR))
296     return AOM_CODEC_INVALID_PARAM;
297 
298   const size_t input_length = strlen(input);
299   input_string = reinterpret_cast<char *>(malloc(input_length + 1));
300   if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
301   memcpy(input_string, input, input_length + 1);
302   token = strtok(input_string, delim);  // NOLINT
303   for (i = 0; i < num_layers; ++i) {
304     if (token != NULL) {
305       res = extract_option(type, token, option0 + i, option1 + i);
306       if (res != AOM_CODEC_OK) break;
307       token = strtok(NULL, delim);  // NOLINT
308     } else {
309       res = AOM_CODEC_INVALID_PARAM;
310       break;
311     }
312   }
313   free(input_string);
314   return res;
315 }
316 
parse_command_line(int argc,const char ** argv_,AppInput * app_input,aom_svc_params_t * svc_params,aom_codec_enc_cfg_t * enc_cfg)317 static void parse_command_line(int argc, const char **argv_,
318                                AppInput *app_input,
319                                aom_svc_params_t *svc_params,
320                                aom_codec_enc_cfg_t *enc_cfg) {
321   struct arg arg;
322   char **argv = NULL;
323   char **argi = NULL;
324   char **argj = NULL;
325   char string_options[1024] = { 0 };
326 
327   // Default settings
328   svc_params->number_spatial_layers = 1;
329   svc_params->number_temporal_layers = 1;
330   app_input->layering_mode = 0;
331   app_input->output_obu = 0;
332   app_input->decode = 1;
333   enc_cfg->g_threads = 1;
334   enc_cfg->rc_end_usage = AOM_CBR;
335 
336   // process command line options
337   argv = argv_dup(argc - 1, argv_ + 1);
338   if (!argv) {
339     fprintf(stderr, "Error allocating argument list\n");
340     exit(EXIT_FAILURE);
341   }
342   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
343     arg.argv_step = 1;
344 
345     if (arg_match(&arg, &outputfile, argi)) {
346       app_input->output_filename = arg.val;
347     } else if (arg_match(&arg, &width_arg, argi)) {
348       enc_cfg->g_w = arg_parse_uint(&arg);
349     } else if (arg_match(&arg, &height_arg, argi)) {
350       enc_cfg->g_h = arg_parse_uint(&arg);
351     } else if (arg_match(&arg, &timebase_arg, argi)) {
352       enc_cfg->g_timebase = arg_parse_rational(&arg);
353     } else if (arg_match(&arg, &bitrate_arg, argi)) {
354       enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
355     } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
356       svc_params->number_spatial_layers = arg_parse_uint(&arg);
357     } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
358       svc_params->number_temporal_layers = arg_parse_uint(&arg);
359     } else if (arg_match(&arg, &speed_arg, argi)) {
360       app_input->speed = arg_parse_uint(&arg);
361       if (app_input->speed > 11) {
362         aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
363       }
364     } else if (arg_match(&arg, &aqmode_arg, argi)) {
365       app_input->aq_mode = arg_parse_uint(&arg);
366     } else if (arg_match(&arg, &threads_arg, argi)) {
367       enc_cfg->g_threads = arg_parse_uint(&arg);
368     } else if (arg_match(&arg, &layering_mode_arg, argi)) {
369       app_input->layering_mode = arg_parse_int(&arg);
370     } else if (arg_match(&arg, &kf_dist_arg, argi)) {
371       enc_cfg->kf_min_dist = arg_parse_uint(&arg);
372       enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
373     } else if (arg_match(&arg, &scale_factors_arg, argi)) {
374       aom_codec_err_t res = parse_layer_options_from_string(
375           svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
376           svc_params->scaling_factor_den);
377       app_input->scale_factors_explicitly_set = true;
378       if (res != AOM_CODEC_OK) {
379         die("Failed to parse scale factors: %s\n",
380             aom_codec_err_to_string(res));
381       }
382     } else if (arg_match(&arg, &min_q_arg, argi)) {
383       enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
384     } else if (arg_match(&arg, &max_q_arg, argi)) {
385       enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
386 #if CONFIG_AV1_HIGHBITDEPTH
387     } else if (arg_match(&arg, &bitdepth_arg, argi)) {
388       enc_cfg->g_bit_depth =
389           static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
390       switch (enc_cfg->g_bit_depth) {
391         case AOM_BITS_8:
392           enc_cfg->g_input_bit_depth = 8;
393           enc_cfg->g_profile = 0;
394           break;
395         case AOM_BITS_10:
396           enc_cfg->g_input_bit_depth = 10;
397           enc_cfg->g_profile = 0;
398           break;
399         default:
400           die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
401       }
402 #endif  // CONFIG_VP9_HIGHBITDEPTH
403     } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
404       enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
405     } else if (arg_match(&arg, &error_resilient_arg, argi)) {
406       enc_cfg->g_error_resilient = arg_parse_uint(&arg);
407       if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
408         die("Invalid value for error resilient (0, 1): %d.",
409             enc_cfg->g_error_resilient);
410     } else if (arg_match(&arg, &output_obu_arg, argi)) {
411       app_input->output_obu = arg_parse_uint(&arg);
412       if (app_input->output_obu != 0 && app_input->output_obu != 1)
413         die("Invalid value for obu output flag (0, 1): %d.",
414             app_input->output_obu);
415     } else if (arg_match(&arg, &test_decode_arg, argi)) {
416       app_input->decode = arg_parse_uint(&arg);
417       if (app_input->decode != 0 && app_input->decode != 1)
418         die("Invalid value for test decode flag (0, 1): %d.",
419             app_input->decode);
420     } else if (arg_match(&arg, &tune_content_arg, argi)) {
421       app_input->tune_content = arg_parse_enum_or_int(&arg);
422       printf("tune content %d\n", app_input->tune_content);
423     } else if (arg_match(&arg, &psnr_arg, argi)) {
424       app_input->show_psnr = 1;
425     } else if (arg_match(&arg, &ext_rc_arg, argi)) {
426       app_input->use_external_rc = true;
427 #if CONFIG_CWG_E050
428     } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
429       app_input->multilayer_metadata_file = arg.val;
430 #endif
431     } else {
432       ++argj;
433     }
434   }
435 
436   // Total bitrate needs to be parsed after the number of layers.
437   for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
438     arg.argv_step = 1;
439     if (arg_match(&arg, &bitrates_arg, argi)) {
440       aom_codec_err_t res = parse_layer_options_from_string(
441           svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
442       if (res != AOM_CODEC_OK) {
443         die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
444       }
445     } else {
446       ++argj;
447     }
448   }
449 
450   // There will be a space in front of the string options
451   if (strlen(string_options) > 0)
452     strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
453 
454   // Check for unrecognized options
455   for (argi = argv; *argi; ++argi)
456     if (argi[0][0] == '-' && strlen(argi[0]) > 1)
457       die("Error: Unrecognized option %s\n", *argi);
458 
459   if (argv[0] == NULL) {
460     usage_exit();
461   }
462 
463   int input_count = 0;
464   while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
465     app_input->input_ctx[input_count].filename = argv[input_count];
466     ++input_count;
467   }
468   if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
469     die("Error: Number of input files does not match number of spatial layers");
470   }
471   if (argv[input_count] != NULL) {
472     die("Error: Too many input files specified, there should be at most %d",
473         MAX_NUM_SPATIAL_LAYERS);
474   }
475 
476   free(argv);
477 
478   for (int i = 0; i < input_count; ++i) {
479     open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
480     if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
481       if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
482         // Override these settings with the info from Y4M file.
483         enc_cfg->g_w = app_input->input_ctx[i].width;
484         enc_cfg->g_h = app_input->input_ctx[i].height;
485         // g_timebase is the reciprocal of frame rate.
486         enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
487         enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
488       } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
489                  enc_cfg->g_h != app_input->input_ctx[i].height ||
490                  enc_cfg->g_timebase.num !=
491                      app_input->input_ctx[i].framerate.denominator ||
492                  enc_cfg->g_timebase.den !=
493                      app_input->input_ctx[i].framerate.numerator) {
494         die("Error: Input file dimensions and/or frame rate mismatch");
495       }
496     }
497   }
498   if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
499     die("Error: Input file dimensions not set, use -w and -h");
500   }
501 
502   if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
503       enc_cfg->g_h % 2)
504     die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
505 
506   printf(
507       "Codec %s\n"
508       "layers: %d\n"
509       "width %u, height: %u\n"
510       "num: %d, den: %d, bitrate: %u\n"
511       "gop size: %u\n",
512       aom_codec_iface_name(aom_codec_av1_cx()),
513       svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
514       enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
515       enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
516 }
517 
// Number of temporal layers for each of the 12 table entries.
// NOTE(review): presumably indexed by the --layering-mode value; the lookup
// is not visible in this part of the file -- confirm.
static const int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
// Number of spatial layers for each of the 12 table entries, indexed the same
// way as mode_to_num_temporal_layers.
static const int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};
524 
// For rate control encoding stats.
// Arrays sized AOM_MAX_TS_LAYERS are indexed by temporal layer; arrays sized
// AOM_MAX_LAYERS are indexed by
// spatial_layer * number_of_temporal_layers + temporal_layer.
struct RateControlMetrics {
  // Number of input frames per layer.
  int layer_input_frames[AOM_MAX_TS_LAYERS];
  // Number of encoded non-key frames per layer.
  int layer_enc_frames[AOM_MAX_TS_LAYERS];
  // Framerate per layer (cumulative).
  double layer_framerate[AOM_MAX_TS_LAYERS];
  // Target average frame size per layer (per-frame-bandwidth per layer).
  double layer_pfb[AOM_MAX_LAYERS];
  // Actual average frame size per layer.
  double layer_avg_frame_size[AOM_MAX_LAYERS];
  // Average rate mismatch per layer (|target - actual| / target).
  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative across temporal layers).
  double layer_encoding_bitrate[AOM_MAX_LAYERS];
  // Average of the short-time encoder actual bitrate.
  // TODO(marpan): Should we add these short-time stats for each layer?
  double avg_st_encoding_bitrate;
  // Variance of the short-time encoder actual bitrate.
  double variance_st_encoding_bitrate;
  // Window (number of frames) for computing short-time encoding bitrate.
  int window_size;
  // Number of window measurements.
  int window_count;
  // Target bitrate per layer; read by set_rate_control_metrics() to derive
  // layer_pfb. NOTE(review): presumably in kilobits per second, since it is
  // scaled by 1000.0 there -- confirm.
  int layer_target_bitrate[AOM_MAX_LAYERS];
};
552 
// Number of entries of ref_frame_config->refresh[] that set_layer_pattern()
// clears.
static const int REF_FRAMES = 8;

// Number of entries of ref_frame_config->ref_idx[] and ->reference[] that
// set_layer_pattern() initializes; matches the seven references listed below.
static const int INTER_REFS_PER_FRAME = 7;

// Reference frames used in this example encoder.
// The values are used as indices into ref_frame_config->ref_idx[] and
// ref_frame_config->reference[].
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};
567 
read_frame(struct AvxInputContext * input_ctx,aom_image_t * img)568 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
569   FILE *f = input_ctx->file;
570   y4m_input *y4m = &input_ctx->y4m;
571   int shortread = 0;
572 
573   if (input_ctx->file_type == FILE_TYPE_Y4M) {
574     if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
575   } else {
576     shortread = read_yuv_frame(input_ctx, img);
577   }
578 
579   return !shortread;
580 }
581 
close_input_file(struct AvxInputContext * input)582 static void close_input_file(struct AvxInputContext *input) {
583   fclose(input->file);
584   if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
585 }
586 
587 // Note: these rate control metrics assume only 1 key frame in the
588 // sequence (i.e., first frame only). So for temporal pattern# 7
589 // (which has key frame for every frame on base layer), the metrics
590 // computation will be off/wrong.
591 // TODO(marpan): Update these metrics to account for multiple key frames
592 // in the stream.
set_rate_control_metrics(struct RateControlMetrics * rc,double framerate,int ss_number_layers,int ts_number_layers)593 static void set_rate_control_metrics(struct RateControlMetrics *rc,
594                                      double framerate, int ss_number_layers,
595                                      int ts_number_layers) {
596   int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
597   ts_rate_decimator[0] = 1;
598   if (ts_number_layers == 2) {
599     ts_rate_decimator[0] = 2;
600     ts_rate_decimator[1] = 1;
601   }
602   if (ts_number_layers == 3) {
603     ts_rate_decimator[0] = 4;
604     ts_rate_decimator[1] = 2;
605     ts_rate_decimator[2] = 1;
606   }
607   // Set the layer (cumulative) framerate and the target layer (non-cumulative)
608   // per-frame-bandwidth, for the rate control encoding stats below.
609   for (int sl = 0; sl < ss_number_layers; ++sl) {
610     int i = sl * ts_number_layers;
611     rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
612     rc->layer_pfb[i] =
613         1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
614     for (int tl = 0; tl < ts_number_layers; ++tl) {
615       i = sl * ts_number_layers + tl;
616       if (tl > 0) {
617         rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
618         rc->layer_pfb[i] =
619             1000.0 *
620             (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
621             (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
622       }
623       rc->layer_input_frames[tl] = 0;
624       rc->layer_enc_frames[tl] = 0;
625       rc->layer_encoding_bitrate[i] = 0.0;
626       rc->layer_avg_frame_size[i] = 0.0;
627       rc->layer_avg_rate_mismatch[i] = 0.0;
628     }
629   }
630   rc->window_count = 0;
631   rc->window_size = 15;
632   rc->avg_st_encoding_bitrate = 0.0;
633   rc->variance_st_encoding_bitrate = 0.0;
634 }
635 
printout_rate_control_summary(struct RateControlMetrics * rc,int frame_cnt,int ss_number_layers,int ts_number_layers)636 static void printout_rate_control_summary(struct RateControlMetrics *rc,
637                                           int frame_cnt, int ss_number_layers,
638                                           int ts_number_layers) {
639   int tot_num_frames = 0;
640   double perc_fluctuation = 0.0;
641   printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
642   printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
643   for (int sl = 0; sl < ss_number_layers; ++sl) {
644     tot_num_frames = 0;
645     for (int tl = 0; tl < ts_number_layers; ++tl) {
646       int i = sl * ts_number_layers + tl;
647       const int num_dropped =
648           tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
649                  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
650       tot_num_frames += rc->layer_input_frames[tl];
651       rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
652                                       rc->layer_encoding_bitrate[i] /
653                                       tot_num_frames;
654       rc->layer_avg_frame_size[i] =
655           rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
656       rc->layer_avg_rate_mismatch[i] =
657           100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
658       printf("For layer#: %d %d \n", sl, tl);
659       printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
660              rc->layer_encoding_bitrate[i]);
661       printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
662              rc->layer_avg_frame_size[i]);
663       printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
664       printf(
665           "Number of input frames, encoded (non-key) frames, "
666           "and perc dropped frames: %d %d %f\n",
667           rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
668           100.0 * num_dropped / rc->layer_input_frames[tl]);
669       printf("\n");
670     }
671   }
672   rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
673   rc->variance_st_encoding_bitrate =
674       rc->variance_st_encoding_bitrate / rc->window_count -
675       (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
676   perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
677                      rc->avg_st_encoding_bitrate;
678   printf("Short-time stats, for window of %d frames:\n", rc->window_size);
679   printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
680          rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
681          perc_fluctuation);
682   if (frame_cnt - 1 != tot_num_frames)
683     die("Error: Number of input frames not equal to output!\n");
684 }
685 
686 // Layer pattern configuration.
set_layer_pattern(int layering_mode,int superframe_cnt,aom_svc_layer_id_t * layer_id,aom_svc_ref_frame_config_t * ref_frame_config,aom_svc_ref_frame_comp_pred_t * ref_frame_comp_pred,int * use_svc_control,int spatial_layer_id,int is_key_frame,int ksvc_mode,int speed)687 static void set_layer_pattern(
688     int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
689     aom_svc_ref_frame_config_t *ref_frame_config,
690     aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
691     int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
692   // Setting this flag to 1 enables simplex example of
693   // RPS (Reference Picture Selection) for 1 layer.
694   int use_rps_example = 0;
695   int i;
696   int enable_longterm_temporal_ref = 1;
697   int shift = (layering_mode == 8) ? 2 : 0;
698   int simulcast_mode = (layering_mode == 11);
699   *use_svc_control = 1;
700   layer_id->spatial_layer_id = spatial_layer_id;
701   int lag_index = 0;
702   int base_count = superframe_cnt >> 2;
703   ref_frame_comp_pred->use_comp_pred[0] = 0;  // GOLDEN_LAST
704   ref_frame_comp_pred->use_comp_pred[1] = 0;  // LAST2_LAST
705   ref_frame_comp_pred->use_comp_pred[2] = 0;  // ALTREF_LAST
706   // Set the reference map buffer idx for the 7 references:
707   // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
708   // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
709   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
710   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
711   for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
712 
713   if (ksvc_mode) {
    // Same pattern as case 9, but the reference structure will be constrained
    // below.
716     layering_mode = 9;
717   }
718   switch (layering_mode) {
719     case 0:
720       if (use_rps_example == 0) {
721         // 1-layer: update LAST on every frame, reference LAST.
722         layer_id->temporal_layer_id = 0;
723         layer_id->spatial_layer_id = 0;
724         ref_frame_config->refresh[0] = 1;
725         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
726       } else {
727         // Pattern of 2 references (ALTREF and GOLDEN) trailing
728         // LAST by 4 and 8 frames, with some switching logic to
729         // sometimes only predict from the longer-term reference
730         //(golden here). This is simple example to test RPS
731         // (reference picture selection).
732         int last_idx = 0;
733         int last_idx_refresh = 0;
734         int gld_idx = 0;
735         int alt_ref_idx = 0;
736         int lag_alt = 4;
737         int lag_gld = 8;
738         layer_id->temporal_layer_id = 0;
739         layer_id->spatial_layer_id = 0;
740         int sh = 8;  // slots 0 - 7.
741         // Moving index slot for last: 0 - (sh - 1)
742         if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
743         // Moving index for refresh of last: one ahead for next frame.
744         last_idx_refresh = superframe_cnt % sh;
745         // Moving index for gld_ref, lag behind current by lag_gld
746         if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
747         // Moving index for alt_ref, lag behind LAST by lag_alt frames.
748         if (superframe_cnt > lag_alt)
749           alt_ref_idx = (superframe_cnt - lag_alt) % sh;
750         // Set the ref_idx.
751         // Default all references to slot for last.
752         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
753           ref_frame_config->ref_idx[i] = last_idx;
754         // Set the ref_idx for the relevant references.
755         ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
756         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
757         ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
758         ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
759         // Refresh this slot, which will become LAST on next frame.
760         ref_frame_config->refresh[last_idx_refresh] = 1;
761         // Reference LAST, ALTREF, and GOLDEN
762         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
763         ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
764         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
765         // Switch to only GOLDEN every 200 frames.
766         if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
767           ref_frame_config->reference[SVC_LAST_FRAME] = 0;
768           ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
769           ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
770           // Test if the long-term is LAST instead; this is just a renaming,
771           // but it tests whether the encoder behaves the same whether it is
772           // LAST or GOLDEN.
773           if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
774             ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
775             ref_frame_config->reference[SVC_LAST_FRAME] = 1;
776             ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
777             ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
778           }
779         }
780       }
781       break;
782     case 1:
783       // 2-temporal layer.
784       //    1    3    5
785       //  0    2    4
786       // Keep golden fixed at slot 3.
787       base_count = superframe_cnt >> 1;
788       ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
789       // Cyclically refresh slots 5, 6, 7, for lag alt ref.
790       lag_index = 5;
791       if (base_count > 0) {
792         lag_index = 5 + (base_count % 3);
793         if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
794       }
795       // Set the altref slot to lag_index.
796       ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
797       if (superframe_cnt % 2 == 0) {
798         layer_id->temporal_layer_id = 0;
799         // Update LAST on layer 0, reference LAST.
800         ref_frame_config->refresh[0] = 1;
801         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
802         // Refresh lag_index slot, needed for lagging altref.
803         ref_frame_config->refresh[lag_index] = 1;
804         // Refresh GOLDEN every 32 base layer frames.
805         if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
806       } else {
807         layer_id->temporal_layer_id = 1;
808         // No updates on layer 1, reference LAST (TL0).
809         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
810       }
811       // Always reference golden and altref on TL0.
812       if (layer_id->temporal_layer_id == 0) {
813         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
814         ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
815       }
816       break;
817     case 2:
818       // 3-temporal layer:
819       //   1    3   5    7
820       //     2        6
821       // 0        4        8
822       if (superframe_cnt % 4 == 0) {
823         // Base layer.
824         layer_id->temporal_layer_id = 0;
825         // Update LAST on layer 0, reference LAST.
826         ref_frame_config->refresh[0] = 1;
827         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
828       } else if ((superframe_cnt - 1) % 4 == 0) {
829         layer_id->temporal_layer_id = 2;
830         // First top layer: no updates, only reference LAST (TL0).
831         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
832       } else if ((superframe_cnt - 2) % 4 == 0) {
833         layer_id->temporal_layer_id = 1;
834         // Middle layer (TL1): update LAST2, only reference LAST (TL0).
835         ref_frame_config->refresh[1] = 1;
836         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
837       } else if ((superframe_cnt - 3) % 4 == 0) {
838         layer_id->temporal_layer_id = 2;
839         // Second top layer: no updates, only reference LAST.
840         // Set buffer idx for LAST to slot 1, since that was the slot
841         // updated in previous frame. So LAST is TL1 frame.
842         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
843         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
844         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845       }
846       break;
847     case 3:
848       // 3 TL, same as above, except allow for predicting
849       // off 2 more references (GOLDEN and ALTREF), with
850       // GOLDEN updated periodically, and ALTREF lagging from
851       // LAST from ~4 frames. Both GOLDEN and ALTREF
852       // can only be updated on base temporal layer.
853 
854       // Keep golden fixed at slot 3.
855       ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
856       // Cyclically refresh slots 5, 6, 7, for lag altref.
857       lag_index = 5;
858       if (base_count > 0) {
859         lag_index = 5 + (base_count % 3);
860         if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
861       }
862       // Set the altref slot to lag_index.
863       ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
864       if (superframe_cnt % 4 == 0) {
865         // Base layer.
866         layer_id->temporal_layer_id = 0;
867         // Update LAST on layer 0, reference LAST.
868         ref_frame_config->refresh[0] = 1;
869         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
870         // Refresh GOLDEN every 10 base layer frames.
871         if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
872         // Refresh lag_index slot, needed for lagging altref.
873         ref_frame_config->refresh[lag_index] = 1;
874       } else if ((superframe_cnt - 1) % 4 == 0) {
875         layer_id->temporal_layer_id = 2;
876         // First top layer: no updates, only reference LAST (TL0).
877         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
878       } else if ((superframe_cnt - 2) % 4 == 0) {
879         layer_id->temporal_layer_id = 1;
880         // Middle layer (TL1): update LAST2, only reference LAST (TL0).
881         ref_frame_config->refresh[1] = 1;
882         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
883       } else if ((superframe_cnt - 3) % 4 == 0) {
884         layer_id->temporal_layer_id = 2;
885         // Second top layer: no updates, only reference LAST.
886         // Set buffer idx for LAST to slot 1, since that was the slot
887         // updated in previous frame. So LAST is TL1 frame.
888         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
889         ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
890         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
891       }
892       // Every frame can reference GOLDEN AND ALTREF.
893       ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
894       ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
895       // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
896       if (speed >= 7) {
897         ref_frame_comp_pred->use_comp_pred[2] = 1;
898         ref_frame_comp_pred->use_comp_pred[0] = 1;
899       }
900       break;
901     case 4:
902       // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
903       // only reference GF (not LAST). Other frames only reference LAST.
904       //   1    3   5    7
905       //     2        6
906       // 0        4        8
907       if (superframe_cnt % 4 == 0) {
908         // Base layer.
909         layer_id->temporal_layer_id = 0;
910         // Update LAST on layer 0, only reference LAST.
911         ref_frame_config->refresh[0] = 1;
912         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913       } else if ((superframe_cnt - 1) % 4 == 0) {
914         layer_id->temporal_layer_id = 2;
915         // First top layer: no updates, only reference LAST (TL0).
916         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
917       } else if ((superframe_cnt - 2) % 4 == 0) {
918         layer_id->temporal_layer_id = 1;
919         // Middle layer (TL1): update GF, only reference LAST (TL0).
920         ref_frame_config->refresh[3] = 1;
921         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
922       } else if ((superframe_cnt - 3) % 4 == 0) {
923         layer_id->temporal_layer_id = 2;
924         // Second top layer: no updates, only reference GF.
925         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
926       }
927       break;
928     case 5:
929       // 2 spatial layers, 1 temporal.
930       layer_id->temporal_layer_id = 0;
931       if (layer_id->spatial_layer_id == 0) {
932         // Reference LAST, update LAST.
933         ref_frame_config->refresh[0] = 1;
934         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
935       } else if (layer_id->spatial_layer_id == 1) {
936         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
937         // and GOLDEN to slot 0. Update slot 1 (LAST).
938         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
939         ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
940         ref_frame_config->refresh[1] = 1;
941         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
942         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
943       }
944       break;
945     case 6:
946       // 3 spatial layers, 1 temporal.
947       // Note for this case, we set the buffer idx for all references to be
948       // either LAST or GOLDEN, which are always valid references, since decoder
949       // will check if any of the 7 references is valid scale in
950       // valid_ref_frame_size().
951       layer_id->temporal_layer_id = 0;
952       if (layer_id->spatial_layer_id == 0) {
953         // Reference LAST, update LAST. Set all buffer_idx to 0.
954         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
955           ref_frame_config->ref_idx[i] = 0;
956         ref_frame_config->refresh[0] = 1;
957         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
958       } else if (layer_id->spatial_layer_id == 1) {
959         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
960         // and GOLDEN (and all other refs) to slot 0.
961         // Update slot 1 (LAST).
962         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
963           ref_frame_config->ref_idx[i] = 0;
964         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
965         ref_frame_config->refresh[1] = 1;
966         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
967         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
968       } else if (layer_id->spatial_layer_id == 2) {
969         // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
970         // and GOLDEN (and all other refs) to slot 1.
971         // Update slot 2 (LAST).
972         for (i = 0; i < INTER_REFS_PER_FRAME; i++)
973           ref_frame_config->ref_idx[i] = 1;
974         ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
975         ref_frame_config->refresh[2] = 1;
976         ref_frame_config->reference[SVC_LAST_FRAME] = 1;
977         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
978         // For 3 spatial layer case: allow for top spatial layer to use
979         // additional temporal reference. Update every 10 frames.
980         if (enable_longterm_temporal_ref) {
981           ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
982           ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
983           if (base_count % 10 == 0)
984             ref_frame_config->refresh[REF_FRAMES - 1] = 1;
985         }
986       }
987       break;
988     case 7:
989       // 2 spatial and 3 temporal layer.
990       ref_frame_config->reference[SVC_LAST_FRAME] = 1;
991       if (superframe_cnt % 4 == 0) {
992         // Base temporal layer
993         layer_id->temporal_layer_id = 0;
994         if (layer_id->spatial_layer_id == 0) {
995           // Reference LAST, update LAST
996           // Set all buffer_idx to 0
997           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
998             ref_frame_config->ref_idx[i] = 0;
999           ref_frame_config->refresh[0] = 1;
1000         } else if (layer_id->spatial_layer_id == 1) {
1001           // Reference LAST and GOLDEN.
1002           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1003             ref_frame_config->ref_idx[i] = 0;
1004           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1005           ref_frame_config->refresh[1] = 1;
1006         }
1007       } else if ((superframe_cnt - 1) % 4 == 0) {
1008         // First top temporal enhancement layer.
1009         layer_id->temporal_layer_id = 2;
1010         if (layer_id->spatial_layer_id == 0) {
1011           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1012             ref_frame_config->ref_idx[i] = 0;
1013           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1014           ref_frame_config->refresh[3] = 1;
1015         } else if (layer_id->spatial_layer_id == 1) {
1016           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1017           // GOLDEN (and all other refs) to slot 3.
1018           // No update.
1019           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1020             ref_frame_config->ref_idx[i] = 3;
1021           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1022         }
1023       } else if ((superframe_cnt - 2) % 4 == 0) {
1024         // Middle temporal enhancement layer.
1025         layer_id->temporal_layer_id = 1;
1026         if (layer_id->spatial_layer_id == 0) {
1027           // Reference LAST.
1028           // Set all buffer_idx to 0.
1029           // Set GOLDEN to slot 5 and update slot 5.
1030           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1031             ref_frame_config->ref_idx[i] = 0;
1032           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1033           ref_frame_config->refresh[5 - shift] = 1;
1034         } else if (layer_id->spatial_layer_id == 1) {
1035           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1036           // GOLDEN (and all other refs) to slot 5.
1037           // Set LAST3 to slot 6 and update slot 6.
1038           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1039             ref_frame_config->ref_idx[i] = 5 - shift;
1040           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1041           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1042           ref_frame_config->refresh[6 - shift] = 1;
1043         }
1044       } else if ((superframe_cnt - 3) % 4 == 0) {
1045         // Second top temporal enhancement layer.
1046         layer_id->temporal_layer_id = 2;
1047         if (layer_id->spatial_layer_id == 0) {
1048           // Set LAST to slot 5 and reference LAST.
1049           // Set GOLDEN to slot 3 and update slot 3.
1050           // Set all other buffer_idx to 0.
1051           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1052             ref_frame_config->ref_idx[i] = 0;
1053           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1054           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1055           ref_frame_config->refresh[3] = 1;
1056         } else if (layer_id->spatial_layer_id == 1) {
1057           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1058           // GOLDEN to slot 3. No update.
1059           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1060             ref_frame_config->ref_idx[i] = 0;
1061           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1062           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1063         }
1064       }
1065       break;
1066     case 8:
1067       // 3 spatial and 3 temporal layer.
1068       // Same as case 9 but overlap in the buffer slot updates.
1069       // (shift = 2). The slots 3 and 4 updated by first TL2 are
1070       // reused for update in TL1 superframe.
1071       // Note for this case, frame order hint must be disabled for
1072       // lower resolutions (operating points > 0) to be decodable.
1073     case 9:
1074       // 3 spatial and 3 temporal layer.
1075       // No overlap in buffer updates between TL2 and TL1.
1076       // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1077       // Set the references via the svc_ref_frame_config control.
1078       // Always reference LAST.
1079       ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1080       if (superframe_cnt % 4 == 0) {
1081         // Base temporal layer.
1082         layer_id->temporal_layer_id = 0;
1083         if (layer_id->spatial_layer_id == 0) {
1084           // Reference LAST, update LAST.
1085           // Set all buffer_idx to 0.
1086           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1087             ref_frame_config->ref_idx[i] = 0;
1088           ref_frame_config->refresh[0] = 1;
1089         } else if (layer_id->spatial_layer_id == 1) {
1090           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1091           // GOLDEN (and all other refs) to slot 0.
1092           // Update slot 1 (LAST).
1093           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094             ref_frame_config->ref_idx[i] = 0;
1095           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1096           ref_frame_config->refresh[1] = 1;
1097         } else if (layer_id->spatial_layer_id == 2) {
1098           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1099           // GOLDEN (and all other refs) to slot 1.
1100           // Update slot 2 (LAST).
1101           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102             ref_frame_config->ref_idx[i] = 1;
1103           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1104           ref_frame_config->refresh[2] = 1;
1105         }
1106       } else if ((superframe_cnt - 1) % 4 == 0) {
1107         // First top temporal enhancement layer.
1108         layer_id->temporal_layer_id = 2;
1109         if (layer_id->spatial_layer_id == 0) {
1110           // Reference LAST (slot 0).
1111           // Set GOLDEN to slot 3 and update slot 3.
1112           // Set all other buffer_idx to slot 0.
1113           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1114             ref_frame_config->ref_idx[i] = 0;
1115           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1116           ref_frame_config->refresh[3] = 1;
1117         } else if (layer_id->spatial_layer_id == 1) {
1118           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1119           // GOLDEN (and all other refs) to slot 3.
1120           // Set LAST2 to slot 4 and Update slot 4.
1121           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1122             ref_frame_config->ref_idx[i] = 3;
1123           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1124           ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1125           ref_frame_config->refresh[4] = 1;
1126         } else if (layer_id->spatial_layer_id == 2) {
1127           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1128           // GOLDEN (and all other refs) to slot 4.
1129           // No update.
1130           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1131             ref_frame_config->ref_idx[i] = 4;
1132           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1133         }
1134       } else if ((superframe_cnt - 2) % 4 == 0) {
1135         // Middle temporal enhancement layer.
1136         layer_id->temporal_layer_id = 1;
1137         if (layer_id->spatial_layer_id == 0) {
1138           // Reference LAST.
1139           // Set all buffer_idx to 0.
1140           // Set GOLDEN to slot 5 and update slot 5.
1141           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142             ref_frame_config->ref_idx[i] = 0;
1143           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1144           ref_frame_config->refresh[5 - shift] = 1;
1145         } else if (layer_id->spatial_layer_id == 1) {
1146           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1147           // GOLDEN (and all other refs) to slot 5.
1148           // Set LAST3 to slot 6 and update slot 6.
1149           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1150             ref_frame_config->ref_idx[i] = 5 - shift;
1151           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1152           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1153           ref_frame_config->refresh[6 - shift] = 1;
1154         } else if (layer_id->spatial_layer_id == 2) {
1155           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1156           // GOLDEN (and all other refs) to slot 6.
1157           // Set LAST3 to slot 7 and update slot 7.
1158           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1159             ref_frame_config->ref_idx[i] = 6 - shift;
1160           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1161           ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1162           ref_frame_config->refresh[7 - shift] = 1;
1163         }
1164       } else if ((superframe_cnt - 3) % 4 == 0) {
1165         // Second top temporal enhancement layer.
1166         layer_id->temporal_layer_id = 2;
1167         if (layer_id->spatial_layer_id == 0) {
1168           // Set LAST to slot 5 and reference LAST.
1169           // Set GOLDEN to slot 3 and update slot 3.
1170           // Set all other buffer_idx to 0.
1171           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1172             ref_frame_config->ref_idx[i] = 0;
1173           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1174           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1175           ref_frame_config->refresh[3] = 1;
1176         } else if (layer_id->spatial_layer_id == 1) {
1177           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1178           // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1179           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1180             ref_frame_config->ref_idx[i] = 0;
1181           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1182           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1183           ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1184           ref_frame_config->refresh[4] = 1;
1185         } else if (layer_id->spatial_layer_id == 2) {
1186           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1187           // GOLDEN to slot 4. No update.
1188           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1189             ref_frame_config->ref_idx[i] = 0;
1190           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1191           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1192         }
1193       }
1194       break;
1195     case 11:
1196       // Simulcast mode for 3 spatial and 3 temporal layers.
1197       // No inter-layer prediction, only prediction is temporal and single
1198       // reference (LAST).
1199       // No overlap in buffer slots between spatial layers. So for example,
1200       // SL0 only uses slots 0 and 1.
1201       // SL1 only uses slots 2 and 3.
1202       // SL2 only uses slots 4 and 5.
1203       // All 7 references for each inter-frame must only access buffer slots
1204       // for that spatial layer.
1205       // On key (super)frames: SL1 and SL2 must have no references set
1206       // and must refresh all the slots for that layer only (so 2 and 3
1207       // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1208       // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1209       // internally as Intra-only frames that allow that stream to be decoded.
1210       // These conditions will allow for each spatial stream to be
1211       // independently decodable.
1212 
1213       // Initialize all references to 0 (don't use reference).
1214       for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215         ref_frame_config->reference[i] = 0;
1216       // Initialize as no refresh/update for all slots.
1217       for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1218       for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1219         ref_frame_config->ref_idx[i] = 0;
1220 
1221       if (is_key_frame) {
1222         if (layer_id->spatial_layer_id == 0) {
1223           // Assign LAST/GOLDEN to slot 0/1.
1224           // Refresh slots 0 and 1 for SL0.
1225           // SL0: this will get set to KEY frame internally.
1226           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1227           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1228           ref_frame_config->refresh[0] = 1;
1229           ref_frame_config->refresh[1] = 1;
1230         } else if (layer_id->spatial_layer_id == 1) {
1231           // Assign LAST/GOLDEN to slot 2/3.
1232           // Refresh slots 2 and 3 for SL1.
1233           // This will get set to Intra-only frame internally.
1234           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1235           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1236           ref_frame_config->refresh[2] = 1;
1237           ref_frame_config->refresh[3] = 1;
1238         } else if (layer_id->spatial_layer_id == 2) {
1239           // Assign LAST/GOLDEN to slot 4/5.
1240           // Refresh slots 4 and 5 for SL2.
1241           // This will get set to Intra-only frame internally.
1242           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1243           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1244           ref_frame_config->refresh[4] = 1;
1245           ref_frame_config->refresh[5] = 1;
1246         }
1247       } else if (superframe_cnt % 4 == 0) {
1248         // Base temporal layer: TL0
1249         layer_id->temporal_layer_id = 0;
1250         if (layer_id->spatial_layer_id == 0) {  // SL0
1251           // Reference LAST. Assign all references to either slot
1252           // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1253           // Update slot 0 (LAST).
1254           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1255           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1256             ref_frame_config->ref_idx[i] = 1;
1257           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1258           ref_frame_config->refresh[0] = 1;
1259         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1260           // Reference LAST. Assign all references to either slot
1261           // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1262           // Update slot 2 (LAST).
1263           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265             ref_frame_config->ref_idx[i] = 3;
1266           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1267           ref_frame_config->refresh[2] = 1;
1268         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1269           // Reference LAST. Assign all references to either slot
1270           // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1271           // Update slot 4 (LAST).
1272           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1273           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1274             ref_frame_config->ref_idx[i] = 5;
1275           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1276           ref_frame_config->refresh[4] = 1;
1277         }
1278       } else if ((superframe_cnt - 1) % 4 == 0) {
1279         // First top temporal enhancement layer: TL2
1280         layer_id->temporal_layer_id = 2;
1281         if (layer_id->spatial_layer_id == 0) {  // SL0
1282           // Reference LAST (slot 0). Assign other references to slot 1.
1283           // No update/refresh on any slots.
1284           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1285           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1286             ref_frame_config->ref_idx[i] = 1;
1287           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1288         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1289           // Reference LAST (slot 2). Assign other references to slot 3.
1290           // No update/refresh on any slots.
1291           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1292           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1293             ref_frame_config->ref_idx[i] = 3;
1294           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1295         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1296           // Reference LAST (slot 4). Assign other references to slot 5.
1297           // No update/refresh on any slots.
1298           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1299           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1300             ref_frame_config->ref_idx[i] = 5;
1301           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1302         }
1303       } else if ((superframe_cnt - 2) % 4 == 0) {
1304         // Middle temporal enhancement layer: TL1
1305         layer_id->temporal_layer_id = 1;
1306         if (layer_id->spatial_layer_id == 0) {  // SL0
1307           // Reference LAST (slot 0).
1308           // Set GOLDEN to slot 1 and update slot 1.
1309           // This will be used as reference for next TL2.
1310           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1311           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1312             ref_frame_config->ref_idx[i] = 1;
1313           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1314           ref_frame_config->refresh[1] = 1;
1315         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1316           // Reference LAST (slot 2).
1317           // Set GOLDEN to slot 3 and update slot 3.
1318           // This will be used as reference for next TL2.
1319           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1320           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1321             ref_frame_config->ref_idx[i] = 3;
1322           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1323           ref_frame_config->refresh[3] = 1;
1324         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1325           // Reference LAST (slot 4).
1326           // Set GOLDEN to slot 5 and update slot 5.
1327           // This will be used as reference for next TL2.
1328           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1329           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1330             ref_frame_config->ref_idx[i] = 5;
1331           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1332           ref_frame_config->refresh[5] = 1;
1333         }
1334       } else if ((superframe_cnt - 3) % 4 == 0) {
1335         // Second top temporal enhancement layer: TL2
1336         layer_id->temporal_layer_id = 2;
1337         if (layer_id->spatial_layer_id == 0) {  // SL0
1338           // Reference LAST (slot 1). Assign other references to slot 0.
1339           // No update/refresh on any slots.
1340           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1341           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1342             ref_frame_config->ref_idx[i] = 0;
1343           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1344         } else if (layer_id->spatial_layer_id == 1) {  // SL1
1345           // Reference LAST (slot 3). Assign other references to slot 2.
1346           // No update/refresh on any slots.
1347           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1348           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1349             ref_frame_config->ref_idx[i] = 2;
1350           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1351         } else if (layer_id->spatial_layer_id == 2) {  // SL2
1352           // Reference LAST (slot 5). Assign other references to slot 4.
1353           // No update/refresh on any slots.
1354           ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1355           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1356             ref_frame_config->ref_idx[i] = 4;
1357           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1358         }
1359       }
1360       if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1361         // Always reference GOLDEN (inter-layer prediction).
1362         ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1363         if (ksvc_mode) {
1364           // KSVC: only keep the inter-layer reference (GOLDEN) for
1365           // superframes whose base is key.
1366           if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1367         }
1368         if (is_key_frame && layer_id->spatial_layer_id > 1) {
1369           // On superframes whose base is key: remove LAST to avoid prediction
1370           // off layer two levels below.
1371           ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1372         }
1373       }
1374       // For 3 spatial layer case 8 (where there is free buffer slot):
1375       // allow for top spatial layer to use additional temporal reference.
1376       // Additional reference is only updated on base temporal layer, every
1377       // 10 TL0 frames here.
1378       if (!simulcast_mode && enable_longterm_temporal_ref &&
1379           layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1380         ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1381         if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1382         if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1383           ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1384       }
1385       break;
1386     default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1387   }
1388 }
1389 
write_literal(struct aom_write_bit_buffer * wb,uint32_t data,uint8_t bits,uint32_t offset=0)1390 static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data,
1391                           uint8_t bits, uint32_t offset = 0) {
1392   if (bits > 32) {
1393     die("Invalid bits value %d > 32\n", bits);
1394   }
1395   const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1);
1396   if (data < offset || (data - offset) > max) {
1397     die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset,
1398         (uint64_t)max + offset);
1399   }
1400   aom_wb_write_unsigned_literal(wb, data - offset, bits);
1401 }
1402 
write_depth_representation_element(struct aom_write_bit_buffer * buffer,const std::pair<libaom_examples::DepthRepresentationElement,bool> & element)1403 static void write_depth_representation_element(
1404     struct aom_write_bit_buffer *buffer,
1405     const std::pair<libaom_examples::DepthRepresentationElement, bool>
1406         &element) {
1407   if (!element.second) {
1408     return;
1409   }
1410   write_literal(buffer, element.first.sign_flag, 1);
1411   write_literal(buffer, element.first.exponent, 7);
1412   if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) {
1413     die("Invalid mantissan_len %d\n", element.first.mantissa_len);
1414   }
1415   write_literal(buffer, element.first.mantissa_len - 1, 5);
1416   write_literal(buffer, element.first.mantissa, element.first.mantissa_len);
1417 }
1418 
write_color_properties(struct aom_write_bit_buffer * buffer,const std::pair<libaom_examples::ColorProperties,bool> & color_properties)1419 static void write_color_properties(
1420     struct aom_write_bit_buffer *buffer,
1421     const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1422   write_literal(buffer, color_properties.second, 1);
1423   if (color_properties.second) {
1424     write_literal(buffer, color_properties.first.color_range, 1);
1425     write_literal(buffer, color_properties.first.color_primaries, 8);
1426     write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1427     write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1428   } else {
1429     write_literal(buffer, 0, 1);  // reserved_1bit
1430   }
1431 }
1432 
add_multilayer_metadata(aom_image_t * frame,const libaom_examples::MultilayerMetadata & multilayer)1433 static void add_multilayer_metadata(
1434     aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1435   // Pretty large buffer to accommodate the largest multilayer metadata
1436   // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
1437   std::vector<uint8_t> data(66000 * multilayer.layers.size());
1438   struct aom_write_bit_buffer buffer = { data.data(), 0 };
1439 
1440   write_literal(&buffer, multilayer.use_case, 6);
1441   if (multilayer.layers.empty()) {
1442     die("Invalid multilayer metadata, no layers found\n");
1443   } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1444     die("Invalid multilayer metadata, too many layers (max is %d)\n",
1445         MAX_NUM_SPATIAL_LAYERS);
1446   }
1447   write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1448   assert(buffer.bit_offset % 8 == 0);
1449   for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1450     const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1451     // Alpha info with segmentation with labels can be up to about 66k bytes,
1452     // which requires 3 bytes to encode in leb128.
1453     const int bytes_reserved_for_size = 3;
1454     // Placeholder for layer_metadata_size which will be written later.
1455     write_literal(&buffer, 0, bytes_reserved_for_size * 8);
1456     const uint32_t metadata_start = buffer.bit_offset;
1457     write_literal(&buffer, (int)i, 2);  // ml_spatial_id
1458     write_literal(&buffer, layer.layer_type, 5);
1459     write_literal(&buffer, layer.luma_plane_only_flag, 1);
1460     write_literal(&buffer, layer.layer_view_type, 3);
1461     write_literal(&buffer, layer.group_id, 2);
1462     write_literal(&buffer, layer.layer_dependency_idc, 3);
1463     write_literal(&buffer, layer.layer_metadata_scope, 2);
1464     write_literal(&buffer, 0, 4);  // ml_reserved_4bits
1465 
1466     if (i > 0) {
1467       write_color_properties(&buffer, layer.layer_color_description);
1468     } else {
1469       write_literal(&buffer, 0, 2);  // ml_reserved_2bits
1470     }
1471     assert(buffer.bit_offset % 8 == 0);
1472 
1473     if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA &&
1474         layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1475       const libaom_examples::AlphaInformation &alpha_info =
1476           layer.global_alpha_info;
1477       write_literal(&buffer, alpha_info.alpha_use_idc, 3);
1478       write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1479       write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1480       write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
1481       write_literal(&buffer, alpha_info.alpha_transparent_value,
1482                     alpha_info.alpha_bit_depth + 1);
1483       write_literal(&buffer, alpha_info.alpha_opaque_value,
1484                     alpha_info.alpha_bit_depth + 1);
1485       if (buffer.bit_offset % 8 != 0) {
1486         // ai_byte_alignment_bits
1487         write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1488       }
1489       assert(buffer.bit_offset % 8 == 0);
1490 
1491       if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
1492         write_literal(&buffer, 0, 6);  // ai_reserved_6bits
1493         write_color_properties(&buffer, alpha_info.alpha_color_description);
1494       } else if (alpha_info.alpha_use_idc ==
1495                  libaom_examples::ALPHA_SEGMENTATION) {
1496         write_literal(&buffer, 0, 7);  // ai_reserved_7bits
1497         write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
1498         if (!alpha_info.label_type_id.empty()) {
1499           const size_t num_values =
1500               std::abs(alpha_info.alpha_transparent_value -
1501                        alpha_info.alpha_opaque_value) +
1502               1;
1503           if (!alpha_info.label_type_id.empty() &&
1504               alpha_info.label_type_id.size() != num_values) {
1505             die("Invalid multilayer metadata, label_type_id size must be "
1506                 "equal to the range of alpha values between "
1507                 "alpha_transparent_value and alpha_opaque_value (expected "
1508                 "%d values, found %d values)\n",
1509                 (int)num_values, (int)alpha_info.label_type_id.size());
1510           }
1511           for (size_t j = 0; j < num_values; ++j) {
1512             write_literal(&buffer, alpha_info.label_type_id[j], 16);
1513           }
1514         }
1515       }
1516       assert(buffer.bit_offset % 8 == 0);
1517     } else if (layer.layer_type ==
1518                    libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH &&
1519                layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1520       const libaom_examples::DepthInformation &depth_info =
1521           layer.global_depth_info;
1522       write_literal(&buffer, depth_info.z_near.second, 1);
1523       write_literal(&buffer, depth_info.z_far.second, 1);
1524       write_literal(&buffer, depth_info.d_min.second, 1);
1525       write_literal(&buffer, depth_info.d_max.second, 1);
1526       write_literal(&buffer, depth_info.depth_representation_type, 4);
1527       if (depth_info.d_min.second || depth_info.d_max.second) {
1528         write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1529       }
1530       write_depth_representation_element(&buffer, depth_info.z_near);
1531       write_depth_representation_element(&buffer, depth_info.z_far);
1532       write_depth_representation_element(&buffer, depth_info.d_min);
1533       write_depth_representation_element(&buffer, depth_info.d_max);
1534       if (depth_info.depth_representation_type == 3) {
1535         write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
1536                       /*offset=*/8);
1537         if (depth_info.depth_nonlinear_representation_model.empty() ||
1538             depth_info.depth_nonlinear_representation_model.size() > (1 << 6)) {
1539           die("Invalid multilayer metadata, if depth_nonlinear_precision "
1540               "== 3, depth_nonlinear_representation_model must have 1 to "
1541               "%d elements, found %d elements\n",
1542               1 << 6,
1543               (int)depth_info.depth_nonlinear_representation_model.size());
1544         }
1545         write_literal(
1546             &buffer,
1547             (int)depth_info.depth_nonlinear_representation_model.size() - 1, 6);
1548         const int bit_depth = depth_info.depth_nonlinear_precision;
1549         for (const uint32_t v :
1550              depth_info.depth_nonlinear_representation_model) {
1551           write_literal(&buffer, v, bit_depth);
1552         }
1553       }
1554       if (buffer.bit_offset % 8 != 0) {
1555         write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1556       }
1557       assert(buffer.bit_offset % 8 == 0);
1558     }
1559 
1560     assert(buffer.bit_offset % 8 == 0);
1561 
1562     const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1563     const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1564     size_t coded_size;
1565     if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1566                                    bytes_reserved_for_size,
1567                                    &buffer.bit_buffer[size_pos], &coded_size)) {
1568       // Need to increase bytes_reserved_for_size in the code above.
1569       die("Error: Failed to write metadata size\n");
1570     }
1571   }
1572   assert(buffer.bit_offset % 8 == 0);
1573   if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1574                            buffer.bit_buffer, buffer.bit_offset / 8,
1575                            AOM_MIF_KEY_FRAME)) {
1576     die("Error: Failed to add metadata\n");
1577   }
1578 }
1579 
1580 #if CONFIG_AV1_DECODER
1581 // Returns whether there is a mismatch between the encoder's new frame and the
1582 // decoder's new frame.
test_decode(aom_codec_ctx_t * encoder,aom_codec_ctx_t * decoder,const int frames_out)1583 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1584                        const int frames_out) {
1585   aom_image_t enc_img, dec_img;
1586   int mismatch = 0;
1587 
1588   /* Get the internal new frame */
1589   AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
1590   AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
1591 
1592 #if CONFIG_AV1_HIGHBITDEPTH
1593   if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1594       (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1595     if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1596       aom_image_t enc_hbd_img;
1597       aom_img_alloc(
1598           &enc_hbd_img,
1599           static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1600           enc_img.d_w, enc_img.d_h, 16);
1601       aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1602       enc_img = enc_hbd_img;
1603     }
1604     if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1605       aom_image_t dec_hbd_img;
1606       aom_img_alloc(
1607           &dec_hbd_img,
1608           static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1609           dec_img.d_w, dec_img.d_h, 16);
1610       aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1611       dec_img = dec_hbd_img;
1612     }
1613   }
1614 #endif
1615 
1616   if (!aom_compare_img(&enc_img, &dec_img)) {
1617     int y[4], u[4], v[4];
1618 #if CONFIG_AV1_HIGHBITDEPTH
1619     if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1620       aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1621     } else {
1622       aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1623     }
1624 #else
1625     aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1626 #endif
1627     fprintf(stderr,
1628             "Encode/decode mismatch on frame %d at"
1629             " Y[%d, %d] {%d/%d},"
1630             " U[%d, %d] {%d/%d},"
1631             " V[%d, %d] {%d/%d}\n",
1632             frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1633             v[1], v[2], v[3]);
1634     mismatch = 1;
1635   }
1636 
1637   aom_img_free(&enc_img);
1638   aom_img_free(&dec_img);
1639   return mismatch;
1640 }
1641 #endif  // CONFIG_AV1_DECODER
1642 
// Running PSNR totals for a stream.
// In every array, index 0 is the set of values read by show_psnr(); the second
// element is reserved for high bitdepth.
struct psnr_stats {
  // Accumulated squared error, passed to sse_to_psnr() by show_psnr().
  uint64_t psnr_sse_total[2];
  // Accumulated sample count, passed to sse_to_psnr() by show_psnr().
  uint64_t psnr_samples_total[2];
  // Four summed PSNR values; show_psnr() divides each by psnr_count and prints
  // them after the overall PSNR, under the Avg/Y/U/V headings.
  double psnr_totals[2][4];
  // Divisor for psnr_totals; show_psnr() prints nothing while it is zero.
  int psnr_count[2];
};
1650 
show_psnr(struct psnr_stats * psnr_stream,double peak)1651 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1652   double ovpsnr;
1653 
1654   if (!psnr_stream->psnr_count[0]) return;
1655 
1656   fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1657   ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1658                        (double)psnr_stream->psnr_sse_total[0]);
1659   fprintf(stderr, " %.3f", ovpsnr);
1660 
1661   for (int i = 0; i < 4; i++) {
1662     fprintf(stderr, " %.3f",
1663             psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1664   }
1665   fprintf(stderr, "\n");
1666 }
1667 
create_rtc_rc_config(const aom_codec_enc_cfg_t & cfg,const AppInput & app_input)1668 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1669     const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1670   aom::AV1RateControlRtcConfig rc_cfg;
1671   rc_cfg.width = cfg.g_w;
1672   rc_cfg.height = cfg.g_h;
1673   rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1674   rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1675   rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1676   rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1677   rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1678   rc_cfg.buf_sz = cfg.rc_buf_sz;
1679   rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1680   rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1681   // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1682   rc_cfg.max_intra_bitrate_pct = 300;
1683   rc_cfg.framerate = cfg.g_timebase.den;
1684   // TODO(jianj): Add suppor for SVC.
1685   rc_cfg.ss_number_layers = 1;
1686   rc_cfg.ts_number_layers = 1;
1687   rc_cfg.scaling_factor_num[0] = 1;
1688   rc_cfg.scaling_factor_den[0] = 1;
1689   rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1690   rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1691   rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1692   rc_cfg.aq_mode = app_input.aq_mode;
1693 
1694   return rc_cfg;
1695 }
1696 
// Maps an internal qindex back to a quantizer in the 0-63 range.
// Returns the smallest quantizer whose table entry is greater than or equal to
// `qindex`, or 63 when `qindex` is above every entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q holds the 0-255 range qindex that corresponds to the 0-63 range
  // quantizer value q.
  static const int kQindexForQuantizer[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_quantizer =
      (int)(sizeof(kQindexForQuantizer) / sizeof(kQindexForQuantizer[0])) - 1;

  // Walk upwards until an entry reaches qindex; the last quantizer is the
  // answer when none does.
  int quantizer = 0;
  while (quantizer < last_quantizer &&
         kQindexForQuantizer[quantizer] < qindex) {
    ++quantizer;
  }
  return quantizer;
}
1712 
set_active_map(const aom_codec_enc_cfg_t * cfg,aom_codec_ctx_t * codec,int frame_cnt)1713 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1714                            aom_codec_ctx_t *codec, int frame_cnt) {
1715   aom_active_map_t map = { 0, 0, 0 };
1716 
1717   map.rows = (cfg->g_h + 15) / 16;
1718   map.cols = (cfg->g_w + 15) / 16;
1719 
1720   map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1721   if (!map.active_map) die("Failed to allocate active map");
1722 
1723   // Example map for testing.
1724   for (unsigned int i = 0; i < map.rows; ++i) {
1725     for (unsigned int j = 0; j < map.cols; ++j) {
1726       int index = map.cols * i + j;
1727       map.active_map[index] = 1;
1728       if (frame_cnt < 300) {
1729         if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1730       } else if (frame_cnt >= 300) {
1731         if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1732       }
1733     }
1734   }
1735 
1736   if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1737     die_codec(codec, "Failed to set active map");
1738 
1739   free(map.active_map);
1740 }
1741 
main(int argc,const char ** argv)1742 int main(int argc, const char **argv) {
1743   AppInput app_input;
1744   AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1745   FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1746   AvxVideoWriter *total_layer_file = NULL;
1747   FILE *total_layer_obu_file = NULL;
1748   aom_codec_enc_cfg_t cfg;
1749   int frame_cnt = 0;
1750   aom_image_t raw;
1751   int frame_avail;
1752   int got_data = 0;
1753   int flags = 0;
1754   int i;
1755   int pts = 0;             // PTS starts at 0.
1756   int frame_duration = 1;  // 1 timebase tick per frame.
1757   aom_svc_layer_id_t layer_id;
1758   aom_svc_params_t svc_params;
1759   aom_svc_ref_frame_config_t ref_frame_config;
1760   aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1761 
1762 #if CONFIG_INTERNAL_STATS
1763   FILE *stats_file = fopen("opsnr.stt", "a");
1764   if (stats_file == NULL) {
1765     die("Cannot open opsnr.stt\n");
1766   }
1767 #endif
1768 #if CONFIG_AV1_DECODER
1769   aom_codec_ctx_t decoder;
1770 #endif
1771 
1772   struct RateControlMetrics rc;
1773   int64_t cx_time = 0;
1774   int64_t cx_time_layer[AOM_MAX_LAYERS];  // max number of layers.
1775   int frame_cnt_layer[AOM_MAX_LAYERS];
1776   double sum_bitrate = 0.0;
1777   double sum_bitrate2 = 0.0;
1778   double framerate = 30.0;
1779   int use_svc_control = 1;
1780   int set_err_resil_frame = 0;
1781   int test_changing_bitrate = 0;
1782   zero(rc.layer_target_bitrate);
1783   memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1784   memset(&app_input, 0, sizeof(AppInput));
1785   memset(&svc_params, 0, sizeof(svc_params));
1786 
1787   // Flag to test dynamic scaling of source frames for single
1788   // spatial stream, using the scaling_mode control.
1789   const int test_dynamic_scaling_single_layer = 0;
1790 
1791   // Flag to test setting speed per layer.
1792   const int test_speed_per_layer = 0;
1793 
1794   // Flag for testing active maps.
1795   const int test_active_maps = 0;
1796 
1797   /* Setup default input stream settings */
1798   for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
1799     app_input.input_ctx[i].framerate.numerator = 30;
1800     app_input.input_ctx[i].framerate.denominator = 1;
1801     app_input.input_ctx[i].only_i420 = 0;
1802     app_input.input_ctx[i].bit_depth = AOM_BITS_8;
1803   }
1804   app_input.speed = 7;
1805   exec_name = argv[0];
1806 
1807   // start with default encoder configuration
1808   aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
1809                                                      AOM_USAGE_REALTIME);
1810   if (res != AOM_CODEC_OK) {
1811     die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1812   }
1813 
1814   // Real time parameters.
1815   cfg.g_usage = AOM_USAGE_REALTIME;
1816 
1817   cfg.rc_end_usage = AOM_CBR;
1818   cfg.rc_min_quantizer = 2;
1819   cfg.rc_max_quantizer = 52;
1820   cfg.rc_undershoot_pct = 50;
1821   cfg.rc_overshoot_pct = 50;
1822   cfg.rc_buf_initial_sz = 600;
1823   cfg.rc_buf_optimal_sz = 600;
1824   cfg.rc_buf_sz = 1000;
1825   cfg.rc_resize_mode = 0;  // Set to RESIZE_DYNAMIC for dynamic resize.
1826   cfg.g_lag_in_frames = 0;
1827   cfg.kf_mode = AOM_KF_AUTO;
1828   cfg.g_w = 0;  // Force user to specify width and height for raw input.
1829   cfg.g_h = 0;
1830 
1831   parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1832 
1833   int ts_number_layers = svc_params.number_temporal_layers;
1834   int ss_number_layers = svc_params.number_spatial_layers;
1835 
1836   unsigned int width = cfg.g_w;
1837   unsigned int height = cfg.g_h;
1838 
1839   if (app_input.layering_mode >= 0) {
1840     if (ts_number_layers !=
1841             mode_to_num_temporal_layers[app_input.layering_mode] ||
1842         ss_number_layers !=
1843             mode_to_num_spatial_layers[app_input.layering_mode]) {
1844       die("Number of layers doesn't match layering mode.");
1845     }
1846   }
1847 
1848   bool has_non_y4m_input = false;
1849   for (i = 0; i < AOM_MAX_LAYERS; ++i) {
1850     if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
1851       has_non_y4m_input = true;
1852       break;
1853     }
1854   }
1855   // Y4M reader has its own allocation.
1856   if (has_non_y4m_input) {
1857     if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1858       die("Failed to allocate image (%dx%d)", width, height);
1859     }
1860   }
1861 
1862   aom_codec_iface_t *encoder = aom_codec_av1_cx();
1863 
1864   memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1865          sizeof(svc_params.layer_target_bitrate));
1866 
1867   unsigned int total_rate = 0;
1868   for (i = 0; i < ss_number_layers; i++) {
1869     total_rate +=
1870         svc_params
1871             .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1872   }
1873   if (total_rate != cfg.rc_target_bitrate) {
1874     die("Incorrect total target bitrate, expected: %d", total_rate);
1875   }
1876 
1877   svc_params.framerate_factor[0] = 1;
1878   if (ts_number_layers == 2) {
1879     svc_params.framerate_factor[0] = 2;
1880     svc_params.framerate_factor[1] = 1;
1881   } else if (ts_number_layers == 3) {
1882     svc_params.framerate_factor[0] = 4;
1883     svc_params.framerate_factor[1] = 2;
1884     svc_params.framerate_factor[2] = 1;
1885   }
1886 
1887   libaom_examples::MultilayerMetadata multilayer_metadata;
1888   if (app_input.multilayer_metadata_file != NULL) {
1889     if (!libaom_examples::parse_multilayer_file(
1890             app_input.multilayer_metadata_file, &multilayer_metadata)) {
1891       die("Failed to parse multilayer metadata");
1892     }
1893     libaom_examples::print_multilayer_metadata(multilayer_metadata);
1894   }
1895 
1896   framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1897   set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1898 
1899   AvxVideoInfo info;
1900   info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1901   info.frame_width = cfg.g_w;
1902   info.frame_height = cfg.g_h;
1903   info.time_base.numerator = cfg.g_timebase.num;
1904   info.time_base.denominator = cfg.g_timebase.den;
1905   // Open an output file for each stream.
1906   for (int sl = 0; sl < ss_number_layers; ++sl) {
1907     for (int tl = 0; tl < ts_number_layers; ++tl) {
1908       i = sl * ts_number_layers + tl;
1909       char file_name[PATH_MAX];
1910       snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1911                app_input.output_filename, i);
1912       if (app_input.output_obu) {
1913         obu_files[i] = fopen(file_name, "wb");
1914         if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1915       } else {
1916         outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1917         if (!outfile[i]) die("Failed to open %s for writing", file_name);
1918       }
1919     }
1920   }
1921   if (app_input.output_obu) {
1922     total_layer_obu_file = fopen(app_input.output_filename, "wb");
1923     if (!total_layer_obu_file)
1924       die("Failed to open %s for writing", app_input.output_filename);
1925   } else {
1926     total_layer_file =
1927         aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1928     if (!total_layer_file)
1929       die("Failed to open %s for writing", app_input.output_filename);
1930   }
1931 
1932   // Initialize codec.
1933   aom_codec_ctx_t codec;
1934   aom_codec_flags_t flag = 0;
1935   flag |= cfg.g_input_bit_depth == AOM_BITS_8 ? 0 : AOM_CODEC_USE_HIGHBITDEPTH;
1936   flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1937   if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1938     die_codec(&codec, "Failed to initialize encoder");
1939 
1940 #if CONFIG_AV1_DECODER
1941   if (app_input.decode) {
1942     if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1943       die_codec(&decoder, "Failed to initialize decoder");
1944   }
1945 #endif
1946 
1947   aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1948   aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1949   aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
1950   aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
1951   aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
1952   aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
1953   aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
1954   aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
1955   aom_codec_control(&codec, AV1E_SET_ENABLE_ORDER_HINT, 0);
1956   aom_codec_control(&codec, AV1E_SET_ENABLE_TPL_MODEL, 0);
1957   aom_codec_control(&codec, AV1E_SET_DELTAQ_MODE, 0);
1958   aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
1959   aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
1960   aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
1961   aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
1962   aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
1963 
1964   // Settings to reduce key frame encoding time.
1965   aom_codec_control(&codec, AV1E_SET_ENABLE_CFL_INTRA, 0);
1966   aom_codec_control(&codec, AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
1967   aom_codec_control(&codec, AV1E_SET_ENABLE_ANGLE_DELTA, 0);
1968   aom_codec_control(&codec, AV1E_SET_ENABLE_FILTER_INTRA, 0);
1969   aom_codec_control(&codec, AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
1970 
1971   aom_codec_control(&codec, AV1E_SET_AUTO_TILES, 1);
1972 
1973   aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1974   if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1975     aom_codec_control(&codec, AV1E_SET_ENABLE_PALETTE, 1);
1976     // INTRABC is currently disabled for rt mode, as it's too slow.
1977     aom_codec_control(&codec, AV1E_SET_ENABLE_INTRABC, 0);
1978   }
1979 
1980   if (app_input.use_external_rc) {
1981     aom_codec_control(&codec, AV1E_SET_RTC_EXTERNAL_RC, 1);
1982   }
1983 
1984   aom_codec_control(&codec, AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR, INT_MAX);
1985 
1986   aom_codec_control(&codec, AV1E_SET_SVC_FRAME_DROP_MODE,
1987                     AOM_FULL_SUPERFRAME_DROP);
1988 
1989   aom_codec_control(&codec, AV1E_SET_POSTENCODE_DROP_RTC, 1);
1990 
1991   svc_params.number_spatial_layers = ss_number_layers;
1992   svc_params.number_temporal_layers = ts_number_layers;
1993   for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1994     svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1995     svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1996   }
1997   if (!app_input.scale_factors_explicitly_set) {
1998     for (i = 0; i < ss_number_layers; ++i) {
1999       svc_params.scaling_factor_num[i] = 1;
2000       svc_params.scaling_factor_den[i] = 1;
2001     }
2002     if (ss_number_layers == 2) {
2003       svc_params.scaling_factor_num[0] = 1;
2004       svc_params.scaling_factor_den[0] = 2;
2005     } else if (ss_number_layers == 3) {
2006       svc_params.scaling_factor_num[0] = 1;
2007       svc_params.scaling_factor_den[0] = 4;
2008       svc_params.scaling_factor_num[1] = 1;
2009       svc_params.scaling_factor_den[1] = 2;
2010     }
2011   }
2012   aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
2013   // TODO(aomedia:3032): Configure KSVC in fixed mode.
2014 
2015   // This controls the maximum target size of the key frame.
2016   // For generating smaller key frames, use a smaller max_intra_size_pct
2017   // value, like 100 or 200.
2018   {
2019     const int max_intra_size_pct = 300;
2020     aom_codec_control(&codec, AOME_SET_MAX_INTRA_BITRATE_PCT,
2021                       max_intra_size_pct);
2022   }
2023 
2024   for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
2025     cx_time_layer[lx] = 0;
2026     frame_cnt_layer[lx] = 0;
2027   }
2028 
2029   std::unique_ptr<aom::AV1RateControlRTC> rc_api;
2030   if (app_input.use_external_rc) {
2031     const aom::AV1RateControlRtcConfig rc_cfg =
2032         create_rtc_rc_config(cfg, app_input);
2033     rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
2034   }
2035 
2036   frame_avail = 1;
2037   struct psnr_stats psnr_stream;
2038   memset(&psnr_stream, 0, sizeof(psnr_stream));
2039   while (frame_avail || got_data) {
2040     struct aom_usec_timer timer;
2041     frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
2042     // Loop over spatial layers.
2043     for (int slx = 0; slx < ss_number_layers; slx++) {
2044       if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
2045         const int previous_layer_frame_avail = frame_avail;
2046         frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
2047         if (previous_layer_frame_avail != frame_avail) {
2048           die("Mismatch in number of frames between spatial layer input files");
2049         }
2050       }
2051 
2052       aom_codec_iter_t iter = NULL;
2053       const aom_codec_cx_pkt_t *pkt;
2054       int layer = 0;
2055       // Flag for superframe whose base is key.
2056       int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
2057       // For flexible mode:
2058       if (app_input.layering_mode >= 0) {
2059         // Set the reference/update flags, layer_id, and reference_map
2060         // buffer index.
2061         set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
2062                           &ref_frame_config, &ref_frame_comp_pred,
2063                           &use_svc_control, slx, is_key_frame,
2064                           (app_input.layering_mode == 10), app_input.speed);
2065         aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2066         if (use_svc_control) {
2067           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
2068                             &ref_frame_config);
2069           aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
2070                             &ref_frame_comp_pred);
2071         }
2072         if (app_input.multilayer_metadata_file != NULL) {
2073           add_multilayer_metadata(&raw, multilayer_metadata);
2074         }
2075         // Set the speed per layer.
2076         if (test_speed_per_layer) {
2077           int speed_per_layer = 10;
2078           if (layer_id.spatial_layer_id == 0) {
2079             if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
2080             if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
2081             if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
2082           } else if (layer_id.spatial_layer_id == 1) {
2083             if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
2084             if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
2085             if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
2086           } else if (layer_id.spatial_layer_id == 2) {
2087             if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
2088             if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
2089             if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
2090           }
2091           aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
2092         }
2093       } else {
2094         // Only up to 3 temporal layers supported in fixed mode.
2095         // Only need to set spatial and temporal layer_id: reference
2096         // prediction, refresh, and buffer_idx are set internally.
2097         layer_id.spatial_layer_id = slx;
2098         layer_id.temporal_layer_id = 0;
2099         if (ts_number_layers == 2) {
2100           layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
2101         } else if (ts_number_layers == 3) {
2102           if (frame_cnt % 2 != 0)
2103             layer_id.temporal_layer_id = 2;
2104           else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
2105             layer_id.temporal_layer_id = 1;
2106         }
2107         aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2108       }
2109 
2110       if (set_err_resil_frame && cfg.g_error_resilient == 0) {
2111         // Set error_resilient per frame: off/0 for base layer and
2112         // on/1 for enhancement layer frames.
2113         // Note that this can only be done on the fly/per-frame/layer
2114         // if the config error_resilience is off/0. See the logic for updating
2115         // in set_encoder_config():
2116         // tool_cfg->error_resilient_mode =
2117         //     cfg->g_error_resilient | extra_cfg->error_resilient_mode;
2118         const int err_resil_mode =
2119             layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
2120         aom_codec_control(&codec, AV1E_SET_ERROR_RESILIENT_MODE,
2121                           err_resil_mode);
2122       }
2123 
2124       layer = slx * ts_number_layers + layer_id.temporal_layer_id;
2125       if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
2126 
2127       if (test_dynamic_scaling_single_layer) {
2128         // Example to scale source down by 2x2, then 4x4, and then back up to
2129         // 2x2, and then back to original.
2130         int frame_2x2 = 200;
2131         int frame_4x4 = 400;
2132         int frame_2x2up = 600;
2133         int frame_orig = 800;
2134         if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
2135           // Scale source down by 2x2.
2136           struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2137           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2138         } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
2139           // Scale source down by 4x4.
2140           struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
2141           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2142         } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
2143           // Source back up to 2x2.
2144           struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2145           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2146         } else if (frame_cnt >= frame_orig) {
2147           // Source back up to original resolution (no scaling).
2148           struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
2149           aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2150         }
2151         if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
2152             frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
2153           // For dynamic resize testing on single layer: refresh all references
2154           // on the resized frame: this is to avoid decode error:
2155           // if resize goes down by >= 4x4 then libaom decoder will throw an
2156           // error that some reference (even though not used) is beyond the
2157           // limit size (must be smaller than 4x4).
2158           for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
2159           if (use_svc_control) {
2160             aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
2161                               &ref_frame_config);
2162             aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_COMP_PRED,
2163                               &ref_frame_comp_pred);
2164           }
2165         }
2166       }
2167 
2168       // Change target_bitrate every other frame.
2169       if (test_changing_bitrate && frame_cnt % 2 == 0) {
2170         if (frame_cnt < 500)
2171           cfg.rc_target_bitrate += 10;
2172         else
2173           cfg.rc_target_bitrate -= 10;
2174         // Do big increase and decrease.
2175         if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
2176         if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
2177         if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
2178         // Call change_config, or bypass with new control.
2179         // res = aom_codec_enc_config_set(&codec, &cfg);
2180         if (aom_codec_control(&codec, AV1E_SET_BITRATE_ONE_PASS_CBR,
2181                               cfg.rc_target_bitrate))
2182           die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
2183       }
2184 
2185       if (rc_api) {
2186         aom::AV1FrameParamsRTC frame_params;
2187         // TODO(jianj): Add support for SVC.
2188         frame_params.spatial_layer_id = 0;
2189         frame_params.temporal_layer_id = 0;
2190         frame_params.frame_type =
2191             is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
2192         rc_api->ComputeQP(frame_params);
2193         const int current_qp = rc_api->GetQP();
2194         if (aom_codec_control(&codec, AV1E_SET_QUANTIZER_ONE_PASS,
2195                               qindex_to_quantizer(current_qp))) {
2196           die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
2197         }
2198       }
2199 
2200       if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
2201 
2202       // Do the layer encode.
2203       aom_usec_timer_start(&timer);
2204       if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
2205         die_codec(&codec, "Failed to encode frame");
2206       aom_usec_timer_mark(&timer);
2207       cx_time += aom_usec_timer_elapsed(&timer);
2208       cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
2209       frame_cnt_layer[layer] += 1;
2210 
2211       // Get the high motion content flag.
2212       int content_flag = 0;
2213       if (aom_codec_control(&codec, AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC,
2214                             &content_flag)) {
2215         die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
2216       }
2217 
2218       got_data = 0;
2219       // For simulcast (mode 11): write out each spatial layer to the file.
2220       int ss_layers_write = (app_input.layering_mode == 11)
2221                                 ? layer_id.spatial_layer_id + 1
2222                                 : ss_number_layers;
2223       while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
2224         switch (pkt->kind) {
2225           case AOM_CODEC_CX_FRAME_PKT:
2226             for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
2227                  ++sl) {
2228               for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
2229                    ++tl) {
2230                 int j = sl * ts_number_layers + tl;
2231                 if (app_input.output_obu) {
2232                   fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2233                          obu_files[j]);
2234                 } else {
2235                   aom_video_writer_write_frame(
2236                       outfile[j],
2237                       reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2238                       pkt->data.frame.sz, pts);
2239                 }
2240                 if (sl == layer_id.spatial_layer_id)
2241                   rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
2242               }
2243             }
2244             got_data = 1;
2245             // Write everything into the top layer.
2246             if (app_input.output_obu) {
2247               fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2248                      total_layer_obu_file);
2249             } else {
2250               aom_video_writer_write_frame(
2251                   total_layer_file,
2252                   reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2253                   pkt->data.frame.sz, pts);
2254             }
2255             // Keep count of rate control stats per layer (for non-key).
2256             if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
2257               int j = layer_id.spatial_layer_id * ts_number_layers +
2258                       layer_id.temporal_layer_id;
2259               assert(j >= 0);
2260               rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
2261               rc.layer_avg_rate_mismatch[j] +=
2262                   fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
2263                   rc.layer_pfb[j];
2264               if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
2265             }
2266 
2267             if (rc_api) {
2268               rc_api->PostEncodeUpdate(pkt->data.frame.sz);
2269             }
2270             // Update for short-time encoding bitrate states, for moving window
2271             // of size rc->window, shifted by rc->window / 2.
2272             // Ignore first window segment, due to key frame.
2273             // For spatial layers: only do this for top/highest SL.
2274             if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
2275               sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2276               rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
2277               if (frame_cnt % rc.window_size == 0) {
2278                 rc.window_count += 1;
2279                 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
2280                 rc.variance_st_encoding_bitrate +=
2281                     (sum_bitrate / rc.window_size) *
2282                     (sum_bitrate / rc.window_size);
2283                 sum_bitrate = 0.0;
2284               }
2285             }
2286             // Second shifted window.
2287             if (frame_cnt > rc.window_size + rc.window_size / 2 &&
2288                 slx == ss_number_layers - 1) {
2289               sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2290               if (frame_cnt > 2 * rc.window_size &&
2291                   frame_cnt % rc.window_size == 0) {
2292                 rc.window_count += 1;
2293                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2294                 rc.variance_st_encoding_bitrate +=
2295                     (sum_bitrate2 / rc.window_size) *
2296                     (sum_bitrate2 / rc.window_size);
2297                 sum_bitrate2 = 0.0;
2298               }
2299             }
2300 
2301 #if CONFIG_AV1_DECODER
2302             if (app_input.decode) {
2303               if (aom_codec_decode(
2304                       &decoder,
2305                       reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2306                       pkt->data.frame.sz, NULL))
2307                 die_codec(&decoder, "Failed to decode frame");
2308             }
2309 #endif
2310 
2311             break;
2312           case AOM_CODEC_PSNR_PKT:
2313             if (app_input.show_psnr) {
2314               psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2315               psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2316               for (int plane = 0; plane < 4; plane++) {
2317                 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2318               }
2319               psnr_stream.psnr_count[0]++;
2320             }
2321             break;
2322           default: break;
2323         }
2324       }
2325 #if CONFIG_AV1_DECODER
2326       if (got_data && app_input.decode) {
2327         // Don't look for mismatch on top spatial and top temporal layers as
2328         // they are non reference frames.
2329         if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2330             !(layer_id.temporal_layer_id > 0 &&
2331               layer_id.temporal_layer_id == ts_number_layers - 1)) {
2332           if (test_decode(&codec, &decoder, frame_cnt)) {
2333 #if CONFIG_INTERNAL_STATS
2334             fprintf(stats_file, "First mismatch occurred in frame %d\n",
2335                     frame_cnt);
2336             fclose(stats_file);
2337 #endif
2338             fatal("Mismatch seen");
2339           }
2340         }
2341       }
2342 #endif
2343     }  // loop over spatial layers
2344     ++frame_cnt;
2345     pts += frame_duration;
2346   }
2347 
2348   for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
2349     if (app_input.input_ctx[i].filename == NULL) {
2350       break;
2351     }
2352     close_input_file(&(app_input.input_ctx[i]));
2353   }
2354   printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2355                                 ts_number_layers);
2356 
2357   printf("\n");
2358   for (int slx = 0; slx < ss_number_layers; slx++)
2359     for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2360       int lx = slx * ts_number_layers + tlx;
2361       printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2362              slx, tlx, frame_cnt_layer[lx],
2363              (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2364              1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2365     }
2366 
2367   printf("\n");
2368   printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2369          frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2370          1000000 * (double)frame_cnt / (double)cx_time);
2371 
2372   if (app_input.show_psnr) {
2373     show_psnr(&psnr_stream, 255.0);
2374   }
2375 
2376   if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2377 
2378 #if CONFIG_AV1_DECODER
2379   if (app_input.decode) {
2380     if (aom_codec_destroy(&decoder))
2381       die_codec(&decoder, "Failed to destroy decoder");
2382   }
2383 #endif
2384 
2385 #if CONFIG_INTERNAL_STATS
2386   fprintf(stats_file, "No mismatch detected in recon buffers\n");
2387   fclose(stats_file);
2388 #endif
2389 
2390   // Try to rewrite the output file headers with the actual frame count.
2391   for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2392     aom_video_writer_close(outfile[i]);
2393   aom_video_writer_close(total_layer_file);
2394 
2395   if (has_non_y4m_input) {
2396     aom_img_free(&raw);
2397   }
2398   return EXIT_SUCCESS;
2399 }
2400