• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include "modules/video_coding/codecs/av1/libaom_av1_encoder.h"
11 
12 #include <stddef.h>
13 #include <stdint.h>
14 
15 #include <memory>
16 #include <utility>
17 #include <vector>
18 
19 #include "absl/algorithm/container.h"
20 #include "absl/base/macros.h"
21 #include "absl/types/optional.h"
22 #include "api/scoped_refptr.h"
23 #include "api/video/encoded_image.h"
24 #include "api/video/i420_buffer.h"
25 #include "api/video/video_frame.h"
26 #include "api/video_codecs/video_codec.h"
27 #include "api/video_codecs/video_encoder.h"
28 #include "modules/video_coding/include/video_codec_interface.h"
29 #include "modules/video_coding/include/video_error_codes.h"
30 #include "modules/video_coding/svc/create_scalability_structure.h"
31 #include "modules/video_coding/svc/scalable_video_controller.h"
32 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
33 #include "rtc_base/checks.h"
34 #include "rtc_base/logging.h"
35 #include "rtc_base/numerics/sequence_number_util.h"
36 #include "third_party/libaom/source/libaom/aom/aom_codec.h"
37 #include "third_party/libaom/source/libaom/aom/aom_encoder.h"
38 #include "third_party/libaom/source/libaom/aom/aomcx.h"
39 
// Applies one encoder control through SetEncoderControlParameters() and makes
// the *enclosing* function return WEBRTC_VIDEO_CODEC_ERROR when that call
// reports failure. The do/while wrapper lets the macro be used as a single
// statement terminated by a semicolon.
#define SET_ENCODER_PARAM_OR_RETURN_ERROR(param_id, param_value) \
  do {                                                           \
    if (!SetEncoderControlParameters(param_id, param_value)) {   \
      return WEBRTC_VIDEO_CODEC_ERROR;                           \
    }                                                            \
  } while (false)
46 
47 namespace webrtc {
48 namespace {
49 
50 // Encoder configuration parameters
51 constexpr int kQpMin = 10;
52 constexpr int kUsageProfile = AOM_USAGE_REALTIME;
53 constexpr int kMinQindex = 145;  // Min qindex threshold for QP scaling.
54 constexpr int kMaxQindex = 205;  // Max qindex threshold for QP scaling.
55 constexpr int kBitDepth = 8;
56 constexpr int kLagInFrames = 0;  // No look ahead.
57 constexpr int kRtpTicksPerSecond = 90000;
58 constexpr float kMinimumFrameRate = 1.0;
59 
GetSuperblockSize(int width,int height,int threads)60 aom_superblock_size_t GetSuperblockSize(int width, int height, int threads) {
61   int resolution = width * height;
62   if (threads >= 4 && resolution >= 960 * 540 && resolution < 1920 * 1080)
63     return AOM_SUPERBLOCK_SIZE_64X64;
64   else
65     return AOM_SUPERBLOCK_SIZE_DYNAMIC;
66 }
67 
68 class LibaomAv1Encoder final : public VideoEncoder {
69  public:
70   explicit LibaomAv1Encoder(
71       const absl::optional<LibaomAv1EncoderAuxConfig>& aux_config);
72   ~LibaomAv1Encoder();
73 
74   int InitEncode(const VideoCodec* codec_settings,
75                  const Settings& settings) override;
76 
77   int32_t RegisterEncodeCompleteCallback(
78       EncodedImageCallback* encoded_image_callback) override;
79 
80   int32_t Release() override;
81 
82   int32_t Encode(const VideoFrame& frame,
83                  const std::vector<VideoFrameType>* frame_types) override;
84 
85   void SetRates(const RateControlParameters& parameters) override;
86 
87   EncoderInfo GetEncoderInfo() const override;
88 
89  private:
90   template <typename P>
91   bool SetEncoderControlParameters(int param_id, P param_value);
92 
93   // Get value to be used for encoder cpu_speed setting
94   int GetCpuSpeed(int width, int height);
95 
96   // Determine number of encoder threads to use.
97   int NumberOfThreads(int width, int height, int number_of_cores);
98 
SvcEnabled() const99   bool SvcEnabled() const { return svc_params_.has_value(); }
100   // Fills svc_params_ memeber value. Returns false on error.
101   bool SetSvcParams(ScalableVideoController::StreamLayersConfig svc_config);
102   // Configures the encoder with layer for the next frame.
103   void SetSvcLayerId(
104       const ScalableVideoController::LayerFrameConfig& layer_frame);
105   // Configures the encoder which buffers next frame updates and can reference.
106   void SetSvcRefFrameConfig(
107       const ScalableVideoController::LayerFrameConfig& layer_frame);
108   // If pixel format doesn't match, then reallocate.
109   void MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt);
110 
111   std::unique_ptr<ScalableVideoController> svc_controller_;
112   bool inited_;
113   bool rates_configured_;
114   absl::optional<aom_svc_params_t> svc_params_;
115   VideoCodec encoder_settings_;
116   absl::optional<LibaomAv1EncoderAuxConfig> aux_config_;
117   aom_image_t* frame_for_encode_;
118   aom_codec_ctx_t ctx_;
119   aom_codec_enc_cfg_t cfg_;
120   EncodedImageCallback* encoded_image_callback_;
121   SeqNumUnwrapper<uint32_t> rtp_timestamp_unwrapper_;
122 };
123 
VerifyCodecSettings(const VideoCodec & codec_settings)124 int32_t VerifyCodecSettings(const VideoCodec& codec_settings) {
125   if (codec_settings.width < 1) {
126     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
127   }
128   if (codec_settings.height < 1) {
129     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
130   }
131   // maxBitrate == 0 represents an unspecified maxBitRate.
132   if (codec_settings.maxBitrate > 0 &&
133       codec_settings.minBitrate > codec_settings.maxBitrate) {
134     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
135   }
136   if (codec_settings.maxBitrate > 0 &&
137       codec_settings.startBitrate > codec_settings.maxBitrate) {
138     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
139   }
140   if (codec_settings.startBitrate < codec_settings.minBitrate) {
141     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
142   }
143   if (codec_settings.maxFramerate < 1) {
144     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
145   }
146   return WEBRTC_VIDEO_CODEC_OK;
147 }
148 
LibaomAv1Encoder(const absl::optional<LibaomAv1EncoderAuxConfig> & aux_config)149 LibaomAv1Encoder::LibaomAv1Encoder(
150     const absl::optional<LibaomAv1EncoderAuxConfig>& aux_config)
151     : inited_(false),
152       rates_configured_(false),
153       aux_config_(aux_config),
154       frame_for_encode_(nullptr),
155       encoded_image_callback_(nullptr) {}
156 
~LibaomAv1Encoder()157 LibaomAv1Encoder::~LibaomAv1Encoder() {
158   Release();
159 }
160 
InitEncode(const VideoCodec * codec_settings,const Settings & settings)161 int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings,
162                                  const Settings& settings) {
163   if (codec_settings == nullptr) {
164     RTC_LOG(LS_WARNING) << "No codec settings provided to "
165                            "LibaomAv1Encoder.";
166     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
167   }
168   if (settings.number_of_cores < 1) {
169     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
170   }
171   if (inited_) {
172     RTC_LOG(LS_WARNING) << "Initing LibaomAv1Encoder without first releasing.";
173     Release();
174   }
175   encoder_settings_ = *codec_settings;
176 
177   // Sanity checks for encoder configuration.
178   const int32_t result = VerifyCodecSettings(encoder_settings_);
179   if (result < 0) {
180     RTC_LOG(LS_WARNING) << "Incorrect codec settings provided to "
181                            "LibaomAv1Encoder.";
182     return result;
183   }
184   if (encoder_settings_.numberOfSimulcastStreams > 1) {
185     RTC_LOG(LS_WARNING) << "Simulcast is not implemented by LibaomAv1Encoder.";
186     return result;
187   }
188   absl::optional<ScalabilityMode> scalability_mode =
189       encoder_settings_.GetScalabilityMode();
190   if (!scalability_mode.has_value()) {
191     RTC_LOG(LS_WARNING) << "Scalability mode is not set, using 'L1T1'.";
192     scalability_mode = ScalabilityMode::kL1T1;
193   }
194   svc_controller_ = CreateScalabilityStructure(*scalability_mode);
195   if (svc_controller_ == nullptr) {
196     RTC_LOG(LS_WARNING) << "Failed to set scalability mode "
197                         << static_cast<int>(*scalability_mode);
198     return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
199   }
200 
201   if (!SetSvcParams(svc_controller_->StreamConfig())) {
202     return WEBRTC_VIDEO_CODEC_ERROR;
203   }
204 
205   // Initialize encoder configuration structure with default values
206   aom_codec_err_t ret =
207       aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg_, kUsageProfile);
208   if (ret != AOM_CODEC_OK) {
209     RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret
210                         << " on aom_codec_enc_config_default.";
211     return WEBRTC_VIDEO_CODEC_ERROR;
212   }
213 
214   // Overwrite default config with input encoder settings & RTC-relevant values.
215   cfg_.g_w = encoder_settings_.width;
216   cfg_.g_h = encoder_settings_.height;
217   cfg_.g_threads =
218       NumberOfThreads(cfg_.g_w, cfg_.g_h, settings.number_of_cores);
219   cfg_.g_timebase.num = 1;
220   cfg_.g_timebase.den = kRtpTicksPerSecond;
221   cfg_.rc_target_bitrate = encoder_settings_.maxBitrate;  // kilobits/sec.
222   cfg_.g_input_bit_depth = kBitDepth;
223   cfg_.kf_mode = AOM_KF_DISABLED;
224   cfg_.rc_min_quantizer = kQpMin;
225   cfg_.rc_max_quantizer = encoder_settings_.qpMax;
226   cfg_.rc_undershoot_pct = 50;
227   cfg_.rc_overshoot_pct = 50;
228   cfg_.rc_buf_initial_sz = 600;
229   cfg_.rc_buf_optimal_sz = 600;
230   cfg_.rc_buf_sz = 1000;
231   cfg_.g_usage = kUsageProfile;
232   cfg_.g_error_resilient = 0;
233   // Low-latency settings.
234   cfg_.rc_end_usage = AOM_CBR;          // Constant Bit Rate (CBR) mode
235   cfg_.g_pass = AOM_RC_ONE_PASS;        // One-pass rate control
236   cfg_.g_lag_in_frames = kLagInFrames;  // No look ahead when lag equals 0.
237 
238   if (frame_for_encode_ != nullptr) {
239     aom_img_free(frame_for_encode_);
240     frame_for_encode_ = nullptr;
241   }
242 
243   // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH
244   aom_codec_flags_t flags = 0;
245 
246   // Initialize an encoder instance.
247   ret = aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, flags);
248   if (ret != AOM_CODEC_OK) {
249     RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret
250                         << " on aom_codec_enc_init.";
251     return WEBRTC_VIDEO_CODEC_ERROR;
252   }
253   inited_ = true;
254 
255   // Set control parameters
256   SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED,
257                                     GetCpuSpeed(cfg_.g_w, cfg_.g_h));
258   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CDEF, 1);
259   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
260   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
261   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
262   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
263   SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
264   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
265   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MODE_COST_UPD_FREQ, 3);
266   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MV_COST_UPD_FREQ, 3);
267 
268   if (codec_settings->mode == VideoCodecMode::kScreensharing) {
269     SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TUNE_CONTENT,
270                                       AOM_CONTENT_SCREEN);
271     SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 1);
272   } else {
273     SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 0);
274   }
275 
276   if (cfg_.g_threads == 4 && cfg_.g_w == 640 &&
277       (cfg_.g_h == 360 || cfg_.g_h == 480)) {
278     SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS,
279                                       static_cast<int>(log2(cfg_.g_threads)));
280   } else {
281     SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS,
282                                       static_cast<int>(log2(cfg_.g_threads)));
283   }
284 
285   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ROW_MT, 1);
286   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_OBMC, 0);
287   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_NOISE_SENSITIVITY, 0);
288   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_WARPED_MOTION, 0);
289   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
290   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
291   SET_ENCODER_PARAM_OR_RETURN_ERROR(
292       AV1E_SET_SUPERBLOCK_SIZE,
293       GetSuperblockSize(cfg_.g_w, cfg_.g_h, cfg_.g_threads));
294   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CFL_INTRA, 0);
295   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
296   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
297   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_FILTER_INTRA, 0);
298   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
299   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
300   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
301   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
302   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DUAL_FILTER, 0);
303   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
304   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
305   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
306   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRABC, 0);
307   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_MASKED_COMP, 0);
308   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PAETH_INTRA, 0);
309   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_QM, 0);
310   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
311   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RESTORATION, 0);
312   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
313   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TX64, 0);
314   SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MAX_REFERENCE_FRAMES, 3);
315 
316   return WEBRTC_VIDEO_CODEC_OK;
317 }
318 
319 template <typename P>
SetEncoderControlParameters(int param_id,P param_value)320 bool LibaomAv1Encoder::SetEncoderControlParameters(int param_id,
321                                                    P param_value) {
322   aom_codec_err_t error_code = aom_codec_control(&ctx_, param_id, param_value);
323   if (error_code != AOM_CODEC_OK) {
324     RTC_LOG(LS_WARNING)
325         << "LibaomAv1Encoder::SetEncoderControlParameters returned "
326         << error_code << " on id:  " << param_id << ".";
327   }
328   return error_code == AOM_CODEC_OK;
329 }
330 
331 // Only positive speeds, range for real-time coding currently is: 6 - 8.
332 // Lower means slower/better quality, higher means fastest/lower quality.
GetCpuSpeed(int width,int height)333 int LibaomAv1Encoder::GetCpuSpeed(int width, int height) {
334   if (aux_config_) {
335     if (auto it = aux_config_->max_pixel_count_to_cpu_speed.lower_bound(width *
336                                                                         height);
337         it != aux_config_->max_pixel_count_to_cpu_speed.end()) {
338       return it->second;
339     }
340 
341     return 10;
342   } else {
343     // For smaller resolutions, use lower speed setting (get some coding gain at
344     // the cost of increased encoding complexity).
345     switch (encoder_settings_.GetVideoEncoderComplexity()) {
346       case VideoCodecComplexity::kComplexityHigh:
347         if (width * height <= 320 * 180)
348           return 8;
349         else if (width * height <= 640 * 360)
350           return 9;
351         else
352           return 10;
353       case VideoCodecComplexity::kComplexityHigher:
354         if (width * height <= 320 * 180)
355           return 7;
356         else if (width * height <= 640 * 360)
357           return 8;
358         else if (width * height <= 1280 * 720)
359           return 9;
360         else
361           return 10;
362       case VideoCodecComplexity::kComplexityMax:
363         if (width * height <= 320 * 180)
364           return 6;
365         else if (width * height <= 640 * 360)
366           return 7;
367         else if (width * height <= 1280 * 720)
368           return 8;
369         else
370           return 9;
371       default:
372         return 10;
373     }
374   }
375 }
376 
NumberOfThreads(int width,int height,int number_of_cores)377 int LibaomAv1Encoder::NumberOfThreads(int width,
378                                       int height,
379                                       int number_of_cores) {
380   // Keep the number of encoder threads equal to the possible number of
381   // column/row tiles, which is (1, 2, 4, 8). See comments below for
382   // AV1E_SET_TILE_COLUMNS/ROWS.
383   if (width * height >= 640 * 360 && number_of_cores > 4) {
384     return 4;
385   } else if (width * height >= 320 * 180 && number_of_cores > 2) {
386     return 2;
387   } else {
388 // Use 2 threads for low res on ARM.
389 #if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
390     defined(WEBRTC_ANDROID)
391     if (width * height >= 320 * 180 && number_of_cores > 2) {
392       return 2;
393     }
394 #endif
395     // 1 thread less than VGA.
396     return 1;
397   }
398 }
399 
SetSvcParams(ScalableVideoController::StreamLayersConfig svc_config)400 bool LibaomAv1Encoder::SetSvcParams(
401     ScalableVideoController::StreamLayersConfig svc_config) {
402   bool svc_enabled =
403       svc_config.num_spatial_layers > 1 || svc_config.num_temporal_layers > 1;
404   if (!svc_enabled) {
405     svc_params_ = absl::nullopt;
406     return true;
407   }
408   if (svc_config.num_spatial_layers < 1 || svc_config.num_spatial_layers > 4) {
409     RTC_LOG(LS_WARNING) << "Av1 supports up to 4 spatial layers. "
410                         << svc_config.num_spatial_layers << " configured.";
411     return false;
412   }
413   if (svc_config.num_temporal_layers < 1 ||
414       svc_config.num_temporal_layers > 8) {
415     RTC_LOG(LS_WARNING) << "Av1 supports up to 8 temporal layers. "
416                         << svc_config.num_temporal_layers << " configured.";
417     return false;
418   }
419   aom_svc_params_t& svc_params = svc_params_.emplace();
420   svc_params.number_spatial_layers = svc_config.num_spatial_layers;
421   svc_params.number_temporal_layers = svc_config.num_temporal_layers;
422 
423   int num_layers =
424       svc_config.num_spatial_layers * svc_config.num_temporal_layers;
425   for (int i = 0; i < num_layers; ++i) {
426     svc_params.min_quantizers[i] = kQpMin;
427     svc_params.max_quantizers[i] = encoder_settings_.qpMax;
428   }
429 
430   // Assume each temporal layer doubles framerate.
431   for (int tid = 0; tid < svc_config.num_temporal_layers; ++tid) {
432     svc_params.framerate_factor[tid] =
433         1 << (svc_config.num_temporal_layers - tid - 1);
434   }
435 
436   for (int sid = 0; sid < svc_config.num_spatial_layers; ++sid) {
437     svc_params.scaling_factor_num[sid] = svc_config.scaling_factor_num[sid];
438     svc_params.scaling_factor_den[sid] = svc_config.scaling_factor_den[sid];
439   }
440 
441   return true;
442 }
443 
SetSvcLayerId(const ScalableVideoController::LayerFrameConfig & layer_frame)444 void LibaomAv1Encoder::SetSvcLayerId(
445     const ScalableVideoController::LayerFrameConfig& layer_frame) {
446   aom_svc_layer_id_t layer_id = {};
447   layer_id.spatial_layer_id = layer_frame.SpatialId();
448   layer_id.temporal_layer_id = layer_frame.TemporalId();
449   SetEncoderControlParameters(AV1E_SET_SVC_LAYER_ID, &layer_id);
450 }
451 
SetSvcRefFrameConfig(const ScalableVideoController::LayerFrameConfig & layer_frame)452 void LibaomAv1Encoder::SetSvcRefFrameConfig(
453     const ScalableVideoController::LayerFrameConfig& layer_frame) {
454   // Buffer name to use for each layer_frame.buffers position. In particular
455   // when there are 2 buffers are referenced, prefer name them last and golden,
456   // because av1 bitstream format has dedicated fields for these two names.
457   // See last_frame_idx and golden_frame_idx in the av1 spec
458   // https://aomediacodec.github.io/av1-spec/av1-spec.pdf
459   static constexpr int kPreferedSlotName[] = {0,  // Last
460                                               3,  // Golden
461                                               1, 2, 4, 5, 6};
462   static constexpr int kAv1NumBuffers = 8;
463 
464   aom_svc_ref_frame_config_t ref_frame_config = {};
465   RTC_CHECK_LE(layer_frame.Buffers().size(), ABSL_ARRAYSIZE(kPreferedSlotName));
466   for (size_t i = 0; i < layer_frame.Buffers().size(); ++i) {
467     const CodecBufferUsage& buffer = layer_frame.Buffers()[i];
468     int slot_name = kPreferedSlotName[i];
469     RTC_CHECK_GE(buffer.id, 0);
470     RTC_CHECK_LT(buffer.id, kAv1NumBuffers);
471     ref_frame_config.ref_idx[slot_name] = buffer.id;
472     if (buffer.referenced) {
473       ref_frame_config.reference[slot_name] = 1;
474     }
475     if (buffer.updated) {
476       ref_frame_config.refresh[buffer.id] = 1;
477     }
478   }
479 
480   SetEncoderControlParameters(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
481 }
482 
RegisterEncodeCompleteCallback(EncodedImageCallback * encoded_image_callback)483 int32_t LibaomAv1Encoder::RegisterEncodeCompleteCallback(
484     EncodedImageCallback* encoded_image_callback) {
485   encoded_image_callback_ = encoded_image_callback;
486   return WEBRTC_VIDEO_CODEC_OK;
487 }
488 
Release()489 int32_t LibaomAv1Encoder::Release() {
490   if (frame_for_encode_ != nullptr) {
491     aom_img_free(frame_for_encode_);
492     frame_for_encode_ = nullptr;
493   }
494   if (inited_) {
495     if (aom_codec_destroy(&ctx_)) {
496       return WEBRTC_VIDEO_CODEC_MEMORY;
497     }
498     inited_ = false;
499   }
500   rates_configured_ = false;
501   return WEBRTC_VIDEO_CODEC_OK;
502 }
503 
MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt)504 void LibaomAv1Encoder::MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt) {
505   if (!frame_for_encode_) {
506     frame_for_encode_ =
507         aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr);
508 
509   } else if (frame_for_encode_->fmt != fmt) {
510     RTC_LOG(LS_INFO) << "Switching AV1 encoder pixel format to "
511                      << (fmt == AOM_IMG_FMT_NV12 ? "NV12" : "I420");
512     aom_img_free(frame_for_encode_);
513     frame_for_encode_ =
514         aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr);
515   }
516   // else no-op since the image is already in the right format.
517 }
518 
Encode(const VideoFrame & frame,const std::vector<VideoFrameType> * frame_types)519 int32_t LibaomAv1Encoder::Encode(
520     const VideoFrame& frame,
521     const std::vector<VideoFrameType>* frame_types) {
522   if (!inited_ || encoded_image_callback_ == nullptr || !rates_configured_) {
523     return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
524   }
525 
526   bool keyframe_required =
527       frame_types != nullptr &&
528       absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey);
529 
530   std::vector<ScalableVideoController::LayerFrameConfig> layer_frames =
531       svc_controller_->NextFrameConfig(keyframe_required);
532 
533   if (layer_frames.empty()) {
534     RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame.";
535     return WEBRTC_VIDEO_CODEC_ERROR;
536   }
537 
538   rtc::scoped_refptr<VideoFrameBuffer> buffer = frame.video_frame_buffer();
539   absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
540       supported_formats = {VideoFrameBuffer::Type::kI420,
541                            VideoFrameBuffer::Type::kNV12};
542   rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
543   if (buffer->type() != VideoFrameBuffer::Type::kNative) {
544     // `buffer` is already mapped.
545     mapped_buffer = buffer;
546   } else {
547     // Attempt to map to one of the supported formats.
548     mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
549   }
550 
551   // Convert input frame to I420, if needed.
552   if (!mapped_buffer ||
553       (absl::c_find(supported_formats, mapped_buffer->type()) ==
554            supported_formats.end() &&
555        mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
556     rtc::scoped_refptr<I420BufferInterface> converted_buffer(buffer->ToI420());
557     if (!converted_buffer) {
558       RTC_LOG(LS_ERROR) << "Failed to convert "
559                         << VideoFrameBufferTypeToString(
560                                frame.video_frame_buffer()->type())
561                         << " image to I420. Can't encode frame.";
562       return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE;
563     }
564     RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
565               converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
566 
567     mapped_buffer = converted_buffer;
568   }
569 
570   switch (mapped_buffer->type()) {
571     case VideoFrameBuffer::Type::kI420:
572     case VideoFrameBuffer::Type::kI420A: {
573       // Set frame_for_encode_ data pointers and strides.
574       MaybeRewrapImgWithFormat(AOM_IMG_FMT_I420);
575       auto i420_buffer = mapped_buffer->GetI420();
576       RTC_DCHECK(i420_buffer);
577       frame_for_encode_->planes[AOM_PLANE_Y] =
578           const_cast<unsigned char*>(i420_buffer->DataY());
579       frame_for_encode_->planes[AOM_PLANE_U] =
580           const_cast<unsigned char*>(i420_buffer->DataU());
581       frame_for_encode_->planes[AOM_PLANE_V] =
582           const_cast<unsigned char*>(i420_buffer->DataV());
583       frame_for_encode_->stride[AOM_PLANE_Y] = i420_buffer->StrideY();
584       frame_for_encode_->stride[AOM_PLANE_U] = i420_buffer->StrideU();
585       frame_for_encode_->stride[AOM_PLANE_V] = i420_buffer->StrideV();
586       break;
587     }
588     case VideoFrameBuffer::Type::kNV12: {
589       MaybeRewrapImgWithFormat(AOM_IMG_FMT_NV12);
590       const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
591       RTC_DCHECK(nv12_buffer);
592       frame_for_encode_->planes[AOM_PLANE_Y] =
593           const_cast<unsigned char*>(nv12_buffer->DataY());
594       frame_for_encode_->planes[AOM_PLANE_U] =
595           const_cast<unsigned char*>(nv12_buffer->DataUV());
596       frame_for_encode_->planes[AOM_PLANE_V] = nullptr;
597       frame_for_encode_->stride[AOM_PLANE_Y] = nv12_buffer->StrideY();
598       frame_for_encode_->stride[AOM_PLANE_U] = nv12_buffer->StrideUV();
599       frame_for_encode_->stride[AOM_PLANE_V] = 0;
600       break;
601     }
602     default:
603       return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE;
604   }
605 
606   const uint32_t duration =
607       kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate);
608 
609   const size_t num_spatial_layers =
610       svc_params_ ? svc_params_->number_spatial_layers : 1;
611   auto next_layer_frame = layer_frames.begin();
612   for (size_t i = 0; i < num_spatial_layers; ++i) {
613     // The libaom AV1 encoder requires that `aom_codec_encode` is called for
614     // every spatial layer, even if the configured bitrate for that layer is
615     // zero. For zero bitrate spatial layers no frames will be produced.
616     absl::optional<ScalableVideoController::LayerFrameConfig>
617         non_encoded_layer_frame;
618     ScalableVideoController::LayerFrameConfig* layer_frame;
619     if (next_layer_frame != layer_frames.end() &&
620         next_layer_frame->SpatialId() == static_cast<int>(i)) {
621       layer_frame = &*next_layer_frame;
622       ++next_layer_frame;
623     } else {
624       // For layers that are not encoded only the spatial id matters.
625       non_encoded_layer_frame.emplace().S(i);
626       layer_frame = &*non_encoded_layer_frame;
627     }
628     const bool end_of_picture = (next_layer_frame == layer_frames.end());
629 
630     aom_enc_frame_flags_t flags =
631         layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
632 
633     if (SvcEnabled()) {
634       SetSvcLayerId(*layer_frame);
635       SetSvcRefFrameConfig(*layer_frame);
636 
637       SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ERROR_RESILIENT_MODE,
638                                         layer_frame->TemporalId() > 0 ? 1 : 0);
639     }
640 
641     // Encode a frame. The presentation timestamp `pts` should never wrap, hence
642     // the unwrapping.
643     aom_codec_err_t ret = aom_codec_encode(
644         &ctx_, frame_for_encode_,
645         rtp_timestamp_unwrapper_.Unwrap(frame.timestamp()), duration, flags);
646     if (ret != AOM_CODEC_OK) {
647       RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
648                           << " on aom_codec_encode.";
649       return WEBRTC_VIDEO_CODEC_ERROR;
650     }
651 
652     if (non_encoded_layer_frame) {
653       continue;
654     }
655 
656     // Get encoded image data.
657     EncodedImage encoded_image;
658     aom_codec_iter_t iter = nullptr;
659     int data_pkt_count = 0;
660     while (const aom_codec_cx_pkt_t* pkt =
661                aom_codec_get_cx_data(&ctx_, &iter)) {
662       if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
663         if (data_pkt_count > 0) {
664           RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
665                                  "one data packet for an input video frame.";
666           Release();
667         }
668         encoded_image.SetEncodedData(EncodedImageBuffer::Create(
669             /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf),
670             /*size=*/pkt->data.frame.sz));
671 
672         if ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0) {
673           layer_frame->Keyframe();
674         }
675 
676         encoded_image._frameType = layer_frame->IsKeyframe()
677                                        ? VideoFrameType::kVideoFrameKey
678                                        : VideoFrameType::kVideoFrameDelta;
679         encoded_image.SetTimestamp(frame.timestamp());
680         encoded_image.capture_time_ms_ = frame.render_time_ms();
681         encoded_image.rotation_ = frame.rotation();
682         encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
683         // If encoded image width/height info are added to aom_codec_cx_pkt_t,
684         // use those values in lieu of the values in frame.
685         if (svc_params_) {
686           int n = svc_params_->scaling_factor_num[layer_frame->SpatialId()];
687           int d = svc_params_->scaling_factor_den[layer_frame->SpatialId()];
688           encoded_image._encodedWidth = cfg_.g_w * n / d;
689           encoded_image._encodedHeight = cfg_.g_h * n / d;
690           encoded_image.SetSpatialIndex(layer_frame->SpatialId());
691           encoded_image.SetTemporalIndex(layer_frame->TemporalId());
692         } else {
693           encoded_image._encodedWidth = cfg_.g_w;
694           encoded_image._encodedHeight = cfg_.g_h;
695         }
696         encoded_image.timing_.flags = VideoSendTiming::kInvalid;
697 
698         int qp = -1;
699         SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_GET_LAST_QUANTIZER, &qp);
700         encoded_image.qp_ = qp;
701 
702         encoded_image.SetColorSpace(frame.color_space());
703         ++data_pkt_count;
704       }
705     }
706 
707     // Deliver encoded image data.
708     if (encoded_image.size() > 0) {
709       CodecSpecificInfo codec_specific_info;
710       codec_specific_info.codecType = kVideoCodecAV1;
711       codec_specific_info.end_of_picture = end_of_picture;
712       bool is_keyframe = layer_frame->IsKeyframe();
713       codec_specific_info.generic_frame_info =
714           svc_controller_->OnEncodeDone(*layer_frame);
715       if (is_keyframe && codec_specific_info.generic_frame_info) {
716         codec_specific_info.template_structure =
717             svc_controller_->DependencyStructure();
718         auto& resolutions = codec_specific_info.template_structure->resolutions;
719         if (SvcEnabled()) {
720           resolutions.resize(svc_params_->number_spatial_layers);
721           for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
722             int n = svc_params_->scaling_factor_num[sid];
723             int d = svc_params_->scaling_factor_den[sid];
724             resolutions[sid] =
725                 RenderResolution(cfg_.g_w * n / d, cfg_.g_h * n / d);
726           }
727         } else {
728           resolutions = {RenderResolution(cfg_.g_w, cfg_.g_h)};
729         }
730       }
731       encoded_image_callback_->OnEncodedImage(encoded_image,
732                                               &codec_specific_info);
733     }
734   }
735 
736   return WEBRTC_VIDEO_CODEC_OK;
737 }
738 
SetRates(const RateControlParameters & parameters)739 void LibaomAv1Encoder::SetRates(const RateControlParameters& parameters) {
740   if (!inited_) {
741     RTC_LOG(LS_WARNING) << "SetRates() while encoder is not initialized";
742     return;
743   }
744   if (parameters.framerate_fps < kMinimumFrameRate) {
745     RTC_LOG(LS_WARNING) << "Unsupported framerate (must be >= "
746                         << kMinimumFrameRate
747                         << " ): " << parameters.framerate_fps;
748     return;
749   }
750   if (parameters.bitrate.get_sum_bps() == 0) {
751     RTC_LOG(LS_WARNING) << "Attempt to set target bit rate to zero";
752     return;
753   }
754 
755   // The bitrates caluclated internally in libaom when `AV1E_SET_SVC_PARAMS` is
756   // called depends on the currently configured `rc_target_bitrate`. If the
757   // total target bitrate is not updated first a division by zero could happen.
758   svc_controller_->OnRatesUpdated(parameters.bitrate);
759   cfg_.rc_target_bitrate = parameters.bitrate.get_sum_kbps();
760   aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_);
761   if (error_code != AOM_CODEC_OK) {
762     RTC_LOG(LS_WARNING) << "Error configuring encoder, error code: "
763                         << error_code;
764   }
765 
766   if (SvcEnabled()) {
767     for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
768       // libaom bitrate for spatial id S and temporal id T means bitrate
769       // of frames with spatial_id=S and temporal_id<=T
770       // while `parameters.bitrate` provdies bitrate of frames with
771       // spatial_id=S and temporal_id=T
772       int accumulated_bitrate_bps = 0;
773       for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) {
774         int layer_index = sid * svc_params_->number_temporal_layers + tid;
775         accumulated_bitrate_bps += parameters.bitrate.GetBitrate(sid, tid);
776         // `svc_params.layer_target_bitrate` expects bitrate in kbps.
777         svc_params_->layer_target_bitrate[layer_index] =
778             accumulated_bitrate_bps / 1000;
779       }
780     }
781     SetEncoderControlParameters(AV1E_SET_SVC_PARAMS, &*svc_params_);
782   }
783 
784   rates_configured_ = true;
785 
786   // Set frame rate to closest integer value.
787   encoder_settings_.maxFramerate =
788       static_cast<uint32_t>(parameters.framerate_fps + 0.5);
789 }
790 
GetEncoderInfo() const791 VideoEncoder::EncoderInfo LibaomAv1Encoder::GetEncoderInfo() const {
792   EncoderInfo info;
793   info.supports_native_handle = false;
794   info.implementation_name = "libaom";
795   info.has_trusted_rate_controller = true;
796   info.is_hardware_accelerated = false;
797   info.scaling_settings = VideoEncoder::ScalingSettings(kMinQindex, kMaxQindex);
798   info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
799                                   VideoFrameBuffer::Type::kNV12};
800   if (SvcEnabled()) {
801     for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
802       info.fps_allocation[sid].resize(svc_params_->number_temporal_layers);
803       for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) {
804         info.fps_allocation[sid][tid] =
805             encoder_settings_.maxFramerate / svc_params_->framerate_factor[tid];
806       }
807     }
808   }
809   return info;
810 }
811 
812 }  // namespace
813 
CreateLibaomAv1Encoder()814 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder() {
815   return std::make_unique<LibaomAv1Encoder>(absl::nullopt);
816 }
817 
CreateLibaomAv1Encoder(const LibaomAv1EncoderAuxConfig & aux_config)818 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
819     const LibaomAv1EncoderAuxConfig& aux_config) {
820   return std::make_unique<LibaomAv1Encoder>(aux_config);
821 }
822 
823 }  // namespace webrtc
824