1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "modules/video_coding/codecs/av1/libaom_av1_encoder.h"
11
12 #include <stddef.h>
13 #include <stdint.h>
14
15 #include <memory>
16 #include <utility>
17 #include <vector>
18
19 #include "absl/algorithm/container.h"
20 #include "absl/base/macros.h"
21 #include "absl/types/optional.h"
22 #include "api/scoped_refptr.h"
23 #include "api/video/encoded_image.h"
24 #include "api/video/i420_buffer.h"
25 #include "api/video/video_frame.h"
26 #include "api/video_codecs/video_codec.h"
27 #include "api/video_codecs/video_encoder.h"
28 #include "modules/video_coding/include/video_codec_interface.h"
29 #include "modules/video_coding/include/video_error_codes.h"
30 #include "modules/video_coding/svc/create_scalability_structure.h"
31 #include "modules/video_coding/svc/scalable_video_controller.h"
32 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
33 #include "rtc_base/checks.h"
34 #include "rtc_base/logging.h"
35 #include "rtc_base/numerics/sequence_number_util.h"
36 #include "third_party/libaom/source/libaom/aom/aom_codec.h"
37 #include "third_party/libaom/source/libaom/aom/aom_encoder.h"
38 #include "third_party/libaom/source/libaom/aom/aomcx.h"
39
// Applies one encoder control parameter through SetEncoderControlParameters()
// and makes the enclosing function return WEBRTC_VIDEO_CODEC_ERROR when that
// call reports failure. The macro expands to an unqualified call, so it is
// meant for LibaomAv1Encoder member functions that return an integer status.
// The do/while(0) wrapper lets the macro be used like a single statement.
#define SET_ENCODER_PARAM_OR_RETURN_ERROR(param_id, param_value) \
  do {                                                           \
    if (!SetEncoderControlParameters(param_id, param_value)) {   \
      return WEBRTC_VIDEO_CODEC_ERROR;                           \
    }                                                            \
  } while (0)
46
47 namespace webrtc {
48 namespace {
49
// Encoder configuration parameters
constexpr int kQpMin = 10;  // Lower quantizer bound handed to libaom.
constexpr int kUsageProfile = AOM_USAGE_REALTIME;
constexpr int kMinQindex = 145;  // Min qindex threshold for QP scaling.
constexpr int kMaxQindex = 205;  // Max qindex threshold for QP scaling.
constexpr int kBitDepth = 8;     // Used for cfg_.g_input_bit_depth.
constexpr int kLagInFrames = 0;  // No look ahead.
// Denominator of the encoder timebase; also used to derive frame duration.
constexpr int kRtpTicksPerSecond = 90000;
// Lowest framerate (fps) accepted by SetRates().
constexpr float kMinimumFrameRate = 1.0;
59
GetSuperblockSize(int width,int height,int threads)60 aom_superblock_size_t GetSuperblockSize(int width, int height, int threads) {
61 int resolution = width * height;
62 if (threads >= 4 && resolution >= 960 * 540 && resolution < 1920 * 1080)
63 return AOM_SUPERBLOCK_SIZE_64X64;
64 else
65 return AOM_SUPERBLOCK_SIZE_DYNAMIC;
66 }
67
68 class LibaomAv1Encoder final : public VideoEncoder {
69 public:
70 explicit LibaomAv1Encoder(
71 const absl::optional<LibaomAv1EncoderAuxConfig>& aux_config);
72 ~LibaomAv1Encoder();
73
74 int InitEncode(const VideoCodec* codec_settings,
75 const Settings& settings) override;
76
77 int32_t RegisterEncodeCompleteCallback(
78 EncodedImageCallback* encoded_image_callback) override;
79
80 int32_t Release() override;
81
82 int32_t Encode(const VideoFrame& frame,
83 const std::vector<VideoFrameType>* frame_types) override;
84
85 void SetRates(const RateControlParameters& parameters) override;
86
87 EncoderInfo GetEncoderInfo() const override;
88
89 private:
90 template <typename P>
91 bool SetEncoderControlParameters(int param_id, P param_value);
92
93 // Get value to be used for encoder cpu_speed setting
94 int GetCpuSpeed(int width, int height);
95
96 // Determine number of encoder threads to use.
97 int NumberOfThreads(int width, int height, int number_of_cores);
98
SvcEnabled() const99 bool SvcEnabled() const { return svc_params_.has_value(); }
100 // Fills svc_params_ memeber value. Returns false on error.
101 bool SetSvcParams(ScalableVideoController::StreamLayersConfig svc_config);
102 // Configures the encoder with layer for the next frame.
103 void SetSvcLayerId(
104 const ScalableVideoController::LayerFrameConfig& layer_frame);
105 // Configures the encoder which buffers next frame updates and can reference.
106 void SetSvcRefFrameConfig(
107 const ScalableVideoController::LayerFrameConfig& layer_frame);
108 // If pixel format doesn't match, then reallocate.
109 void MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt);
110
111 std::unique_ptr<ScalableVideoController> svc_controller_;
112 bool inited_;
113 bool rates_configured_;
114 absl::optional<aom_svc_params_t> svc_params_;
115 VideoCodec encoder_settings_;
116 absl::optional<LibaomAv1EncoderAuxConfig> aux_config_;
117 aom_image_t* frame_for_encode_;
118 aom_codec_ctx_t ctx_;
119 aom_codec_enc_cfg_t cfg_;
120 EncodedImageCallback* encoded_image_callback_;
121 SeqNumUnwrapper<uint32_t> rtp_timestamp_unwrapper_;
122 };
123
VerifyCodecSettings(const VideoCodec & codec_settings)124 int32_t VerifyCodecSettings(const VideoCodec& codec_settings) {
125 if (codec_settings.width < 1) {
126 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
127 }
128 if (codec_settings.height < 1) {
129 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
130 }
131 // maxBitrate == 0 represents an unspecified maxBitRate.
132 if (codec_settings.maxBitrate > 0 &&
133 codec_settings.minBitrate > codec_settings.maxBitrate) {
134 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
135 }
136 if (codec_settings.maxBitrate > 0 &&
137 codec_settings.startBitrate > codec_settings.maxBitrate) {
138 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
139 }
140 if (codec_settings.startBitrate < codec_settings.minBitrate) {
141 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
142 }
143 if (codec_settings.maxFramerate < 1) {
144 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
145 }
146 return WEBRTC_VIDEO_CODEC_OK;
147 }
148
LibaomAv1Encoder(const absl::optional<LibaomAv1EncoderAuxConfig> & aux_config)149 LibaomAv1Encoder::LibaomAv1Encoder(
150 const absl::optional<LibaomAv1EncoderAuxConfig>& aux_config)
151 : inited_(false),
152 rates_configured_(false),
153 aux_config_(aux_config),
154 frame_for_encode_(nullptr),
155 encoded_image_callback_(nullptr) {}
156
// Frees the image wrapper and the libaom context, if any. The status code
// returned by Release() is ignored here.
LibaomAv1Encoder::~LibaomAv1Encoder() {
  Release();
}
160
InitEncode(const VideoCodec * codec_settings,const Settings & settings)161 int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings,
162 const Settings& settings) {
163 if (codec_settings == nullptr) {
164 RTC_LOG(LS_WARNING) << "No codec settings provided to "
165 "LibaomAv1Encoder.";
166 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
167 }
168 if (settings.number_of_cores < 1) {
169 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
170 }
171 if (inited_) {
172 RTC_LOG(LS_WARNING) << "Initing LibaomAv1Encoder without first releasing.";
173 Release();
174 }
175 encoder_settings_ = *codec_settings;
176
177 // Sanity checks for encoder configuration.
178 const int32_t result = VerifyCodecSettings(encoder_settings_);
179 if (result < 0) {
180 RTC_LOG(LS_WARNING) << "Incorrect codec settings provided to "
181 "LibaomAv1Encoder.";
182 return result;
183 }
184 if (encoder_settings_.numberOfSimulcastStreams > 1) {
185 RTC_LOG(LS_WARNING) << "Simulcast is not implemented by LibaomAv1Encoder.";
186 return result;
187 }
188 absl::optional<ScalabilityMode> scalability_mode =
189 encoder_settings_.GetScalabilityMode();
190 if (!scalability_mode.has_value()) {
191 RTC_LOG(LS_WARNING) << "Scalability mode is not set, using 'L1T1'.";
192 scalability_mode = ScalabilityMode::kL1T1;
193 }
194 svc_controller_ = CreateScalabilityStructure(*scalability_mode);
195 if (svc_controller_ == nullptr) {
196 RTC_LOG(LS_WARNING) << "Failed to set scalability mode "
197 << static_cast<int>(*scalability_mode);
198 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
199 }
200
201 if (!SetSvcParams(svc_controller_->StreamConfig())) {
202 return WEBRTC_VIDEO_CODEC_ERROR;
203 }
204
205 // Initialize encoder configuration structure with default values
206 aom_codec_err_t ret =
207 aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg_, kUsageProfile);
208 if (ret != AOM_CODEC_OK) {
209 RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret
210 << " on aom_codec_enc_config_default.";
211 return WEBRTC_VIDEO_CODEC_ERROR;
212 }
213
214 // Overwrite default config with input encoder settings & RTC-relevant values.
215 cfg_.g_w = encoder_settings_.width;
216 cfg_.g_h = encoder_settings_.height;
217 cfg_.g_threads =
218 NumberOfThreads(cfg_.g_w, cfg_.g_h, settings.number_of_cores);
219 cfg_.g_timebase.num = 1;
220 cfg_.g_timebase.den = kRtpTicksPerSecond;
221 cfg_.rc_target_bitrate = encoder_settings_.maxBitrate; // kilobits/sec.
222 cfg_.g_input_bit_depth = kBitDepth;
223 cfg_.kf_mode = AOM_KF_DISABLED;
224 cfg_.rc_min_quantizer = kQpMin;
225 cfg_.rc_max_quantizer = encoder_settings_.qpMax;
226 cfg_.rc_undershoot_pct = 50;
227 cfg_.rc_overshoot_pct = 50;
228 cfg_.rc_buf_initial_sz = 600;
229 cfg_.rc_buf_optimal_sz = 600;
230 cfg_.rc_buf_sz = 1000;
231 cfg_.g_usage = kUsageProfile;
232 cfg_.g_error_resilient = 0;
233 // Low-latency settings.
234 cfg_.rc_end_usage = AOM_CBR; // Constant Bit Rate (CBR) mode
235 cfg_.g_pass = AOM_RC_ONE_PASS; // One-pass rate control
236 cfg_.g_lag_in_frames = kLagInFrames; // No look ahead when lag equals 0.
237
238 if (frame_for_encode_ != nullptr) {
239 aom_img_free(frame_for_encode_);
240 frame_for_encode_ = nullptr;
241 }
242
243 // Flag options: AOM_CODEC_USE_PSNR and AOM_CODEC_USE_HIGHBITDEPTH
244 aom_codec_flags_t flags = 0;
245
246 // Initialize an encoder instance.
247 ret = aom_codec_enc_init(&ctx_, aom_codec_av1_cx(), &cfg_, flags);
248 if (ret != AOM_CODEC_OK) {
249 RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret
250 << " on aom_codec_enc_init.";
251 return WEBRTC_VIDEO_CODEC_ERROR;
252 }
253 inited_ = true;
254
255 // Set control parameters
256 SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_CPUUSED,
257 GetCpuSpeed(cfg_.g_w, cfg_.g_h));
258 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CDEF, 1);
259 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TPL_MODEL, 0);
260 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DELTAQ_MODE, 0);
261 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ORDER_HINT, 0);
262 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_AQ_MODE, 3);
263 SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_SET_MAX_INTRA_BITRATE_PCT, 300);
264 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_COEFF_COST_UPD_FREQ, 3);
265 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MODE_COST_UPD_FREQ, 3);
266 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MV_COST_UPD_FREQ, 3);
267
268 if (codec_settings->mode == VideoCodecMode::kScreensharing) {
269 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TUNE_CONTENT,
270 AOM_CONTENT_SCREEN);
271 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 1);
272 } else {
273 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PALETTE, 0);
274 }
275
276 if (cfg_.g_threads == 4 && cfg_.g_w == 640 &&
277 (cfg_.g_h == 360 || cfg_.g_h == 480)) {
278 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_ROWS,
279 static_cast<int>(log2(cfg_.g_threads)));
280 } else {
281 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_TILE_COLUMNS,
282 static_cast<int>(log2(cfg_.g_threads)));
283 }
284
285 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ROW_MT, 1);
286 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_OBMC, 0);
287 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_NOISE_SENSITIVITY, 0);
288 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_WARPED_MOTION, 0);
289 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
290 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_REF_FRAME_MVS, 0);
291 SET_ENCODER_PARAM_OR_RETURN_ERROR(
292 AV1E_SET_SUPERBLOCK_SIZE,
293 GetSuperblockSize(cfg_.g_w, cfg_.g_h, cfg_.g_threads));
294 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_CFL_INTRA, 0);
295 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTRA, 0);
296 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_ANGLE_DELTA, 0);
297 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_FILTER_INTRA, 0);
298 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_INTRA_DEFAULT_TX_ONLY, 1);
299 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_DISABLE_TRELLIS_QUANT, 1);
300 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIST_WTD_COMP, 0);
301 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DIFF_WTD_COMP, 0);
302 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_DUAL_FILTER, 0);
303 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_COMP, 0);
304 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTERINTRA_WEDGE, 0);
305 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRA_EDGE_FILTER, 0);
306 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_INTRABC, 0);
307 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_MASKED_COMP, 0);
308 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_PAETH_INTRA, 0);
309 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_QM, 0);
310 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RECT_PARTITIONS, 0);
311 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_RESTORATION, 0);
312 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_SMOOTH_INTERINTRA, 0);
313 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ENABLE_TX64, 0);
314 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_MAX_REFERENCE_FRAMES, 3);
315
316 return WEBRTC_VIDEO_CODEC_OK;
317 }
318
319 template <typename P>
SetEncoderControlParameters(int param_id,P param_value)320 bool LibaomAv1Encoder::SetEncoderControlParameters(int param_id,
321 P param_value) {
322 aom_codec_err_t error_code = aom_codec_control(&ctx_, param_id, param_value);
323 if (error_code != AOM_CODEC_OK) {
324 RTC_LOG(LS_WARNING)
325 << "LibaomAv1Encoder::SetEncoderControlParameters returned "
326 << error_code << " on id: " << param_id << ".";
327 }
328 return error_code == AOM_CODEC_OK;
329 }
330
331 // Only positive speeds, range for real-time coding currently is: 6 - 8.
332 // Lower means slower/better quality, higher means fastest/lower quality.
GetCpuSpeed(int width,int height)333 int LibaomAv1Encoder::GetCpuSpeed(int width, int height) {
334 if (aux_config_) {
335 if (auto it = aux_config_->max_pixel_count_to_cpu_speed.lower_bound(width *
336 height);
337 it != aux_config_->max_pixel_count_to_cpu_speed.end()) {
338 return it->second;
339 }
340
341 return 10;
342 } else {
343 // For smaller resolutions, use lower speed setting (get some coding gain at
344 // the cost of increased encoding complexity).
345 switch (encoder_settings_.GetVideoEncoderComplexity()) {
346 case VideoCodecComplexity::kComplexityHigh:
347 if (width * height <= 320 * 180)
348 return 8;
349 else if (width * height <= 640 * 360)
350 return 9;
351 else
352 return 10;
353 case VideoCodecComplexity::kComplexityHigher:
354 if (width * height <= 320 * 180)
355 return 7;
356 else if (width * height <= 640 * 360)
357 return 8;
358 else if (width * height <= 1280 * 720)
359 return 9;
360 else
361 return 10;
362 case VideoCodecComplexity::kComplexityMax:
363 if (width * height <= 320 * 180)
364 return 6;
365 else if (width * height <= 640 * 360)
366 return 7;
367 else if (width * height <= 1280 * 720)
368 return 8;
369 else
370 return 9;
371 default:
372 return 10;
373 }
374 }
375 }
376
NumberOfThreads(int width,int height,int number_of_cores)377 int LibaomAv1Encoder::NumberOfThreads(int width,
378 int height,
379 int number_of_cores) {
380 // Keep the number of encoder threads equal to the possible number of
381 // column/row tiles, which is (1, 2, 4, 8). See comments below for
382 // AV1E_SET_TILE_COLUMNS/ROWS.
383 if (width * height >= 640 * 360 && number_of_cores > 4) {
384 return 4;
385 } else if (width * height >= 320 * 180 && number_of_cores > 2) {
386 return 2;
387 } else {
388 // Use 2 threads for low res on ARM.
389 #if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
390 defined(WEBRTC_ANDROID)
391 if (width * height >= 320 * 180 && number_of_cores > 2) {
392 return 2;
393 }
394 #endif
395 // 1 thread less than VGA.
396 return 1;
397 }
398 }
399
SetSvcParams(ScalableVideoController::StreamLayersConfig svc_config)400 bool LibaomAv1Encoder::SetSvcParams(
401 ScalableVideoController::StreamLayersConfig svc_config) {
402 bool svc_enabled =
403 svc_config.num_spatial_layers > 1 || svc_config.num_temporal_layers > 1;
404 if (!svc_enabled) {
405 svc_params_ = absl::nullopt;
406 return true;
407 }
408 if (svc_config.num_spatial_layers < 1 || svc_config.num_spatial_layers > 4) {
409 RTC_LOG(LS_WARNING) << "Av1 supports up to 4 spatial layers. "
410 << svc_config.num_spatial_layers << " configured.";
411 return false;
412 }
413 if (svc_config.num_temporal_layers < 1 ||
414 svc_config.num_temporal_layers > 8) {
415 RTC_LOG(LS_WARNING) << "Av1 supports up to 8 temporal layers. "
416 << svc_config.num_temporal_layers << " configured.";
417 return false;
418 }
419 aom_svc_params_t& svc_params = svc_params_.emplace();
420 svc_params.number_spatial_layers = svc_config.num_spatial_layers;
421 svc_params.number_temporal_layers = svc_config.num_temporal_layers;
422
423 int num_layers =
424 svc_config.num_spatial_layers * svc_config.num_temporal_layers;
425 for (int i = 0; i < num_layers; ++i) {
426 svc_params.min_quantizers[i] = kQpMin;
427 svc_params.max_quantizers[i] = encoder_settings_.qpMax;
428 }
429
430 // Assume each temporal layer doubles framerate.
431 for (int tid = 0; tid < svc_config.num_temporal_layers; ++tid) {
432 svc_params.framerate_factor[tid] =
433 1 << (svc_config.num_temporal_layers - tid - 1);
434 }
435
436 for (int sid = 0; sid < svc_config.num_spatial_layers; ++sid) {
437 svc_params.scaling_factor_num[sid] = svc_config.scaling_factor_num[sid];
438 svc_params.scaling_factor_den[sid] = svc_config.scaling_factor_den[sid];
439 }
440
441 return true;
442 }
443
SetSvcLayerId(const ScalableVideoController::LayerFrameConfig & layer_frame)444 void LibaomAv1Encoder::SetSvcLayerId(
445 const ScalableVideoController::LayerFrameConfig& layer_frame) {
446 aom_svc_layer_id_t layer_id = {};
447 layer_id.spatial_layer_id = layer_frame.SpatialId();
448 layer_id.temporal_layer_id = layer_frame.TemporalId();
449 SetEncoderControlParameters(AV1E_SET_SVC_LAYER_ID, &layer_id);
450 }
451
SetSvcRefFrameConfig(const ScalableVideoController::LayerFrameConfig & layer_frame)452 void LibaomAv1Encoder::SetSvcRefFrameConfig(
453 const ScalableVideoController::LayerFrameConfig& layer_frame) {
454 // Buffer name to use for each layer_frame.buffers position. In particular
455 // when there are 2 buffers are referenced, prefer name them last and golden,
456 // because av1 bitstream format has dedicated fields for these two names.
457 // See last_frame_idx and golden_frame_idx in the av1 spec
458 // https://aomediacodec.github.io/av1-spec/av1-spec.pdf
459 static constexpr int kPreferedSlotName[] = {0, // Last
460 3, // Golden
461 1, 2, 4, 5, 6};
462 static constexpr int kAv1NumBuffers = 8;
463
464 aom_svc_ref_frame_config_t ref_frame_config = {};
465 RTC_CHECK_LE(layer_frame.Buffers().size(), ABSL_ARRAYSIZE(kPreferedSlotName));
466 for (size_t i = 0; i < layer_frame.Buffers().size(); ++i) {
467 const CodecBufferUsage& buffer = layer_frame.Buffers()[i];
468 int slot_name = kPreferedSlotName[i];
469 RTC_CHECK_GE(buffer.id, 0);
470 RTC_CHECK_LT(buffer.id, kAv1NumBuffers);
471 ref_frame_config.ref_idx[slot_name] = buffer.id;
472 if (buffer.referenced) {
473 ref_frame_config.reference[slot_name] = 1;
474 }
475 if (buffer.updated) {
476 ref_frame_config.refresh[buffer.id] = 1;
477 }
478 }
479
480 SetEncoderControlParameters(AV1E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
481 }
482
// Stores the callback that Encode() uses to deliver encoded images. The
// pointer is kept as-is (no ownership is taken here); passing nullptr makes
// Encode() return WEBRTC_VIDEO_CODEC_UNINITIALIZED. Always returns
// WEBRTC_VIDEO_CODEC_OK.
int32_t LibaomAv1Encoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* encoded_image_callback) {
  encoded_image_callback_ = encoded_image_callback;
  return WEBRTC_VIDEO_CODEC_OK;
}
488
Release()489 int32_t LibaomAv1Encoder::Release() {
490 if (frame_for_encode_ != nullptr) {
491 aom_img_free(frame_for_encode_);
492 frame_for_encode_ = nullptr;
493 }
494 if (inited_) {
495 if (aom_codec_destroy(&ctx_)) {
496 return WEBRTC_VIDEO_CODEC_MEMORY;
497 }
498 inited_ = false;
499 }
500 rates_configured_ = false;
501 return WEBRTC_VIDEO_CODEC_OK;
502 }
503
MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt)504 void LibaomAv1Encoder::MaybeRewrapImgWithFormat(const aom_img_fmt_t fmt) {
505 if (!frame_for_encode_) {
506 frame_for_encode_ =
507 aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr);
508
509 } else if (frame_for_encode_->fmt != fmt) {
510 RTC_LOG(LS_INFO) << "Switching AV1 encoder pixel format to "
511 << (fmt == AOM_IMG_FMT_NV12 ? "NV12" : "I420");
512 aom_img_free(frame_for_encode_);
513 frame_for_encode_ =
514 aom_img_wrap(nullptr, fmt, cfg_.g_w, cfg_.g_h, 1, nullptr);
515 }
516 // else no-op since the image is already in the right format.
517 }
518
Encode(const VideoFrame & frame,const std::vector<VideoFrameType> * frame_types)519 int32_t LibaomAv1Encoder::Encode(
520 const VideoFrame& frame,
521 const std::vector<VideoFrameType>* frame_types) {
522 if (!inited_ || encoded_image_callback_ == nullptr || !rates_configured_) {
523 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
524 }
525
526 bool keyframe_required =
527 frame_types != nullptr &&
528 absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey);
529
530 std::vector<ScalableVideoController::LayerFrameConfig> layer_frames =
531 svc_controller_->NextFrameConfig(keyframe_required);
532
533 if (layer_frames.empty()) {
534 RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame.";
535 return WEBRTC_VIDEO_CODEC_ERROR;
536 }
537
538 rtc::scoped_refptr<VideoFrameBuffer> buffer = frame.video_frame_buffer();
539 absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
540 supported_formats = {VideoFrameBuffer::Type::kI420,
541 VideoFrameBuffer::Type::kNV12};
542 rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
543 if (buffer->type() != VideoFrameBuffer::Type::kNative) {
544 // `buffer` is already mapped.
545 mapped_buffer = buffer;
546 } else {
547 // Attempt to map to one of the supported formats.
548 mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
549 }
550
551 // Convert input frame to I420, if needed.
552 if (!mapped_buffer ||
553 (absl::c_find(supported_formats, mapped_buffer->type()) ==
554 supported_formats.end() &&
555 mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
556 rtc::scoped_refptr<I420BufferInterface> converted_buffer(buffer->ToI420());
557 if (!converted_buffer) {
558 RTC_LOG(LS_ERROR) << "Failed to convert "
559 << VideoFrameBufferTypeToString(
560 frame.video_frame_buffer()->type())
561 << " image to I420. Can't encode frame.";
562 return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE;
563 }
564 RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
565 converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
566
567 mapped_buffer = converted_buffer;
568 }
569
570 switch (mapped_buffer->type()) {
571 case VideoFrameBuffer::Type::kI420:
572 case VideoFrameBuffer::Type::kI420A: {
573 // Set frame_for_encode_ data pointers and strides.
574 MaybeRewrapImgWithFormat(AOM_IMG_FMT_I420);
575 auto i420_buffer = mapped_buffer->GetI420();
576 RTC_DCHECK(i420_buffer);
577 frame_for_encode_->planes[AOM_PLANE_Y] =
578 const_cast<unsigned char*>(i420_buffer->DataY());
579 frame_for_encode_->planes[AOM_PLANE_U] =
580 const_cast<unsigned char*>(i420_buffer->DataU());
581 frame_for_encode_->planes[AOM_PLANE_V] =
582 const_cast<unsigned char*>(i420_buffer->DataV());
583 frame_for_encode_->stride[AOM_PLANE_Y] = i420_buffer->StrideY();
584 frame_for_encode_->stride[AOM_PLANE_U] = i420_buffer->StrideU();
585 frame_for_encode_->stride[AOM_PLANE_V] = i420_buffer->StrideV();
586 break;
587 }
588 case VideoFrameBuffer::Type::kNV12: {
589 MaybeRewrapImgWithFormat(AOM_IMG_FMT_NV12);
590 const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
591 RTC_DCHECK(nv12_buffer);
592 frame_for_encode_->planes[AOM_PLANE_Y] =
593 const_cast<unsigned char*>(nv12_buffer->DataY());
594 frame_for_encode_->planes[AOM_PLANE_U] =
595 const_cast<unsigned char*>(nv12_buffer->DataUV());
596 frame_for_encode_->planes[AOM_PLANE_V] = nullptr;
597 frame_for_encode_->stride[AOM_PLANE_Y] = nv12_buffer->StrideY();
598 frame_for_encode_->stride[AOM_PLANE_U] = nv12_buffer->StrideUV();
599 frame_for_encode_->stride[AOM_PLANE_V] = 0;
600 break;
601 }
602 default:
603 return WEBRTC_VIDEO_CODEC_ENCODER_FAILURE;
604 }
605
606 const uint32_t duration =
607 kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate);
608
609 const size_t num_spatial_layers =
610 svc_params_ ? svc_params_->number_spatial_layers : 1;
611 auto next_layer_frame = layer_frames.begin();
612 for (size_t i = 0; i < num_spatial_layers; ++i) {
613 // The libaom AV1 encoder requires that `aom_codec_encode` is called for
614 // every spatial layer, even if the configured bitrate for that layer is
615 // zero. For zero bitrate spatial layers no frames will be produced.
616 absl::optional<ScalableVideoController::LayerFrameConfig>
617 non_encoded_layer_frame;
618 ScalableVideoController::LayerFrameConfig* layer_frame;
619 if (next_layer_frame != layer_frames.end() &&
620 next_layer_frame->SpatialId() == static_cast<int>(i)) {
621 layer_frame = &*next_layer_frame;
622 ++next_layer_frame;
623 } else {
624 // For layers that are not encoded only the spatial id matters.
625 non_encoded_layer_frame.emplace().S(i);
626 layer_frame = &*non_encoded_layer_frame;
627 }
628 const bool end_of_picture = (next_layer_frame == layer_frames.end());
629
630 aom_enc_frame_flags_t flags =
631 layer_frame->IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0;
632
633 if (SvcEnabled()) {
634 SetSvcLayerId(*layer_frame);
635 SetSvcRefFrameConfig(*layer_frame);
636
637 SET_ENCODER_PARAM_OR_RETURN_ERROR(AV1E_SET_ERROR_RESILIENT_MODE,
638 layer_frame->TemporalId() > 0 ? 1 : 0);
639 }
640
641 // Encode a frame. The presentation timestamp `pts` should never wrap, hence
642 // the unwrapping.
643 aom_codec_err_t ret = aom_codec_encode(
644 &ctx_, frame_for_encode_,
645 rtp_timestamp_unwrapper_.Unwrap(frame.timestamp()), duration, flags);
646 if (ret != AOM_CODEC_OK) {
647 RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret
648 << " on aom_codec_encode.";
649 return WEBRTC_VIDEO_CODEC_ERROR;
650 }
651
652 if (non_encoded_layer_frame) {
653 continue;
654 }
655
656 // Get encoded image data.
657 EncodedImage encoded_image;
658 aom_codec_iter_t iter = nullptr;
659 int data_pkt_count = 0;
660 while (const aom_codec_cx_pkt_t* pkt =
661 aom_codec_get_cx_data(&ctx_, &iter)) {
662 if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
663 if (data_pkt_count > 0) {
664 RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
665 "one data packet for an input video frame.";
666 Release();
667 }
668 encoded_image.SetEncodedData(EncodedImageBuffer::Create(
669 /*data=*/static_cast<const uint8_t*>(pkt->data.frame.buf),
670 /*size=*/pkt->data.frame.sz));
671
672 if ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0) {
673 layer_frame->Keyframe();
674 }
675
676 encoded_image._frameType = layer_frame->IsKeyframe()
677 ? VideoFrameType::kVideoFrameKey
678 : VideoFrameType::kVideoFrameDelta;
679 encoded_image.SetTimestamp(frame.timestamp());
680 encoded_image.capture_time_ms_ = frame.render_time_ms();
681 encoded_image.rotation_ = frame.rotation();
682 encoded_image.content_type_ = VideoContentType::UNSPECIFIED;
683 // If encoded image width/height info are added to aom_codec_cx_pkt_t,
684 // use those values in lieu of the values in frame.
685 if (svc_params_) {
686 int n = svc_params_->scaling_factor_num[layer_frame->SpatialId()];
687 int d = svc_params_->scaling_factor_den[layer_frame->SpatialId()];
688 encoded_image._encodedWidth = cfg_.g_w * n / d;
689 encoded_image._encodedHeight = cfg_.g_h * n / d;
690 encoded_image.SetSpatialIndex(layer_frame->SpatialId());
691 encoded_image.SetTemporalIndex(layer_frame->TemporalId());
692 } else {
693 encoded_image._encodedWidth = cfg_.g_w;
694 encoded_image._encodedHeight = cfg_.g_h;
695 }
696 encoded_image.timing_.flags = VideoSendTiming::kInvalid;
697
698 int qp = -1;
699 SET_ENCODER_PARAM_OR_RETURN_ERROR(AOME_GET_LAST_QUANTIZER, &qp);
700 encoded_image.qp_ = qp;
701
702 encoded_image.SetColorSpace(frame.color_space());
703 ++data_pkt_count;
704 }
705 }
706
707 // Deliver encoded image data.
708 if (encoded_image.size() > 0) {
709 CodecSpecificInfo codec_specific_info;
710 codec_specific_info.codecType = kVideoCodecAV1;
711 codec_specific_info.end_of_picture = end_of_picture;
712 bool is_keyframe = layer_frame->IsKeyframe();
713 codec_specific_info.generic_frame_info =
714 svc_controller_->OnEncodeDone(*layer_frame);
715 if (is_keyframe && codec_specific_info.generic_frame_info) {
716 codec_specific_info.template_structure =
717 svc_controller_->DependencyStructure();
718 auto& resolutions = codec_specific_info.template_structure->resolutions;
719 if (SvcEnabled()) {
720 resolutions.resize(svc_params_->number_spatial_layers);
721 for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
722 int n = svc_params_->scaling_factor_num[sid];
723 int d = svc_params_->scaling_factor_den[sid];
724 resolutions[sid] =
725 RenderResolution(cfg_.g_w * n / d, cfg_.g_h * n / d);
726 }
727 } else {
728 resolutions = {RenderResolution(cfg_.g_w, cfg_.g_h)};
729 }
730 }
731 encoded_image_callback_->OnEncodedImage(encoded_image,
732 &codec_specific_info);
733 }
734 }
735
736 return WEBRTC_VIDEO_CODEC_OK;
737 }
738
SetRates(const RateControlParameters & parameters)739 void LibaomAv1Encoder::SetRates(const RateControlParameters& parameters) {
740 if (!inited_) {
741 RTC_LOG(LS_WARNING) << "SetRates() while encoder is not initialized";
742 return;
743 }
744 if (parameters.framerate_fps < kMinimumFrameRate) {
745 RTC_LOG(LS_WARNING) << "Unsupported framerate (must be >= "
746 << kMinimumFrameRate
747 << " ): " << parameters.framerate_fps;
748 return;
749 }
750 if (parameters.bitrate.get_sum_bps() == 0) {
751 RTC_LOG(LS_WARNING) << "Attempt to set target bit rate to zero";
752 return;
753 }
754
755 // The bitrates caluclated internally in libaom when `AV1E_SET_SVC_PARAMS` is
756 // called depends on the currently configured `rc_target_bitrate`. If the
757 // total target bitrate is not updated first a division by zero could happen.
758 svc_controller_->OnRatesUpdated(parameters.bitrate);
759 cfg_.rc_target_bitrate = parameters.bitrate.get_sum_kbps();
760 aom_codec_err_t error_code = aom_codec_enc_config_set(&ctx_, &cfg_);
761 if (error_code != AOM_CODEC_OK) {
762 RTC_LOG(LS_WARNING) << "Error configuring encoder, error code: "
763 << error_code;
764 }
765
766 if (SvcEnabled()) {
767 for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
768 // libaom bitrate for spatial id S and temporal id T means bitrate
769 // of frames with spatial_id=S and temporal_id<=T
770 // while `parameters.bitrate` provdies bitrate of frames with
771 // spatial_id=S and temporal_id=T
772 int accumulated_bitrate_bps = 0;
773 for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) {
774 int layer_index = sid * svc_params_->number_temporal_layers + tid;
775 accumulated_bitrate_bps += parameters.bitrate.GetBitrate(sid, tid);
776 // `svc_params.layer_target_bitrate` expects bitrate in kbps.
777 svc_params_->layer_target_bitrate[layer_index] =
778 accumulated_bitrate_bps / 1000;
779 }
780 }
781 SetEncoderControlParameters(AV1E_SET_SVC_PARAMS, &*svc_params_);
782 }
783
784 rates_configured_ = true;
785
786 // Set frame rate to closest integer value.
787 encoder_settings_.maxFramerate =
788 static_cast<uint32_t>(parameters.framerate_fps + 0.5);
789 }
790
GetEncoderInfo() const791 VideoEncoder::EncoderInfo LibaomAv1Encoder::GetEncoderInfo() const {
792 EncoderInfo info;
793 info.supports_native_handle = false;
794 info.implementation_name = "libaom";
795 info.has_trusted_rate_controller = true;
796 info.is_hardware_accelerated = false;
797 info.scaling_settings = VideoEncoder::ScalingSettings(kMinQindex, kMaxQindex);
798 info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
799 VideoFrameBuffer::Type::kNV12};
800 if (SvcEnabled()) {
801 for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) {
802 info.fps_allocation[sid].resize(svc_params_->number_temporal_layers);
803 for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) {
804 info.fps_allocation[sid][tid] =
805 encoder_settings_.maxFramerate / svc_params_->framerate_factor[tid];
806 }
807 }
808 }
809 return info;
810 }
811
812 } // namespace
813
CreateLibaomAv1Encoder()814 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder() {
815 return std::make_unique<LibaomAv1Encoder>(absl::nullopt);
816 }
817
CreateLibaomAv1Encoder(const LibaomAv1EncoderAuxConfig & aux_config)818 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
819 const LibaomAv1EncoderAuxConfig& aux_config) {
820 return std::make_unique<LibaomAv1Encoder>(aux_config);
821 }
822
823 } // namespace webrtc
824