1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 *
10 */
11
12 #ifdef RTC_ENABLE_VP9
13
14 #include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
15
16 #include <algorithm>
17 #include <limits>
18 #include <utility>
19 #include <vector>
20
21 #include "absl/algorithm/container.h"
22 #include "absl/memory/memory.h"
23 #include "absl/strings/match.h"
24 #include "api/video/color_space.h"
25 #include "api/video/i010_buffer.h"
26 #include "common_video/include/video_frame_buffer.h"
27 #include "common_video/libyuv/include/webrtc_libyuv.h"
28 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
29 #include "modules/video_coding/svc/create_scalability_structure.h"
30 #include "modules/video_coding/svc/scalability_mode_util.h"
31 #include "modules/video_coding/svc/scalable_video_controller.h"
32 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
33 #include "modules/video_coding/svc/svc_rate_allocator.h"
34 #include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
35 #include "rtc_base/checks.h"
36 #include "rtc_base/experiments/field_trial_list.h"
37 #include "rtc_base/experiments/field_trial_parser.h"
38 #include "rtc_base/experiments/rate_control_settings.h"
39 #include "rtc_base/logging.h"
40 #include "rtc_base/strings/string_builder.h"
41 #include "rtc_base/time_utils.h"
42 #include "rtc_base/trace_event.h"
43 #include "third_party/libyuv/include/libyuv/convert.h"
44 #include "vpx/vp8cx.h"
45 #include "vpx/vpx_encoder.h"
46
47 namespace webrtc {
48
49 namespace {
50 // Maps from gof_idx to encoder internal reference frame buffer index. These
51 // maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames.
// Maps from gof_idx to encoder internal reference frame buffer index. These
// maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames.
// The tables are read-only lookup data, so declare them constexpr.
constexpr uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
constexpr uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};

// Maximum allowed PID difference for different per-layer frame-rate case.
constexpr int kMaxAllowedPidDiff = 30;

// TODO(ilink): Tune these thresholds further.
// Selected using ConferenceMotion_1280_720_50.yuv clip.
// No toggling observed on any link capacity from 100-2000kbps.
// HD was reached consistently when link capacity was 1500kbps.
// Set resolutions are a bit more conservative than svc_config.cc sets, e.g.
// for 300kbps resolution converged to 270p instead of 360p.
constexpr int kLowVp9QpThreshold = 149;
constexpr int kHighVp9QpThreshold = 205;
66
GetActiveLayers(const VideoBitrateAllocation & allocation)67 std::pair<size_t, size_t> GetActiveLayers(
68 const VideoBitrateAllocation& allocation) {
69 for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) {
70 if (allocation.GetSpatialLayerSum(sl_idx) > 0) {
71 size_t last_layer = sl_idx + 1;
72 while (last_layer < kMaxSpatialLayers &&
73 allocation.GetSpatialLayerSum(last_layer) > 0) {
74 ++last_layer;
75 }
76 return std::make_pair(sl_idx, last_layer);
77 }
78 }
79 return {0, 0};
80 }
81
// Builds a ScalableVideoController that matches the spatial/temporal layering
// requested in `codec`, or returns nullptr when the configuration cannot be
// expressed as one of the predefined scalability structures.
std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure(
    const VideoCodec& codec) {
  int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
  int num_temporal_layers =
      std::max(1, int{codec.VP9().numberOfTemporalLayers});
  // Plain single-layer stream: no layering bookkeeping needed.
  if (num_spatial_layers == 1 && num_temporal_layers == 1) {
    return std::make_unique<ScalableVideoControllerNoLayering>();
  }

  // Compose a scalability-mode name such as "L2T3", "L3T3_KEY" or "S3T1";
  // it is resolved via ScalabilityModeFromString() below. 20 chars is enough
  // for the longest possible name.
  char name[20];
  rtc::SimpleStringBuilder ss(name);
  if (codec.mode == VideoCodecMode::kScreensharing) {
    // TODO(bugs.webrtc.org/11999): Compose names of the structures when they
    // are implemented.
    return nullptr;
  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
             num_spatial_layers == 1) {
    ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) {
    ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY";
  } else {
    RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff);
    ss << "S" << num_spatial_layers << "T" << num_temporal_layers;
  }

  // Check spatial ratio. Only done when spatial layers were explicitly
  // configured (signalled by a non-zero target bitrate on layer 0).
  if (num_spatial_layers > 1 && codec.spatialLayers[0].targetBitrate > 0) {
    if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width ||
        codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
      RTC_LOG(LS_WARNING)
          << "Top layer resolution expected to match overall resolution";
      return nullptr;
    }
    // Check if the ratio is one of the supported: 1:2 or 2:3 between
    // consecutive layers.
    int numerator;
    int denominator;
    if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) {
      numerator = 1;
      denominator = 2;
      // no suffix for 1:2 ratio.
    } else if (2 * codec.spatialLayers[1].width ==
               3 * codec.spatialLayers[0].width) {
      numerator = 2;
      denominator = 3;
      ss << "h";
    } else {
      RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
                          << codec.spatialLayers[0].width << ":"
                          << codec.spatialLayers[1].width;
      return nullptr;
    }
    // Validate ratio is consistent for all spatial layer transitions.
    for (int sid = 1; sid < num_spatial_layers; ++sid) {
      if (codec.spatialLayers[sid].width * numerator !=
              codec.spatialLayers[sid - 1].width * denominator ||
          codec.spatialLayers[sid].height * numerator !=
              codec.spatialLayers[sid - 1].height * denominator) {
        RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
                            << ":" << denominator;
        return nullptr;
      }
    }
  }

  // Resolve the composed name into a ScalabilityMode enum value.
  absl::optional<ScalabilityMode> scalability_mode =
      ScalabilityModeFromString(name);
  if (!scalability_mode.has_value()) {
    RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
    return nullptr;
  }
  auto scalability_structure_controller =
      CreateScalabilityStructure(*scalability_mode);
  if (scalability_structure_controller == nullptr) {
    RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name;
  } else {
    RTC_LOG(LS_INFO) << "Created scalability structure " << name;
  }
  return scalability_structure_controller;
}
161
Vp9References(rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers)162 vpx_svc_ref_frame_config_t Vp9References(
163 rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers) {
164 vpx_svc_ref_frame_config_t ref_config = {};
165 for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) {
166 const auto& buffers = layer_frame.Buffers();
167 RTC_DCHECK_LE(buffers.size(), 3);
168 int sid = layer_frame.SpatialId();
169 if (!buffers.empty()) {
170 ref_config.lst_fb_idx[sid] = buffers[0].id;
171 ref_config.reference_last[sid] = buffers[0].referenced;
172 if (buffers[0].updated) {
173 ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id);
174 }
175 }
176 if (buffers.size() > 1) {
177 ref_config.gld_fb_idx[sid] = buffers[1].id;
178 ref_config.reference_golden[sid] = buffers[1].referenced;
179 if (buffers[1].updated) {
180 ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id);
181 }
182 }
183 if (buffers.size() > 2) {
184 ref_config.alt_fb_idx[sid] = buffers[2].id;
185 ref_config.reference_alt_ref[sid] = buffers[2].referenced;
186 if (buffers[2].updated) {
187 ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id);
188 }
189 }
190 }
191 // TODO(bugs.webrtc.org/11999): Fill ref_config.duration
192 return ref_config;
193 }
194
// Returns whether the VP9 denoiser may be enabled on this platform.
// Denoising is off on ARM/Android (optimization pending) and on elsewhere.
bool AllowDenoising() {
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
  return false;
#else
  return true;
#endif
}
205
206 } // namespace
207
EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt * pkt,void * user_data)208 void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
209 void* user_data) {
210 LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data);
211 enc->GetEncodedLayerFrame(pkt);
212 }
213
// Constructs an uninitialized encoder. All libvpx state (encoder_, config_,
// raw_) is allocated later in InitEncode(); field trials are parsed once here
// and cached in members.
LibvpxVp9Encoder::LibvpxVp9Encoder(const cricket::VideoCodec& codec,
                                   std::unique_ptr<LibvpxInterface> interface,
                                   const FieldTrialsView& trials)
    : libvpx_(std::move(interface)),
      encoded_image_(),
      encoded_complete_callback_(nullptr),
      // Fall back to profile 0 when the SDP carries no explicit VP9 profile.
      profile_(
          ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)),
      inited_(false),
      timestamp_(0),
      rc_max_intra_target_(0),
      encoder_(nullptr),
      config_(nullptr),
      raw_(nullptr),
      input_image_(nullptr),
      // First frame is always a key frame.
      force_key_frame_(true),
      pics_since_key_(0),
      num_temporal_layers_(0),
      num_spatial_layers_(0),
      num_active_spatial_layers_(0),
      first_active_layer_(0),
      layer_deactivation_requires_key_frame_(absl::StartsWith(
          trials.Lookup("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation"),
          "Enabled")),
      is_svc_(false),
      inter_layer_pred_(InterLayerPredMode::kOn),
      external_ref_control_(false),  // Set in InitEncode because of tests.
      trusted_rate_controller_(
          RateControlSettings::ParseFromKeyValueConfig(&trials)
              .LibvpxVp9TrustedRateController()),
      layer_buffering_(false),
      full_superframe_drop_(true),
      first_frame_in_picture_(true),
      ss_info_needed_(false),
      force_all_active_layers_(false),
      is_flexible_mode_(false),
      variable_framerate_experiment_(ParseVariableFramerateConfig(trials)),
      variable_framerate_controller_(
          variable_framerate_experiment_.framerate_limit),
      quality_scaler_experiment_(ParseQualityScalerConfig(trials)),
      // External reference control is on unless explicitly disabled by trial.
      external_ref_ctrl_(
          !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"),
                            "Disabled")),
      performance_flags_(ParsePerformanceFlagsFromTrials(trials)),
      num_steady_state_frames_(0),
      config_changed_(true) {
  codec_ = {};
  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
}
263
// Releases all encoder resources. Safe even if InitEncode() was never called,
// since Release() checks each member for null before freeing it.
LibvpxVp9Encoder::~LibvpxVp9Encoder() {
  Release();
}
267
// This encoder does not consume the FEC controller override; the argument is
// deliberately ignored.
void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) {
  // Ignored.
}
271
Release()272 int LibvpxVp9Encoder::Release() {
273 int ret_val = WEBRTC_VIDEO_CODEC_OK;
274
275 if (encoder_ != nullptr) {
276 if (inited_) {
277 if (libvpx_->codec_destroy(encoder_)) {
278 ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
279 }
280 }
281 delete encoder_;
282 encoder_ = nullptr;
283 }
284 if (config_ != nullptr) {
285 delete config_;
286 config_ = nullptr;
287 }
288 if (raw_ != nullptr) {
289 libvpx_->img_free(raw_);
290 raw_ = nullptr;
291 }
292 inited_ = false;
293 return ret_val;
294 }
295
// Returns true when the application supplied explicit per-spatial-layer
// settings rather than relying on automatic layer derivation.
bool LibvpxVp9Encoder::ExplicitlyConfiguredSpatialLayers() const {
  // We check target_bitrate_bps of the 0th layer to see if the spatial layers
  // (i.e. bitrates) were explicitly configured.
  return codec_.spatialLayers[0].targetBitrate > 0;
}
301
// Applies a new bitrate allocation to the libvpx configuration: distributes
// the total rate over spatial/temporal layers, tracks which layers became
// (in)active, and requests a key frame when the layer change requires one.
// Returns false if the configuration is invalid.
bool LibvpxVp9Encoder::SetSvcRates(
    const VideoBitrateAllocation& bitrate_allocation) {
  std::pair<size_t, size_t> current_layers =
      GetActiveLayers(current_bitrate_allocation_);
  std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation);

  // Without full inter-layer prediction, a newly enabled layer has no valid
  // spatial reference, so enabling layers needs a key frame.
  const bool layer_activation_requires_key_frame =
      inter_layer_pred_ == InterLayerPredMode::kOff ||
      inter_layer_pred_ == InterLayerPredMode::kOnKeyPic;
  const bool lower_layers_enabled = new_layers.first < current_layers.first;
  const bool higher_layers_enabled = new_layers.second > current_layers.second;
  const bool disabled_layers = new_layers.first > current_layers.first ||
                               new_layers.second < current_layers.second;

  if (lower_layers_enabled ||
      (higher_layers_enabled && layer_activation_requires_key_frame) ||
      (disabled_layers && layer_deactivation_requires_key_frame_)) {
    force_key_frame_ = true;
  }

  // A change in the active layer set must be signalled in the SS info.
  if (current_layers != new_layers) {
    ss_info_needed_ = true;
  }

  config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps();

  if (ExplicitlyConfiguredSpatialLayers()) {
    // Use the allocation's per-layer rates directly (values are in kbps).
    for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
      const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0);
      config_->ss_target_bitrate[sl_idx] =
          bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000;

      for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) {
        config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] =
            bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000;
      }

      if (!was_layer_active) {
        // Reset frame rate controller if layer is resumed after pause.
        framerate_controller_[sl_idx].Reset();
      }

      framerate_controller_[sl_idx].SetTargetRate(
          codec_.spatialLayers[sl_idx].maxFramerate);
    }
  } else {
    // Derive per-layer rates from the configured scaling factors: each
    // spatial layer gets a share proportional to its resolution ratio.
    float rate_ratio[VPX_MAX_LAYERS] = {0};
    float total = 0;
    for (int i = 0; i < num_spatial_layers_; ++i) {
      if (svc_params_.scaling_factor_num[i] <= 0 ||
          svc_params_.scaling_factor_den[i] <= 0) {
        RTC_LOG(LS_ERROR) << "Scaling factors not specified!";
        return false;
      }
      rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) /
                      svc_params_.scaling_factor_den[i];
      total += rate_ratio[i];
    }

    for (int i = 0; i < num_spatial_layers_; ++i) {
      RTC_CHECK_GT(total, 0);
      config_->ss_target_bitrate[i] = static_cast<unsigned int>(
          config_->rc_target_bitrate * rate_ratio[i] / total);
      // Temporal-layer rates are cumulative; fixed splits: 2 TLs -> 2/3 + 1/3,
      // 3 TLs -> 1/2 + 1/4 + 1/4.
      if (num_temporal_layers_ == 1) {
        config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 2) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] * 2 / 3;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 3) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] / 2;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->layer_target_bitrate[i * num_temporal_layers_] +
            (config_->ss_target_bitrate[i] / 4);
        config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
            config_->ss_target_bitrate[i];
      } else {
        RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: "
                          << num_temporal_layers_;
        return false;
      }

      framerate_controller_[i].SetTargetRate(codec_.maxFramerate);
    }
  }

  // Recompute the contiguous active-layer range [first_active_layer_,
  // num_active_spatial_layers_). A deactivated middle layer is not supported.
  num_active_spatial_layers_ = 0;
  first_active_layer_ = 0;
  bool seen_active_layer = false;
  bool expect_no_more_active_layers = false;
  for (int i = 0; i < num_spatial_layers_; ++i) {
    if (config_->ss_target_bitrate[i] > 0) {
      RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is "
                                                   "deactivated.";
      if (!seen_active_layer) {
        first_active_layer_ = i;
      }
      num_active_spatial_layers_ = i + 1;
      seen_active_layer = true;
    } else {
      expect_no_more_active_layers = seen_active_layer;
    }
  }

  // With per-layer speed settings the denoiser choice follows the topmost
  // active layer.
  if (seen_active_layer && performance_flags_.use_per_layer_speed) {
    bool denoiser_on =
        AllowDenoising() && codec_.VP9()->denoisingOn &&
        performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1]
            .allow_denoising;
    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                           denoiser_on ? 1 : 0);
  }

  if (higher_layers_enabled && !force_key_frame_) {
    // Prohibit drop of all layers for the next frame, so newly enabled
    // layer would have a valid spatial reference.
    for (size_t i = 0; i < num_spatial_layers_; ++i) {
      svc_drop_frame_.framedrop_thresh[i] = 0;
    }
    force_all_active_layers_ = true;
  }

  if (svc_controller_) {
    for (int sid = 0; sid < num_spatial_layers_; ++sid) {
      // Bitrates in `layer_target_bitrate` are accumulated for each temporal
      // layer but in `VideoBitrateAllocation` they should be separated.
      int previous_bitrate_kbps = 0;
      for (int tid = 0; tid < num_temporal_layers_; ++tid) {
        int accumulated_bitrate_kbps =
            config_->layer_target_bitrate[sid * num_temporal_layers_ + tid];
        int single_layer_bitrate_kbps =
            accumulated_bitrate_kbps - previous_bitrate_kbps;
        RTC_DCHECK_GE(single_layer_bitrate_kbps, 0);
        current_bitrate_allocation_.SetBitrate(
            sid, tid, single_layer_bitrate_kbps * 1'000);
        previous_bitrate_kbps = accumulated_bitrate_kbps;
      }
    }
    svc_controller_->OnRatesUpdated(current_bitrate_allocation_);
  } else {
    current_bitrate_allocation_ = bitrate_allocation;
  }
  config_changed_ = true;
  return true;
}
449
DisableSpatialLayer(int sid)450 void LibvpxVp9Encoder::DisableSpatialLayer(int sid) {
451 RTC_DCHECK_LT(sid, num_spatial_layers_);
452 if (config_->ss_target_bitrate[sid] == 0) {
453 return;
454 }
455 config_->ss_target_bitrate[sid] = 0;
456 for (int tid = 0; tid < num_temporal_layers_; ++tid) {
457 config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0;
458 }
459 config_changed_ = true;
460 }
461
EnableSpatialLayer(int sid)462 void LibvpxVp9Encoder::EnableSpatialLayer(int sid) {
463 RTC_DCHECK_LT(sid, num_spatial_layers_);
464 if (config_->ss_target_bitrate[sid] > 0) {
465 return;
466 }
467 for (int tid = 0; tid < num_temporal_layers_; ++tid) {
468 config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] =
469 current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000;
470 }
471 config_->ss_target_bitrate[sid] =
472 current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000;
473 RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0);
474 config_changed_ = true;
475 }
476
SetActiveSpatialLayers()477 void LibvpxVp9Encoder::SetActiveSpatialLayers() {
478 // Svc controller may decide to skip a frame at certain spatial layer even
479 // when bitrate for it is non-zero, however libvpx uses configured bitrate as
480 // a signal which layers should be produced.
481 RTC_DCHECK(svc_controller_);
482 RTC_DCHECK(!layer_frames_.empty());
483 RTC_DCHECK(absl::c_is_sorted(
484 layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs,
485 const ScalableVideoController::LayerFrameConfig& rhs) {
486 return lhs.SpatialId() < rhs.SpatialId();
487 }));
488
489 auto frame_it = layer_frames_.begin();
490 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
491 if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) {
492 EnableSpatialLayer(sid);
493 ++frame_it;
494 } else {
495 DisableSpatialLayer(sid);
496 }
497 }
498 }
499
SetRates(const RateControlParameters & parameters)500 void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
501 if (!inited_) {
502 RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
503 return;
504 }
505 if (encoder_->err) {
506 RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err;
507 return;
508 }
509 if (parameters.framerate_fps < 1.0) {
510 RTC_LOG(LS_WARNING) << "Unsupported framerate: "
511 << parameters.framerate_fps;
512 return;
513 }
514
515 codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5);
516
517 bool res = SetSvcRates(parameters.bitrate);
518 RTC_DCHECK(res) << "Failed to set new bitrate allocation";
519 config_changed_ = true;
520 }
521
522 // TODO(eladalon): s/inst/codec_settings/g.
// Validates `inst`, tears down any previous session, and builds a fresh
// libvpx encoder configuration (profile, SVC layering, rate control,
// threading). Returns a WEBRTC_VIDEO_CODEC_* status; on success finishes via
// InitAndSetControlSettings().
int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
                                 const Settings& settings) {
  if (inst == nullptr) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->maxFramerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Allow zero to represent an unspecified maxBitRate
  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->width < 1 || inst->height < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (settings.number_of_cores < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->VP9().numberOfTemporalLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // libvpx probably does not support more than 3 spatial layers.
  if (inst->VP9().numberOfSpatialLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // Remember the previous image format (if any) so a re-init can keep it.
  absl::optional<vpx_img_fmt_t> previous_img_fmt =
      raw_ ? absl::make_optional<vpx_img_fmt_t>(raw_->fmt) : absl::nullopt;

  // Tear down any existing session before re-configuring.
  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (encoder_ == nullptr) {
    encoder_ = new vpx_codec_ctx_t;
    memset(encoder_, 0, sizeof(*encoder_));
  }
  if (config_ == nullptr) {
    config_ = new vpx_codec_enc_cfg_t;
    memset(config_, 0, sizeof(*config_));
  }
  timestamp_ = 0;
  if (&codec_ != inst) {
    codec_ = *inst;
  }
  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));

  force_key_frame_ = true;
  pics_since_key_ = 0;

  // Layer counts come either from an explicit ScalabilityMode or from the
  // legacy VP9-specific codec settings.
  absl::optional<ScalabilityMode> scalability_mode = inst->GetScalabilityMode();
  if (scalability_mode.has_value()) {
    // Use settings from `ScalabilityMode` identifier.
    RTC_LOG(LS_INFO) << "Create scalability structure "
                     << ScalabilityModeToString(*scalability_mode);
    svc_controller_ = CreateScalabilityStructure(*scalability_mode);
    if (!svc_controller_) {
      RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
    }
    ScalableVideoController::StreamLayersConfig info =
        svc_controller_->StreamConfig();
    num_spatial_layers_ = info.num_spatial_layers;
    num_temporal_layers_ = info.num_temporal_layers;
    inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode);
  } else {
    num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
    RTC_DCHECK_GT(num_spatial_layers_, 0);
    num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
    if (num_temporal_layers_ == 0) {
      num_temporal_layers_ = 1;
    }
    inter_layer_pred_ = inst->VP9().interLayerPred;
    svc_controller_ = CreateVp9ScalabilityStructure(*inst);
  }

  // One frame-rate controller per spatial layer.
  framerate_controller_ = std::vector<FramerateControllerDeprecated>(
      num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));

  is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);

  // Populate encoder configuration with default values.
  if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  // Profile selects image format and bit depth. Only profiles 0 and 2 are
  // implemented here.
  vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE;
  unsigned int bits_for_storage = 8;
  switch (profile_) {
    case VP9Profile::kProfile0:
      img_fmt = previous_img_fmt.value_or(VPX_IMG_FMT_I420);
      bits_for_storage = 8;
      config_->g_bit_depth = VPX_BITS_8;
      config_->g_profile = 0;
      config_->g_input_bit_depth = 8;
      break;
    case VP9Profile::kProfile1:
      // Encoding of profile 1 is not implemented. It would require extended
      // support for I444, I422, and I440 buffers.
      RTC_DCHECK_NOTREACHED();
      break;
    case VP9Profile::kProfile2:
      img_fmt = VPX_IMG_FMT_I42016;
      bits_for_storage = 16;
      config_->g_bit_depth = VPX_BITS_10;
      config_->g_profile = 2;
      config_->g_input_bit_depth = 10;
      break;
    case VP9Profile::kProfile3:
      // Encoding of profile 3 is not implemented.
      RTC_DCHECK_NOTREACHED();
      break;
  }

  // Creating a wrapper to the image - setting image data to nullptr. Actual
  // pointer will be set in encode. Setting align to 1, as it is meaningless
  // (actual memory is not allocated).
  raw_ = libvpx_->img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1,
                           nullptr);
  raw_->bit_depth = bits_for_storage;

  config_->g_w = codec_.width;
  config_->g_h = codec_.height;
  config_->rc_target_bitrate = inst->startBitrate;  // in kbit/s
  config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
  // Setting the time base of the codec.
  config_->g_timebase.num = 1;
  config_->g_timebase.den = 90000;
  config_->g_lag_in_frames = 0;  // 0- no frame lagging
  config_->g_threads = 1;
  // Rate control settings.
  config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0;
  config_->rc_end_usage = VPX_CBR;
  config_->g_pass = VPX_RC_ONE_PASS;
  config_->rc_min_quantizer =
      codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2;
  config_->rc_max_quantizer = 52;
  config_->rc_undershoot_pct = 50;
  config_->rc_overshoot_pct = 50;
  config_->rc_buf_initial_sz = 500;
  config_->rc_buf_optimal_sz = 600;
  config_->rc_buf_sz = 1000;
  // Set the maximum target size of any key-frame.
  rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
  // Key-frame interval is enforced manually by this wrapper.
  config_->kf_mode = VPX_KF_DISABLED;
  // TODO(webm:1592): work-around for libvpx issue, as it can still
  // put some key-frames at will even in VPX_KF_DISABLED kf_mode.
  config_->kf_max_dist = inst->VP9().keyFrameInterval;
  config_->kf_min_dist = config_->kf_max_dist;
  if (quality_scaler_experiment_.enabled) {
    // In that experiment webrtc wide quality scaler is used instead of libvpx
    // internal scaler.
    config_->rc_resize_allowed = 0;
  } else {
    config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
  }
  // Determine number of threads based on the image size and #cores.
  config_->g_threads =
      NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);

  is_flexible_mode_ = inst->VP9().flexibleMode;

  if (num_spatial_layers_ > 1 &&
      codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
    RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
                         "several spatial layers";
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // External reference control is required for different frame rate on spatial
  // layers because libvpx generates rtp incompatible references in this case.
  external_ref_control_ = external_ref_ctrl_ ||
                          (num_spatial_layers_ > 1 &&
                           codec_.mode == VideoCodecMode::kScreensharing) ||
                          inter_layer_pred_ == InterLayerPredMode::kOn;

  // Temporal layering: fixed patterns for 1, 2 or 3 layers; anything else is
  // rejected (mirrors the validation at the top of this function).
  if (num_temporal_layers_ == 1) {
    gof_.SetGofInfoVP9(kTemporalStructureMode1);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
    config_->ts_number_layers = 1;
    config_->ts_rate_decimator[0] = 1;
    config_->ts_periodicity = 1;
    config_->ts_layer_id[0] = 0;
  } else if (num_temporal_layers_ == 2) {
    gof_.SetGofInfoVP9(kTemporalStructureMode2);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
    config_->ts_number_layers = 2;
    config_->ts_rate_decimator[0] = 2;
    config_->ts_rate_decimator[1] = 1;
    config_->ts_periodicity = 2;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 1;
  } else if (num_temporal_layers_ == 3) {
    gof_.SetGofInfoVP9(kTemporalStructureMode3);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
    config_->ts_number_layers = 3;
    config_->ts_rate_decimator[0] = 4;
    config_->ts_rate_decimator[1] = 2;
    config_->ts_rate_decimator[2] = 1;
    config_->ts_periodicity = 4;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 2;
    config_->ts_layer_id[2] = 1;
    config_->ts_layer_id[3] = 2;
  } else {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  if (external_ref_control_) {
    // With external reference control the wrapper picks references per frame.
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
    if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 &&
        codec_.mode == VideoCodecMode::kScreensharing) {
      // External reference control for several temporal layers with different
      // frame rates on spatial layers is not implemented yet.
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
    }
  }
  ref_buf_ = {};

  return InitAndSetControlSettings(inst);
}
745
NumberOfThreads(int width,int height,int number_of_cores)746 int LibvpxVp9Encoder::NumberOfThreads(int width,
747 int height,
748 int number_of_cores) {
749 // Keep the number of encoder threads equal to the possible number of column
750 // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
751 if (width * height >= 1280 * 720 && number_of_cores > 4) {
752 return 4;
753 } else if (width * height >= 640 * 360 && number_of_cores > 2) {
754 return 2;
755 } else {
756 // Use 2 threads for low res on ARM.
757 #if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
758 defined(WEBRTC_ANDROID)
759 if (width * height >= 320 * 180 && number_of_cores > 2) {
760 return 2;
761 }
762 #endif
763 // 1 thread less than VGA.
764 return 1;
765 }
766 }
767
InitAndSetControlSettings(const VideoCodec * inst)768 int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
769 // Set QP-min/max per spatial and temporal layer.
770 int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
771 for (int i = 0; i < tot_num_layers; ++i) {
772 svc_params_.max_quantizers[i] = config_->rc_max_quantizer;
773 svc_params_.min_quantizers[i] = config_->rc_min_quantizer;
774 }
775 config_->ss_number_layers = num_spatial_layers_;
776 if (svc_controller_) {
777 auto stream_config = svc_controller_->StreamConfig();
778 for (int i = 0; i < stream_config.num_spatial_layers; ++i) {
779 svc_params_.scaling_factor_num[i] = stream_config.scaling_factor_num[i];
780 svc_params_.scaling_factor_den[i] = stream_config.scaling_factor_den[i];
781 }
782 } else if (ExplicitlyConfiguredSpatialLayers()) {
783 for (int i = 0; i < num_spatial_layers_; ++i) {
784 const auto& layer = codec_.spatialLayers[i];
785 RTC_CHECK_GT(layer.width, 0);
786 const int scale_factor = codec_.width / layer.width;
787 RTC_DCHECK_GT(scale_factor, 0);
788
789 // Ensure scaler factor is integer.
790 if (scale_factor * layer.width != codec_.width) {
791 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
792 }
793
794 // Ensure scale factor is the same in both dimensions.
795 if (scale_factor * layer.height != codec_.height) {
796 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
797 }
798
799 // Ensure scale factor is power of two.
800 const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0;
801 if (!is_pow_of_two) {
802 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
803 }
804
805 svc_params_.scaling_factor_num[i] = 1;
806 svc_params_.scaling_factor_den[i] = scale_factor;
807
808 RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
809 RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
810 if (i > 0) {
811 // Frame rate of high spatial layer is supposed to be equal or higher
812 // than frame rate of low spatial layer.
813 RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
814 codec_.spatialLayers[i - 1].maxFramerate);
815 }
816 }
817 } else {
818 int scaling_factor_num = 256;
819 for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
820 // 1:2 scaling in each dimension.
821 svc_params_.scaling_factor_num[i] = scaling_factor_num;
822 svc_params_.scaling_factor_den[i] = 256;
823 }
824 }
825
826 UpdatePerformanceFlags();
827 RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(),
828 static_cast<size_t>(num_spatial_layers_));
829
830 SvcRateAllocator init_allocator(codec_);
831 current_bitrate_allocation_ =
832 init_allocator.Allocate(VideoBitrateAllocationParameters(
833 inst->startBitrate * 1000, inst->maxFramerate));
834 if (!SetSvcRates(current_bitrate_allocation_)) {
835 return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
836 }
837
838 const vpx_codec_err_t rv = libvpx_->codec_enc_init(
839 encoder_, vpx_codec_vp9_cx(), config_,
840 config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH);
841 if (rv != VPX_CODEC_OK) {
842 RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv);
843 return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
844 }
845
846 if (performance_flags_.use_per_layer_speed) {
847 for (int si = 0; si < num_spatial_layers_; ++si) {
848 svc_params_.speed_per_layer[si] =
849 performance_flags_by_spatial_index_[si].base_layer_speed;
850 svc_params_.loopfilter_ctrl[si] =
851 performance_flags_by_spatial_index_[si].deblock_mode;
852 }
853 bool denoiser_on =
854 AllowDenoising() && inst->VP9().denoisingOn &&
855 performance_flags_by_spatial_index_[num_spatial_layers_ - 1]
856 .allow_denoising;
857 libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
858 denoiser_on ? 1 : 0);
859 }
860
861 libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
862 rc_max_intra_target_);
863 libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE,
864 inst->VP9().adaptiveQpMode ? 3 : 0);
865
866 libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
867 libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
868
869 if (is_svc_) {
870 libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1);
871 libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
872 }
873 if (!is_svc_ || !performance_flags_.use_per_layer_speed) {
874 libvpx_->codec_control(
875 encoder_, VP8E_SET_CPUUSED,
876 performance_flags_by_spatial_index_.rbegin()->base_layer_speed);
877 }
878
879 if (num_spatial_layers_ > 1) {
880 switch (inter_layer_pred_) {
881 case InterLayerPredMode::kOn:
882 libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
883 break;
884 case InterLayerPredMode::kOff:
885 libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1);
886 break;
887 case InterLayerPredMode::kOnKeyPic:
888 libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2);
889 break;
890 default:
891 RTC_DCHECK_NOTREACHED();
892 }
893
894 memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
895 const bool reverse_constrained_drop_mode =
896 inter_layer_pred_ == InterLayerPredMode::kOn &&
897 codec_.mode == VideoCodecMode::kScreensharing &&
898 num_spatial_layers_ > 1;
899 if (reverse_constrained_drop_mode) {
900 // Screenshare dropping mode: drop a layer only together with all lower
901 // layers. This ensures that drops on lower layers won't reduce frame-rate
902 // for higher layers and reference structure is RTP-compatible.
903 #if 0
904 // CONSTRAINED_FROM_ABOVE_DROP is not defined in the available version of
905 // libvpx
906 svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP;
907 #else
908 abort();
909 #endif
910 svc_drop_frame_.max_consec_drop = 5;
911 for (size_t i = 0; i < num_spatial_layers_; ++i) {
912 svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
913 }
914 // No buffering is needed because the highest layer is always present in
915 // all frames in CONSTRAINED_FROM_ABOVE drop mode.
916 layer_buffering_ = false;
917 } else {
918 // Configure encoder to drop entire superframe whenever it needs to drop
919 // a layer. This mode is preferred over per-layer dropping which causes
920 // quality flickering and is not compatible with RTP non-flexible mode.
921 svc_drop_frame_.framedrop_mode =
922 full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
923 // Buffering is needed only for constrained layer drop, as it's not clear
924 // which frame is the last.
925 layer_buffering_ = !full_superframe_drop_;
926 svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
927 for (size_t i = 0; i < num_spatial_layers_; ++i) {
928 svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
929 }
930 }
931 libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
932 &svc_drop_frame_);
933 }
934
935 // Register callback for getting each spatial layer.
936 vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
937 LibvpxVp9Encoder::EncoderOutputCodedPacketCallback,
938 reinterpret_cast<void*>(this)};
939 libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
940 reinterpret_cast<void*>(&cbp));
941
942 // Control function to set the number of column tiles in encoding a frame, in
943 // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
944 // The number tile columns will be capped by the encoder based on image size
945 // (minimum width of tile column is 256 pixels, maximum is 4096).
946 libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS,
947 static_cast<int>((config_->g_threads >> 1)));
948
949 // Turn on row-based multithreading.
950 libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1);
951
952 if (AllowDenoising() && !performance_flags_.use_per_layer_speed) {
953 libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
954 inst->VP9().denoisingOn ? 1 : 0);
955 }
956
957 if (codec_.mode == VideoCodecMode::kScreensharing) {
958 // Adjust internal parameters to screen content.
959 libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1);
960 }
961 // Enable encoder skip of static/low content blocks.
962 libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
963 inited_ = true;
964 config_changed_ = true;
965 return WEBRTC_VIDEO_CODEC_OK;
966 }
967
MaxIntraTarget(uint32_t optimal_buffer_size)968 uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) {
969 // Set max to the optimal buffer level (normalized by target BR),
970 // and scaled by a scale_par.
971 // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps].
972 // This value is presented in percentage of perFrameBw:
973 // perFrameBw = targetBR[Kbps] * 1000 / framerate.
974 // The target in % is as follows:
975 float scale_par = 0.5;
976 uint32_t target_pct =
977 optimal_buffer_size * scale_par * codec_.maxFramerate / 10;
978 // Don't go below 3 times the per frame bandwidth.
979 const uint32_t min_intra_size = 300;
980 return (target_pct < min_intra_size) ? min_intra_size : target_pct;
981 }
982
// Encodes `input_image` and returns a WEBRTC_VIDEO_CODEC_* status. Encoded
// layer packets are delivered through the libvpx output callback registered
// at init time; when `layer_buffering_` is set, the final buffered layer is
// flushed via DeliverBufferedFrame() before returning. `frame_types` (for
// the single supported stream) may request a key frame.
int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
                             const std::vector<VideoFrameType>* frame_types) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (encoded_complete_callback_ == nullptr) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (num_active_spatial_layers_ == 0) {
    // All spatial layers are disabled, return without encoding anything.
    return WEBRTC_VIDEO_CODEC_OK;
  }

  // We only support one stream at the moment.
  if (frame_types && !frame_types->empty()) {
    if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) {
      force_key_frame_ = true;
    }
  }

  // Force a periodic key frame when the configured key frame interval is
  // reached.
  if (pics_since_key_ + 1 ==
      static_cast<size_t>(codec_.VP9()->keyFrameInterval)) {
    force_key_frame_ = true;
  }

  if (svc_controller_) {
    layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_);
    if (layer_frames_.empty()) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    if (layer_frames_.front().IsKeyframe()) {
      force_key_frame_ = true;
    }
  }

  vpx_svc_layer_id_t layer_id = {0};
  if (!force_key_frame_) {
    const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
    layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      const uint32_t frame_timestamp_ms =
          1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;

      // To ensure that several rate-limiters with different limits don't
      // interfere, they must be queried in order of increasing limit.

      bool use_steady_state_limiter =
          variable_framerate_experiment_.enabled &&
          input_image.update_rect().IsEmpty() &&
          num_steady_state_frames_ >=
              variable_framerate_experiment_.frames_before_steady_state;

      // Need to check all frame limiters, even if lower layers are disabled,
      // because variable frame-rate limiter should be checked after the first
      // layer. It's easier to overwrite active layers after, then check all
      // cases.
      // NOTE: `layer_id.spatial_layer_id` is advanced inside the loop, so it
      // tracks the lowest spatial layer that is NOT dropped so far.
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        const float layer_fps =
            framerate_controller_[layer_id.spatial_layer_id].GetTargetRate();
        // Use steady state rate-limiter at the correct place.
        if (use_steady_state_limiter &&
            layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) {
          if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) {
            layer_id.spatial_layer_id = num_active_spatial_layers_;
          }
          // Break always: if rate limiter triggered frame drop, no need to
          // continue; otherwise, the rate is less than the next limiters.
          break;
        }
        if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
          ++layer_id.spatial_layer_id;
        } else {
          break;
        }
      }

      if (use_steady_state_limiter &&
          layer_id.spatial_layer_id < num_active_spatial_layers_) {
        variable_framerate_controller_.AddFrame(frame_timestamp_ms);
      }
    }

    if (force_all_active_layers_) {
      layer_id.spatial_layer_id = first_active_layer_;
      force_all_active_layers_ = false;
    }

    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
      // Drop entire picture.
      return WEBRTC_VIDEO_CODEC_OK;
    }
  }

  // Need to set temporal layer id on ALL layers, even disabled ones.
  // Otherwise libvpx might produce frames on a disabled layer:
  // http://crbug.com/1051476
  for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
    layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
  }

  if (layer_id.spatial_layer_id < first_active_layer_) {
    layer_id.spatial_layer_id = first_active_layer_;
  }

  // When an SVC controller is in use, it overrides the GOF-derived layer ids
  // computed above.
  if (svc_controller_) {
    layer_id.spatial_layer_id = layer_frames_.front().SpatialId();
    layer_id.temporal_layer_id = layer_frames_.front().TemporalId();
    for (const auto& layer : layer_frames_) {
      layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] =
          layer.TemporalId();
    }
    SetActiveSpatialLayers();
  }

  if (is_svc_ && performance_flags_.use_per_layer_speed) {
    // Update speed settings that might depend on temporal index.
    bool speed_updated = false;
    for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
      const int target_speed =
          layer_id.temporal_layer_id_per_spatial[sl_idx] == 0
              ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed
              : performance_flags_by_spatial_index_[sl_idx].high_layer_speed;
      if (svc_params_.speed_per_layer[sl_idx] != target_speed) {
        svc_params_.speed_per_layer[sl_idx] = target_speed;
        speed_updated = true;
      }
    }
    if (speed_updated) {
      libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
    }
  }

  libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);

  if (num_spatial_layers_ > 1) {
    // Update frame dropping settings as they may change on per-frame basis.
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                           &svc_drop_frame_);
  }

  if (config_changed_) {
    if (libvpx_->codec_enc_config_set(encoder_, config_)) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }

    if (!performance_flags_.use_per_layer_speed) {
      // Not setting individual speeds per layer, find the highest active
      // resolution instead and base the speed on that.
      for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
        if (config_->ss_target_bitrate[i] > 0) {
          int width = (svc_params_.scaling_factor_num[i] * config_->g_w) /
                      svc_params_.scaling_factor_den[i];
          int height = (svc_params_.scaling_factor_num[i] * config_->g_h) /
                       svc_params_.scaling_factor_den[i];
          // Pick the speed setting configured for the largest resolution not
          // exceeding this layer's pixel count.
          int speed =
              std::prev(performance_flags_.settings_by_resolution.lower_bound(
                  width * height))
                  ->second.base_layer_speed;
          libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed);
          break;
        }
      }
    }
    config_changed_ = false;
  }

  RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
  RTC_DCHECK_EQ(input_image.height(), raw_->d_h);

  // Set input image for use in the callback.
  // This was necessary since you need some information from input_image.
  // You can save only the necessary information (such as timestamp) instead of
  // doing this.
  input_image_ = &input_image;

  // In case we need to map the buffer, `mapped_buffer` is used to keep it alive
  // through reference counting until after encoding has finished.
  rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer;
  const I010BufferInterface* i010_buffer;
  rtc::scoped_refptr<const I010BufferInterface> i010_copy;
  switch (profile_) {
    case VP9Profile::kProfile0: {
      mapped_buffer =
          PrepareBufferForProfile0(input_image.video_frame_buffer());
      if (!mapped_buffer) {
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
      break;
    }
    case VP9Profile::kProfile1: {
      RTC_DCHECK_NOTREACHED();
      break;
    }
    case VP9Profile::kProfile2: {
      // We can inject kI010 frames directly for encode. All other formats
      // should be converted to it.
      switch (input_image.video_frame_buffer()->type()) {
        case VideoFrameBuffer::Type::kI010: {
          i010_buffer = input_image.video_frame_buffer()->GetI010();
          break;
        }
        default: {
          auto i420_buffer = input_image.video_frame_buffer()->ToI420();
          if (!i420_buffer) {
            RTC_LOG(LS_ERROR) << "Failed to convert "
                              << VideoFrameBufferTypeToString(
                                     input_image.video_frame_buffer()->type())
                              << " image to I420. Can't encode frame.";
            return WEBRTC_VIDEO_CODEC_ERROR;
          }
          i010_copy = I010Buffer::Copy(*i420_buffer);
          i010_buffer = i010_copy.get();
        }
      }
      // I010 samples are 16 bits wide (see the pointer reinterpretation
      // below), so the vpx byte-stride is twice the sample stride.
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataY()));
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataU()));
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataV()));
      raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2;
      raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2;
      raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2;
      break;
    }
    case VP9Profile::kProfile3: {
      RTC_DCHECK_NOTREACHED();
      break;
    }
  }

  vpx_enc_frame_flags_t flags = 0;
  if (force_key_frame_) {
    flags = VPX_EFLAG_FORCE_KF;
  }

  if (svc_controller_) {
    vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_);
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                           &ref_config);
  } else if (external_ref_control_) {
    vpx_svc_ref_frame_config_t ref_config =
        SetReferences(force_key_frame_, layer_id.spatial_layer_id);

    if (VideoCodecMode::kScreensharing == codec_.mode) {
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        // Duration in 90 kHz ticks, derived from each layer's target rate.
        ref_config.duration[sl_idx] = static_cast<int64_t>(
            90000 / (std::min(static_cast<float>(codec_.maxFramerate),
                              framerate_controller_[sl_idx].GetTargetRate())));
      }
    }

    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                           &ref_config);
  }

  first_frame_in_picture_ = true;

  // TODO(ssilkin): Frame duration should be specified per spatial layer
  // since their frame rate can be different. For now calculate frame duration
  // based on target frame rate of the highest spatial layer, which frame rate
  // is supposed to be equal or higher than frame rate of low spatial layers.
  // Also, timestamp should represent actual time passed since previous frame
  // (not 'expected' time). Then rate controller can drain buffer more
  // accurately.
  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
  float target_framerate_fps =
      (codec_.mode == VideoCodecMode::kScreensharing)
          ? std::min(static_cast<float>(codec_.maxFramerate),
                     framerate_controller_[num_active_spatial_layers_ - 1]
                         .GetTargetRate())
          : codec_.maxFramerate;
  // Duration in 90 kHz ticks.
  uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
  const vpx_codec_err_t rv = libvpx_->codec_encode(
      encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv)
                      << "\n"
                         "Details: "
                      << libvpx_->codec_error(encoder_) << "\n"
                      << libvpx_->codec_error_detail(encoder_);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  timestamp_ += duration;

  if (layer_buffering_) {
    const bool end_of_picture = true;
    DeliverBufferedFrame(end_of_picture);
  }

  return WEBRTC_VIDEO_CODEC_OK;
}
1277
// Fills `codec_specific` (and the out-params `spatial_idx`/`temporal_idx`)
// for the encoded packet `pkt`, using the layer ids reported by the encoder.
// Returns false if the encoder produced a frame for a layer that the SVC
// controller did not request; true otherwise.
bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                             absl::optional<int>* spatial_idx,
                                             absl::optional<int>* temporal_idx,
                                             const vpx_codec_cx_pkt& pkt) {
  RTC_CHECK(codec_specific != nullptr);
  codec_specific->codecType = kVideoCodecVP9;
  CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);

  vp9_info->first_frame_in_picture = first_frame_in_picture_;
  vp9_info->flexible_mode = is_flexible_mode_;

  // Track picture count since the last key frame; only the first spatial
  // layer of a picture advances the counter.
  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
    pics_since_key_ = 0;
  } else if (first_frame_in_picture_) {
    ++pics_since_key_;
  }

  vpx_svc_layer_id_t layer_id = {0};
  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  // Can't have keyframe with non-zero temporal layer.
  RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0);

  RTC_CHECK_GT(num_temporal_layers_, 0);
  RTC_CHECK_GT(num_active_spatial_layers_, 0);
  if (num_temporal_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.temporal_layer_id, 0);
    vp9_info->temporal_idx = kNoTemporalIdx;
    *temporal_idx = absl::nullopt;
  } else {
    vp9_info->temporal_idx = layer_id.temporal_layer_id;
    *temporal_idx = layer_id.temporal_layer_id;
  }
  if (num_active_spatial_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
    *spatial_idx = absl::nullopt;
  } else {
    *spatial_idx = layer_id.spatial_layer_id;
  }

  const bool is_key_pic = (pics_since_key_ == 0);
  const bool is_inter_layer_pred_allowed =
      (inter_layer_pred_ == InterLayerPredMode::kOn ||
       (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic));

  // Always set inter_layer_predicted to true on high layer frame if inter-layer
  // prediction (ILP) is allowed even if encoder didn't actually use it.
  // Setting inter_layer_predicted to false would allow receiver to decode high
  // layer frame without decoding low layer frame. If that would happen (e.g.
  // if low layer frame is lost) then receiver won't be able to decode next high
  // layer frame which uses ILP.
  vp9_info->inter_layer_predicted =
      first_frame_in_picture_ ? false : is_inter_layer_pred_allowed;

  // Mark all low spatial layer frames as references (not just frames of
  // active low spatial layers) if inter-layer prediction is enabled since
  // these frames are indirect references of high spatial layer, which can
  // later be enabled without key frame.
  vp9_info->non_ref_for_inter_layer_pred =
      !is_inter_layer_pred_allowed ||
      layer_id.spatial_layer_id + 1 == num_spatial_layers_;

  // Always populate this, so that the packetizer can properly set the marker
  // bit.
  vp9_info->num_spatial_layers = num_active_spatial_layers_;
  vp9_info->first_active_layer = first_active_layer_;

  vp9_info->num_ref_pics = 0;
  FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
                       vp9_info);
  if (vp9_info->flexible_mode) {
    vp9_info->gof_idx = kNoGofIdx;
    if (!svc_controller_) {
      if (num_temporal_layers_ == 1) {
        vp9_info->temporal_up_switch = true;
      } else {
        // In flexible mode with > 1 temporal layer but no SVC controller we
        // can't technically determine if a frame is an upswitch point, use
        // gof-based data as proxy for now.
        // TODO(sprang): Remove once SVC controller is the only choice.
        vp9_info->gof_idx =
            static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
        vp9_info->temporal_up_switch =
            gof_.temporal_up_switch[vp9_info->gof_idx];
      }
    }
  } else {
    vp9_info->gof_idx =
        static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
    vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
    RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] ||
               vp9_info->num_ref_pics == 0);
  }

  vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);

  // Write SS on key frame of independently coded spatial layers and on base
  // temporal/spatial layer frame if number of layers changed without issuing
  // of key picture (inter-layer prediction is enabled).
  const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted;
  if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
                       layer_id.spatial_layer_id == first_active_layer_)) {
    vp9_info->ss_data_available = true;
    vp9_info->spatial_layer_resolution_present = true;
    // Signal disabled layers.
    for (size_t i = 0; i < first_active_layer_; ++i) {
      vp9_info->width[i] = 0;
      vp9_info->height[i] = 0;
    }
    // Per-layer resolutions derived from the configured scaling factors.
    for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) {
      vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] /
                           svc_params_.scaling_factor_den[i];
      vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] /
                            svc_params_.scaling_factor_den[i];
    }
    if (vp9_info->flexible_mode) {
      vp9_info->gof.num_frames_in_gof = 0;
    } else {
      vp9_info->gof.CopyGofInfoVP9(gof_);
    }

    ss_info_needed_ = false;
  } else {
    vp9_info->ss_data_available = false;
  }

  first_frame_in_picture_ = false;

  // Populate codec-agnostic section in the codec specific structure.
  if (svc_controller_) {
    // Match the encoded frame against the layer configs requested from the
    // controller in Encode().
    auto it = absl::c_find_if(
        layer_frames_,
        [&](const ScalableVideoController::LayerFrameConfig& config) {
          return config.SpatialId() == layer_id.spatial_layer_id;
        });
    if (it == layer_frames_.end()) {
      RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S"
                        << layer_id.spatial_layer_id << "T"
                        << layer_id.temporal_layer_id
                        << " that wasn't requested.";
      return false;
    }
    codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it);
    if (is_key_frame) {
      codec_specific->template_structure =
          svc_controller_->DependencyStructure();
      auto& resolutions = codec_specific->template_structure->resolutions;
      resolutions.resize(num_spatial_layers_);
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        resolutions[sid] = RenderResolution(
            /*width=*/codec_.width * svc_params_.scaling_factor_num[sid] /
                svc_params_.scaling_factor_den[sid],
            /*height=*/codec_.height * svc_params_.scaling_factor_num[sid] /
                svc_params_.scaling_factor_den[sid]);
      }
    }
    if (is_flexible_mode_) {
      // Populate data for legacy temporal-upswitch state.
      // We can switch up to a higher temporal layer only if all temporal layers
      // higher than this (within the current spatial layer) are switch points.
      vp9_info->temporal_up_switch = true;
      for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_;
           ++i) {
        // Assumes decode targets are always ordered first by spatial then by
        // temporal id.
        size_t dti_index =
            (layer_id.spatial_layer_id * num_temporal_layers_) + i;
        vp9_info->temporal_up_switch &=
            (codec_specific->generic_frame_info
                 ->decode_target_indications[dti_index] ==
             DecodeTargetIndication::kSwitch);
      }
    }
  }
  return true;
}
1454
FillReferenceIndices(const vpx_codec_cx_pkt & pkt,const size_t pic_num,const bool inter_layer_predicted,CodecSpecificInfoVP9 * vp9_info)1455 void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
1456 const size_t pic_num,
1457 const bool inter_layer_predicted,
1458 CodecSpecificInfoVP9* vp9_info) {
1459 vpx_svc_layer_id_t layer_id = {0};
1460 libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
1461
1462 const bool is_key_frame =
1463 (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
1464
1465 std::vector<RefFrameBuffer> ref_buf_list;
1466
1467 if (is_svc_) {
1468 vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
1469 libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
1470 &enc_layer_conf);
1471 char ref_buf_flags[] = "00000000";
1472 // There should be one character per buffer + 1 termination '\0'.
1473 static_assert(sizeof(ref_buf_flags) == kNumVp9Buffers + 1);
1474
1475 if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
1476 const size_t fb_idx =
1477 enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
1478 RTC_DCHECK_LT(fb_idx, ref_buf_.size());
1479 if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
1480 ref_buf_[fb_idx]) == ref_buf_list.end()) {
1481 ref_buf_list.push_back(ref_buf_[fb_idx]);
1482 ref_buf_flags[fb_idx] = '1';
1483 }
1484 }
1485
1486 if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
1487 const size_t fb_idx =
1488 enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
1489 RTC_DCHECK_LT(fb_idx, ref_buf_.size());
1490 if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
1491 ref_buf_[fb_idx]) == ref_buf_list.end()) {
1492 ref_buf_list.push_back(ref_buf_[fb_idx]);
1493 ref_buf_flags[fb_idx] = '1';
1494 }
1495 }
1496
1497 if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
1498 const size_t fb_idx =
1499 enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
1500 RTC_DCHECK_LT(fb_idx, ref_buf_.size());
1501 if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
1502 ref_buf_[fb_idx]) == ref_buf_list.end()) {
1503 ref_buf_list.push_back(ref_buf_[fb_idx]);
1504 ref_buf_flags[fb_idx] = '1';
1505 }
1506 }
1507
1508 RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
1509 << layer_id.spatial_layer_id << " tl "
1510 << layer_id.temporal_layer_id << " refered buffers "
1511 << ref_buf_flags;
1512
1513 } else if (!is_key_frame) {
1514 RTC_DCHECK_EQ(num_spatial_layers_, 1);
1515 RTC_DCHECK_EQ(num_temporal_layers_, 1);
1516 // In non-SVC mode encoder doesn't provide reference list. Assume each frame
1517 // refers previous one, which is stored in buffer 0.
1518 ref_buf_list.push_back(ref_buf_[0]);
1519 }
1520
1521 std::vector<size_t> ref_pid_list;
1522
1523 vp9_info->num_ref_pics = 0;
1524 for (const RefFrameBuffer& ref_buf : ref_buf_list) {
1525 RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
1526 if (ref_buf.pic_num < pic_num) {
1527 if (inter_layer_pred_ != InterLayerPredMode::kOn) {
1528 // RTP spec limits temporal prediction to the same spatial layer.
1529 // It is safe to ignore this requirement if inter-layer prediction is
1530 // enabled for all frames when all base frames are relayed to receiver.
1531 RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
1532 } else {
1533 RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
1534 }
1535 RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
1536
1537 // Encoder may reference several spatial layers on the same previous
1538 // frame in case if some spatial layers are skipped on the current frame.
1539 // We shouldn't put duplicate references as it may break some old
1540 // clients and isn't RTP compatible.
1541 if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
1542 ref_buf.pic_num) != ref_pid_list.end()) {
1543 continue;
1544 }
1545 ref_pid_list.push_back(ref_buf.pic_num);
1546
1547 const size_t p_diff = pic_num - ref_buf.pic_num;
1548 RTC_DCHECK_LE(p_diff, 127UL);
1549
1550 vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
1551 ++vp9_info->num_ref_pics;
1552 } else {
1553 RTC_DCHECK(inter_layer_predicted);
1554 // RTP spec only allows to use previous spatial layer for inter-layer
1555 // prediction.
1556 RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
1557 }
1558 }
1559 }
1560
UpdateReferenceBuffers(const vpx_codec_cx_pkt & pkt,const size_t pic_num)1561 void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
1562 const size_t pic_num) {
1563 vpx_svc_layer_id_t layer_id = {0};
1564 libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
1565
1566 RefFrameBuffer frame_buf = {.pic_num = pic_num,
1567 .spatial_layer_id = layer_id.spatial_layer_id,
1568 .temporal_layer_id = layer_id.temporal_layer_id};
1569
1570 if (is_svc_) {
1571 vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
1572 libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
1573 &enc_layer_conf);
1574 const int update_buffer_slot =
1575 enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];
1576
1577 for (size_t i = 0; i < ref_buf_.size(); ++i) {
1578 if (update_buffer_slot & (1 << i)) {
1579 ref_buf_[i] = frame_buf;
1580 }
1581 }
1582
1583 RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
1584 << layer_id.spatial_layer_id << " tl "
1585 << layer_id.temporal_layer_id << " updated buffers "
1586 << (update_buffer_slot & (1 << 0) ? 1 : 0)
1587 << (update_buffer_slot & (1 << 1) ? 1 : 0)
1588 << (update_buffer_slot & (1 << 2) ? 1 : 0)
1589 << (update_buffer_slot & (1 << 3) ? 1 : 0)
1590 << (update_buffer_slot & (1 << 4) ? 1 : 0)
1591 << (update_buffer_slot & (1 << 5) ? 1 : 0)
1592 << (update_buffer_slot & (1 << 6) ? 1 : 0)
1593 << (update_buffer_slot & (1 << 7) ? 1 : 0);
1594 } else {
1595 RTC_DCHECK_EQ(num_spatial_layers_, 1);
1596 RTC_DCHECK_EQ(num_temporal_layers_, 1);
1597 // In non-svc mode encoder doesn't provide reference list. Assume each frame
1598 // is reference and stored in buffer 0.
1599 ref_buf_[0] = frame_buf;
1600 }
1601 }
1602
// Builds the reference configuration (vpx_svc_ref_frame_config_t) for the
// next picture: per active spatial layer, which buffer serves as temporal
// reference (LAST), which serves as inter-layer/spatial reference (GOLDEN),
// and which buffer slot the newly encoded layer frame is stored into.
vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences(
    bool is_key_pic,
    int first_active_spatial_layer_id) {
  // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
  RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);

  vpx_svc_ref_frame_config_t ref_config;
  memset(&ref_config, 0, sizeof(ref_config));

  // Number of buffers set aside per spatial layer for temporal references.
  // Only the lowest num_temporal_layers_ - 1 temporal layers are ever
  // referenced, but at least one buffer is always reserved.
  const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1);
  const bool is_inter_layer_pred_allowed =
      inter_layer_pred_ == InterLayerPredMode::kOn ||
      (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic);
  absl::optional<int> last_updated_buf_idx;

  // Put temporal reference to LAST and spatial reference to GOLDEN. Update
  // frame buffer (i.e. store encoded frame) if current frame is a temporal
  // reference (i.e. it belongs to a low temporal layer) or it is a spatial
  // reference. In later case, always store spatial reference in the last
  // reference frame buffer.
  // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers
  // for temporal references plus 1 buffer for spatial reference. 7 buffers
  // in total.

  for (int sl_idx = first_active_spatial_layer_id;
       sl_idx < num_active_spatial_layers_; ++sl_idx) {
    // Position of the current picture within the group of frames (GOF).
    const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
    const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;

    if (!is_key_pic) {
      // Set up temporal reference.
      const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx];

      // Last reference frame buffer is reserved for spatial reference. It is
      // not supposed to be used for temporal prediction.
      RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);

      const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
      // Incorrect spatial layer may be in the buffer due to a key-frame.
      const bool same_spatial_layer =
          ref_buf_[buf_idx].spatial_layer_id == sl_idx;
      bool correct_pid = false;
      if (is_flexible_mode_) {
        correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff;
      } else {
        // Below code assumes single temporal reference.
        RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
        correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
      }

      if (same_spatial_layer && correct_pid) {
        ref_config.lst_fb_idx[sl_idx] = buf_idx;
        ref_config.reference_last[sl_idx] = 1;
      } else {
        // This reference doesn't match with one specified by GOF. This can
        // only happen if spatial layer is enabled dynamically without key
        // frame. Spatial prediction is supposed to be enabled in this case.
        RTC_DCHECK(is_inter_layer_pred_allowed &&
                   sl_idx > first_active_spatial_layer_id);
      }
    }

    if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
      // Set up spatial reference: predict from the buffer the lower spatial
      // layer of this same picture was just stored into.
      RTC_DCHECK(last_updated_buf_idx);
      ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
      ref_config.reference_golden[sl_idx] = 1;
    } else {
      RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
                 sl_idx == first_active_spatial_layer_id ||
                 inter_layer_pred_ == InterLayerPredMode::kOff);
    }

    last_updated_buf_idx.reset();

    // Decide whether (and where) to store the encoded layer frame: temporal
    // references go to their GOF-assigned slot; frames on the top temporal
    // layer are stored only if they may serve as a spatial reference, in the
    // dedicated last buffer.
    if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
        num_temporal_layers_ == 1) {
      last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];

      // Ensure last frame buffer is not used for temporal prediction (it is
      // reserved for spatial reference).
      RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1);
    } else if (is_inter_layer_pred_allowed) {
      last_updated_buf_idx = kNumVp9Buffers - 1;
    }

    if (last_updated_buf_idx) {
      ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx;
    }
  }

  return ref_config;
}
1696
// Consumes one VPX_CODEC_CX_FRAME_PKT from libvpx: copies the bitstream into
// `encoded_image_`, fills codec-specific metadata, and either delivers the
// frame right away or (in layer-buffering mode) first flushes the previously
// buffered lower spatial layer.
void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);

  if (pkt->data.frame.sz == 0) {
    // Ignore dropped frame.
    return;
  }

  vpx_svc_layer_id_t layer_id = {0};
  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  if (layer_buffering_) {
    // Deliver buffered low spatial layer frame.
    const bool end_of_picture = false;
    DeliverBufferedFrame(end_of_picture);
  }

  // Copy the payload out of libvpx' buffer; `pkt` is only valid for the
  // duration of this callback.
  encoded_image_.SetEncodedData(EncodedImageBuffer::Create(
      static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz));

  codec_specific_ = {};
  absl::optional<int> spatial_index;
  absl::optional<int> temporal_index;
  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index,
                             *pkt)) {
    // Drop the frame.
    encoded_image_.set_size(0);
    return;
  }
  encoded_image_.SetSpatialIndex(spatial_index);
  encoded_image_.SetTemporalIndex(temporal_index);

  // A layer frame only counts as a key frame if it does not use inter-layer
  // prediction (i.e. it is a decodable entry point on its own).
  const bool is_key_frame =
      ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) &&
      !codec_specific_.codecSpecific.VP9.inter_layer_predicted;

  // Ensure encoder issued key frame on request.
  RTC_DCHECK(is_key_frame || !force_key_frame_);

  // Check if encoded frame is a key frame.
  encoded_image_._frameType = VideoFrameType::kVideoFrameDelta;
  if (is_key_frame) {
    encoded_image_._frameType = VideoFrameType::kVideoFrameKey;
    force_key_frame_ = false;
  }

  UpdateReferenceBuffers(*pkt, pics_since_key_);

  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
  encoded_image_.SetTimestamp(input_image_->timestamp());
  encoded_image_.SetColorSpace(input_image_->color_space());
  encoded_image_._encodedHeight =
      pkt->data.frame.height[layer_id.spatial_layer_id];
  encoded_image_._encodedWidth =
      pkt->data.frame.width[layer_id.spatial_layer_id];
  int qp = -1;
  libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
  encoded_image_.qp_ = qp;

  if (!layer_buffering_) {
    // Deliver immediately; this is the last layer of the picture iff its
    // spatial index is the top active spatial layer.
    const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
                                num_active_spatial_layers_;
    DeliverBufferedFrame(end_of_picture);
  }
}
1762
// Hands the frame currently staged in `encoded_image_` (if any) to the
// registered callback and updates screenshare steady-state bookkeeping.
// `end_of_picture` tells the receiver whether more spatial layers of the
// same picture will follow.
void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
  if (encoded_image_.size() > 0) {
    if (num_spatial_layers_ > 1) {
      // Restore frame dropping settings, as dropping may be temporary forbidden
      // due to dynamically enabled layers.
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    }

    codec_specific_.end_of_picture = end_of_picture;

    encoded_complete_callback_->OnEncodedImage(encoded_image_,
                                               &codec_specific_);

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
      // RTP timestamp runs at kVideoPayloadTypeFrequency (90 kHz); convert
      // to milliseconds for the framerate controller.
      const uint32_t frame_timestamp_ms =
          1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
      framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);

      const size_t steady_state_size = SteadyStateSize(
          spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx);

      // Only frames on spatial layers, which may be limited in a steady state
      // are considered for steady state detection.
      if (framerate_controller_[spatial_idx].GetTargetRate() >
          variable_framerate_experiment_.framerate_limit + 1e-9) {
        // A frame counts towards steady state only when both its QP and its
        // size are within the experiment thresholds; otherwise reset.
        if (encoded_image_.qp_ <=
                variable_framerate_experiment_.steady_state_qp &&
            encoded_image_.size() <= steady_state_size) {
          ++num_steady_state_frames_;
        } else {
          num_steady_state_frames_ = 0;
        }
      }
    }
    // Mark the staging buffer as consumed.
    encoded_image_.set_size(0);
  }
}
1803
// Stores the sink that DeliverBufferedFrame() forwards encoded frames to.
// Always succeeds.
int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  encoded_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}
1809
GetEncoderInfo() const1810 VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
1811 EncoderInfo info;
1812 info.supports_native_handle = false;
1813 info.implementation_name = "libvpx";
1814 if (quality_scaler_experiment_.enabled && inited_ &&
1815 codec_.VP9().automaticResizeOn) {
1816 info.scaling_settings = VideoEncoder::ScalingSettings(
1817 quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp);
1818 } else {
1819 info.scaling_settings = VideoEncoder::ScalingSettings::kOff;
1820 }
1821 info.has_trusted_rate_controller = trusted_rate_controller_;
1822 info.is_hardware_accelerated = false;
1823 if (inited_) {
1824 // Find the max configured fps of any active spatial layer.
1825 float max_fps = 0.0;
1826 for (size_t si = 0; si < num_spatial_layers_; ++si) {
1827 if (codec_.spatialLayers[si].active &&
1828 codec_.spatialLayers[si].maxFramerate > max_fps) {
1829 max_fps = codec_.spatialLayers[si].maxFramerate;
1830 }
1831 }
1832
1833 for (size_t si = 0; si < num_spatial_layers_; ++si) {
1834 info.fps_allocation[si].clear();
1835 if (!codec_.spatialLayers[si].active) {
1836 continue;
1837 }
1838
1839 // This spatial layer may already use a fraction of the total frame rate.
1840 const float sl_fps_fraction =
1841 codec_.spatialLayers[si].maxFramerate / max_fps;
1842 for (size_t ti = 0; ti < num_temporal_layers_; ++ti) {
1843 const uint32_t decimator =
1844 num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti];
1845 RTC_DCHECK_GT(decimator, 0);
1846 info.fps_allocation[si].push_back(
1847 rtc::saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction *
1848 (sl_fps_fraction / decimator)));
1849 }
1850 }
1851 if (profile_ == VP9Profile::kProfile0) {
1852 info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
1853 VideoFrameBuffer::Type::kNV12};
1854 }
1855 }
1856 if (!encoder_info_override_.resolution_bitrate_limits().empty()) {
1857 info.resolution_bitrate_limits =
1858 encoder_info_override_.resolution_bitrate_limits();
1859 }
1860 return info;
1861 }
1862
SteadyStateSize(int sid,int tid)1863 size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) {
1864 const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate(
1865 sid, tid == kNoTemporalIdx ? 0 : tid);
1866 const float fps = (codec_.mode == VideoCodecMode::kScreensharing)
1867 ? std::min(static_cast<float>(codec_.maxFramerate),
1868 framerate_controller_[sid].GetTargetRate())
1869 : codec_.maxFramerate;
1870 return static_cast<size_t>(
1871 bitrate_bps / (8 * fps) *
1872 (100 -
1873 variable_framerate_experiment_.steady_state_undershoot_percentage) /
1874 100 +
1875 0.5);
1876 }
1877
1878 // static
1879 LibvpxVp9Encoder::VariableFramerateExperiment
ParseVariableFramerateConfig(const FieldTrialsView & trials)1880 LibvpxVp9Encoder::ParseVariableFramerateConfig(const FieldTrialsView& trials) {
1881 FieldTrialFlag enabled = FieldTrialFlag("Enabled");
1882 FieldTrialParameter<double> framerate_limit("min_fps", 5.0);
1883 FieldTrialParameter<int> qp("min_qp", 32);
1884 FieldTrialParameter<int> undershoot_percentage("undershoot", 30);
1885 FieldTrialParameter<int> frames_before_steady_state(
1886 "frames_before_steady_state", 5);
1887 ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage,
1888 &frames_before_steady_state},
1889 trials.Lookup("WebRTC-VP9VariableFramerateScreenshare"));
1890 VariableFramerateExperiment config;
1891 config.enabled = enabled.Get();
1892 config.framerate_limit = framerate_limit.Get();
1893 config.steady_state_qp = qp.Get();
1894 config.steady_state_undershoot_percentage = undershoot_percentage.Get();
1895 config.frames_before_steady_state = frames_before_steady_state.Get();
1896
1897 return config;
1898 }
1899
// static
// Parses the "WebRTC-VP9QualityScaler" field trial; the scaler is enabled
// unless explicitly disabled, with QP thresholds overridable by the trial.
LibvpxVp9Encoder::QualityScalerExperiment
LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) {
  FieldTrialFlag disabled = FieldTrialFlag("Disabled");
  FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold);
  // NOTE(review): "hihg_qp" looks like a typo for "high_qp", but this string
  // is the wire-format key of the field trial parameter; renaming it would
  // silently break any deployed trial configs using the existing spelling.
  // Confirm against active configs before fixing.
  FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold);
  ParseFieldTrial({&disabled, &low_qp, &high_qp},
                  trials.Lookup("WebRTC-VP9QualityScaler"));
  QualityScalerExperiment config;
  config.enabled = !disabled.Get();
  RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is "
                   << (config.enabled ? "enabled." : "disabled");
  config.low_qp = low_qp.Get();
  config.high_qp = high_qp.Get();

  return config;
}
1917
// Recomputes the per-spatial-layer speed/deblocking settings from the
// configured complexity and each layer's resolution, filling
// performance_flags_by_spatial_index_.
void LibvpxVp9Encoder::UpdatePerformanceFlags() {
  flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution;
  if (codec_.GetVideoEncoderComplexity() ==
      VideoCodecComplexity::kComplexityLow) {
    // For low tier devices, always use speed 9. Only disable upper
    // layer deblocking below QCIF.
    params_by_resolution[0] = {.base_layer_speed = 9,
                               .high_layer_speed = 9,
                               .deblock_mode = 1,
                               .allow_denoising = true};
    params_by_resolution[352 * 288] = {.base_layer_speed = 9,
                                       .high_layer_speed = 9,
                                       .deblock_mode = 0,
                                       .allow_denoising = true};
  } else {
    params_by_resolution = performance_flags_.settings_by_resolution;
  }

  // Picks the parameter set whose min-pixel-count key is the largest one not
  // exceeding `min_pixel_count`.
  // NOTE(review): std::prev(upper_bound(...)) assumes the map contains a key
  // <= min_pixel_count (the defaults always provide key 0); a trial config
  // whose smallest key is larger would make std::prev(begin()) undefined —
  // confirm the parsing path guarantees a 0 entry.
  const auto find_speed = [&](int min_pixel_count) {
    RTC_DCHECK(!params_by_resolution.empty());
    auto it = params_by_resolution.upper_bound(min_pixel_count);
    return std::prev(it)->second;
  };
  performance_flags_by_spatial_index_.clear();

  if (is_svc_) {
    // One entry per configured spatial layer, keyed by that layer's area.
    for (int si = 0; si < num_spatial_layers_; ++si) {
      performance_flags_by_spatial_index_.push_back(find_speed(
          codec_.spatialLayers[si].width * codec_.spatialLayers[si].height));
    }
  } else {
    // Single-layer encode: one entry for the full frame resolution.
    performance_flags_by_spatial_index_.push_back(
        find_speed(codec_.width * codec_.height));
  }
}
1953
// static
// Parses the "WebRTC-VP9-PerformanceFlags" field trial into per-resolution
// speed settings. Entries with out-of-range values are skipped with a
// warning; if nothing valid remains, the built-in defaults are used.
LibvpxVp9Encoder::PerformanceFlags
LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials(
    const FieldTrialsView& trials) {
  // Extends ParameterSet with the map key so the whole entry can be parsed
  // as one struct-list element.
  struct Params : public PerformanceFlags::ParameterSet {
    int min_pixel_count = 0;
  };

  FieldTrialStructList<Params> trials_list(
      {FieldTrialStructMember("min_pixel_count",
                              [](Params* p) { return &p->min_pixel_count; }),
       FieldTrialStructMember("high_layer_speed",
                              [](Params* p) { return &p->high_layer_speed; }),
       FieldTrialStructMember("base_layer_speed",
                              [](Params* p) { return &p->base_layer_speed; }),
       FieldTrialStructMember("deblock_mode",
                              [](Params* p) { return &p->deblock_mode; }),
       FieldTrialStructMember("denoiser",
                              [](Params* p) { return &p->allow_denoising; })},
      {});

  FieldTrialFlag per_layer_speed("use_per_layer_speed");

  ParseFieldTrial({&trials_list, &per_layer_speed},
                  trials.Lookup("WebRTC-VP9-PerformanceFlags"));

  PerformanceFlags flags;
  flags.use_per_layer_speed = per_layer_speed.Get();

  // Valid libvpx cpu-used speeds; deblock_mode must be 0, 1 or 2.
  constexpr int kMinSpeed = 1;
  constexpr int kMaxSpeed = 9;
  for (auto& f : trials_list.Get()) {
    if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed ||
        f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed ||
        f.deblock_mode < 0 || f.deblock_mode > 2) {
      RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: "
                          << "min_pixel_count = " << f.min_pixel_count
                          << ", high_layer_speed = " << f.high_layer_speed
                          << ", base_layer_speed = " << f.base_layer_speed
                          << ", deblock_mode = " << f.deblock_mode;
      continue;
    }
    flags.settings_by_resolution[f.min_pixel_count] = f;
  }

  if (flags.settings_by_resolution.empty()) {
    return GetDefaultPerformanceFlags();
  }

  return flags;
}
2005
// static
// Built-in speed/deblocking defaults keyed by minimum pixel count, with a
// cheaper profile on ARM/Android than on desktop.
LibvpxVp9Encoder::PerformanceFlags
LibvpxVp9Encoder::GetDefaultPerformanceFlags() {
  PerformanceFlags flags;
  flags.use_per_layer_speed = true;
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
  // Speed 8 on all layers for all resolutions.
  flags.settings_by_resolution[0] = {.base_layer_speed = 8,
                                     .high_layer_speed = 8,
                                     .deblock_mode = 0,
                                     .allow_denoising = true};
#else

  // For smaller resolutions, use lower speed setting for the temporal base
  // layer (get some coding gain at the cost of increased encoding complexity).
  // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and
  // disable deblocking for upper-most temporal layers.
  flags.settings_by_resolution[0] = {.base_layer_speed = 5,
                                     .high_layer_speed = 8,
                                     .deblock_mode = 1,
                                     .allow_denoising = true};

  // Use speed 7 for QCIF and above.
  // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and
  // enable deblocking for all temporal layers.
  flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7,
                                             .high_layer_speed = 8,
                                             .deblock_mode = 0,
                                             .allow_denoising = true};

  // For very high resolution (1080p and up), turn the speed all the way up
  // since this is very CPU intensive. Also disable denoising to save CPU, at
  // these resolutions denoising appear less effective and hopefully you also
  // have a less noisy video source at this point.
  flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9,
                                               .high_layer_speed = 9,
                                               .deblock_mode = 0,
                                               .allow_denoising = false};

#endif
  return flags;
}
2048
MaybeRewrapRawWithFormat(const vpx_img_fmt fmt)2049 void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) {
2050 if (!raw_) {
2051 raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1,
2052 nullptr);
2053 } else if (raw_->fmt != fmt) {
2054 RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to "
2055 << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420");
2056 libvpx_->img_free(raw_);
2057 raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1,
2058 nullptr);
2059 }
2060 // else no-op since the image is already in the right format.
2061 }
2062
// Maps (or converts) the input frame buffer into a format VP9 profile 0 can
// encode (I420 or NV12) and points `raw_`'s planes at the pixel data.
// Returns the buffer whose memory `raw_` now references — the caller must
// keep it alive for the duration of the encode — or nullptr on failure.
rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
    rtc::scoped_refptr<VideoFrameBuffer> buffer) {
  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
      supported_formats = {VideoFrameBuffer::Type::kI420,
                           VideoFrameBuffer::Type::kNV12};

  rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
  if (buffer->type() != VideoFrameBuffer::Type::kNative) {
    // `buffer` is already mapped.
    mapped_buffer = buffer;
  } else {
    // Attempt to map to one of the supported formats.
    mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
  }
  // I420A is also acceptable below (the alpha plane is simply ignored).
  if (!mapped_buffer ||
      (absl::c_find(supported_formats, mapped_buffer->type()) ==
           supported_formats.end() &&
       mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
    // Unknown pixel format or unable to map, convert to I420 and prepare that
    // buffer instead to ensure Scale() is safe to use.
    auto converted_buffer = buffer->ToI420();
    if (!converted_buffer) {
      RTC_LOG(LS_ERROR) << "Failed to convert "
                        << VideoFrameBufferTypeToString(buffer->type())
                        << " image to I420. Can't encode frame.";
      return {};
    }
    RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
              converted_buffer->type() == VideoFrameBuffer::Type::kI420A);

    // Because `buffer` had to be converted, use `converted_buffer` instead.
    buffer = mapped_buffer = converted_buffer;
  }

  // Prepare `raw_` from `mapped_buffer`: rewrap to the matching vpx format
  // and alias the buffer's planes (no pixel copy).
  switch (mapped_buffer->type()) {
    case VideoFrameBuffer::Type::kI420:
    case VideoFrameBuffer::Type::kI420A: {
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420);
      const I420BufferInterface* i420_buffer = mapped_buffer->GetI420();
      RTC_DCHECK(i420_buffer);
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
      raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
      raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
      break;
    }
    case VideoFrameBuffer::Type::kNV12: {
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12);
      const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
      RTC_DCHECK(nv12_buffer);
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV());
      // NV12 interleaves U and V; the V "plane" starts one byte into the
      // shared UV plane and both use the UV stride.
      raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1;
      raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
      raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
      break;
    }
    default:
      RTC_DCHECK_NOTREACHED();
  }
  return mapped_buffer;
}
2129
2130 } // namespace webrtc
2131
2132 #endif // RTC_ENABLE_VP9
2133