• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  *
10  */
11 
12 #ifdef RTC_ENABLE_VP9
13 
14 #include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
15 
16 #include <algorithm>
17 #include <limits>
18 #include <utility>
19 #include <vector>
20 
21 #include "absl/algorithm/container.h"
22 #include "absl/memory/memory.h"
23 #include "absl/strings/match.h"
24 #include "api/video/color_space.h"
25 #include "api/video/i010_buffer.h"
26 #include "common_video/include/video_frame_buffer.h"
27 #include "common_video/libyuv/include/webrtc_libyuv.h"
28 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
29 #include "modules/video_coding/svc/create_scalability_structure.h"
30 #include "modules/video_coding/svc/scalability_mode_util.h"
31 #include "modules/video_coding/svc/scalable_video_controller.h"
32 #include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
33 #include "modules/video_coding/svc/svc_rate_allocator.h"
34 #include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
35 #include "rtc_base/checks.h"
36 #include "rtc_base/experiments/field_trial_list.h"
37 #include "rtc_base/experiments/field_trial_parser.h"
38 #include "rtc_base/experiments/rate_control_settings.h"
39 #include "rtc_base/logging.h"
40 #include "rtc_base/strings/string_builder.h"
41 #include "rtc_base/time_utils.h"
42 #include "rtc_base/trace_event.h"
43 #include "third_party/libyuv/include/libyuv/convert.h"
44 #include "vpx/vp8cx.h"
45 #include "vpx/vpx_encoder.h"
46 
47 namespace webrtc {
48 
49 namespace {
// Maps from gof_idx to encoder internal reference frame buffer index. These
// maps work for 1,2 and 3 temporal layers with GOF length of 1,2 and 4 frames.
// constexpr: these are pure lookup tables and must never be mutated.
constexpr uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
constexpr uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};

// Maximum allowed PID difference for different per-layer frame-rate case.
const int kMaxAllowedPidDiff = 30;

// TODO(ilink): Tune these thresholds further.
// Selected using ConferenceMotion_1280_720_50.yuv clip.
// No toggling observed on any link capacity from 100-2000kbps.
// HD was reached consistently when link capacity was 1500kbps.
// Set resolutions are a bit more conservative than svc_config.cc sets, e.g.
// for 300kbps resolution converged to 270p instead of 360p.
constexpr int kLowVp9QpThreshold = 149;
constexpr int kHighVp9QpThreshold = 205;
66 
GetActiveLayers(const VideoBitrateAllocation & allocation)67 std::pair<size_t, size_t> GetActiveLayers(
68     const VideoBitrateAllocation& allocation) {
69   for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) {
70     if (allocation.GetSpatialLayerSum(sl_idx) > 0) {
71       size_t last_layer = sl_idx + 1;
72       while (last_layer < kMaxSpatialLayers &&
73              allocation.GetSpatialLayerSum(last_layer) > 0) {
74         ++last_layer;
75       }
76       return std::make_pair(sl_idx, last_layer);
77     }
78   }
79   return {0, 0};
80 }
81 
// Builds a ScalableVideoController matching `codec`'s VP9 SVC configuration
// by composing a scalability-mode name (e.g. "L3T2_KEY") and resolving it
// through CreateScalabilityStructure(). Returns nullptr for unsupported
// configurations (screenshare structures, bad spatial ratios, unknown modes).
std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure(
    const VideoCodec& codec) {
  int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
  int num_temporal_layers =
      std::max(1, int{codec.VP9().numberOfTemporalLayers});
  // A single layer with no temporal scalability needs no layering logic.
  if (num_spatial_layers == 1 && num_temporal_layers == 1) {
    return std::make_unique<ScalableVideoControllerNoLayering>();
  }

  char name[20];
  rtc::SimpleStringBuilder ss(name);
  if (codec.mode == VideoCodecMode::kScreensharing) {
    // TODO(bugs.webrtc.org/11999): Compose names of the structures when they
    // are implemented.
    return nullptr;
  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
             num_spatial_layers == 1) {
    // Full inter-layer prediction: "L<S>T<T>".
    ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
  } else if (codec.VP9().interLayerPred == InterLayerPredMode::kOnKeyPic) {
    // Inter-layer prediction on key pictures only: "_KEY" suffix.
    ss << "L" << num_spatial_layers << "T" << num_temporal_layers << "_KEY";
  } else {
    RTC_DCHECK_EQ(codec.VP9().interLayerPred, InterLayerPredMode::kOff);
    // Independent spatial layers (simulcast-like): "S<S>T<T>".
    ss << "S" << num_spatial_layers << "T" << num_temporal_layers;
  }

  // Check spatial ratio. Only applicable when layers were explicitly
  // configured (non-zero target bitrate on layer 0).
  if (num_spatial_layers > 1 && codec.spatialLayers[0].targetBitrate > 0) {
    if (codec.width != codec.spatialLayers[num_spatial_layers - 1].width ||
        codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
      RTC_LOG(LS_WARNING)
          << "Top layer resolution expected to match overall resolution";
      return nullptr;
    }
    // Check if the ratio is one of the supported.
    int numerator;
    int denominator;
    if (codec.spatialLayers[1].width == 2 * codec.spatialLayers[0].width) {
      numerator = 1;
      denominator = 2;
      // no suffix for 1:2 ratio.
    } else if (2 * codec.spatialLayers[1].width ==
               3 * codec.spatialLayers[0].width) {
      numerator = 2;
      denominator = 3;
      ss << "h";  // "h" marks the 2:3 resolution-step variants.
    } else {
      RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
                          << codec.spatialLayers[0].width << ":"
                          << codec.spatialLayers[1].width;
      return nullptr;
    }
    // Validate ratio is consistent for all spatial layer transitions.
    for (int sid = 1; sid < num_spatial_layers; ++sid) {
      if (codec.spatialLayers[sid].width * numerator !=
              codec.spatialLayers[sid - 1].width * denominator ||
          codec.spatialLayers[sid].height * numerator !=
              codec.spatialLayers[sid - 1].height * denominator) {
        RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
                            << ":" << denominator;
        return nullptr;
      }
    }
  }

  // Resolve the composed name into a ScalabilityMode and then a controller.
  absl::optional<ScalabilityMode> scalability_mode =
      ScalabilityModeFromString(name);
  if (!scalability_mode.has_value()) {
    RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
    return nullptr;
  }
  auto scalability_structure_controller =
      CreateScalabilityStructure(*scalability_mode);
  if (scalability_structure_controller == nullptr) {
    RTC_LOG(LS_WARNING) << "Unsupported scalability structure " << name;
  } else {
    RTC_LOG(LS_INFO) << "Created scalability structure " << name;
  }
  return scalability_structure_controller;
}
161 
Vp9References(rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers)162 vpx_svc_ref_frame_config_t Vp9References(
163     rtc::ArrayView<const ScalableVideoController::LayerFrameConfig> layers) {
164   vpx_svc_ref_frame_config_t ref_config = {};
165   for (const ScalableVideoController::LayerFrameConfig& layer_frame : layers) {
166     const auto& buffers = layer_frame.Buffers();
167     RTC_DCHECK_LE(buffers.size(), 3);
168     int sid = layer_frame.SpatialId();
169     if (!buffers.empty()) {
170       ref_config.lst_fb_idx[sid] = buffers[0].id;
171       ref_config.reference_last[sid] = buffers[0].referenced;
172       if (buffers[0].updated) {
173         ref_config.update_buffer_slot[sid] |= (1 << buffers[0].id);
174       }
175     }
176     if (buffers.size() > 1) {
177       ref_config.gld_fb_idx[sid] = buffers[1].id;
178       ref_config.reference_golden[sid] = buffers[1].referenced;
179       if (buffers[1].updated) {
180         ref_config.update_buffer_slot[sid] |= (1 << buffers[1].id);
181       }
182     }
183     if (buffers.size() > 2) {
184       ref_config.alt_fb_idx[sid] = buffers[2].id;
185       ref_config.reference_alt_ref[sid] = buffers[2].referenced;
186       if (buffers[2].updated) {
187         ref_config.update_buffer_slot[sid] |= (1 << buffers[2].id);
188       }
189     }
190   }
191   // TODO(bugs.webrtc.org/11999): Fill ref_config.duration
192   return ref_config;
193 }
194 
// Reports whether the VP9 denoiser may be enabled on this platform.
// ARM/Android builds keep it off because the optimized paths are pending;
// everywhere else the denoiser is on by default.
bool AllowDenoising() {
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
  return false;
#else
  return true;
#endif
}
205 
206 }  // namespace
207 
EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt * pkt,void * user_data)208 void LibvpxVp9Encoder::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
209                                                         void* user_data) {
210   LibvpxVp9Encoder* enc = static_cast<LibvpxVp9Encoder*>(user_data);
211   enc->GetEncodedLayerFrame(pkt);
212 }
213 
// Constructs the encoder wrapper. Heavy initialization (libvpx contexts,
// image buffers, rate setup) is deferred to InitEncode(); here we only record
// settings derived from the SDP codec params and the field trials.
LibvpxVp9Encoder::LibvpxVp9Encoder(const cricket::VideoCodec& codec,
                                   std::unique_ptr<LibvpxInterface> interface,
                                   const FieldTrialsView& trials)
    : libvpx_(std::move(interface)),
      encoded_image_(),
      encoded_complete_callback_(nullptr),
      // Default to profile 0 when the SDP carries no usable profile id.
      profile_(
          ParseSdpForVP9Profile(codec.params).value_or(VP9Profile::kProfile0)),
      inited_(false),
      timestamp_(0),
      rc_max_intra_target_(0),
      encoder_(nullptr),
      config_(nullptr),
      raw_(nullptr),
      input_image_(nullptr),
      // First encoded frame is always a key frame.
      force_key_frame_(true),
      pics_since_key_(0),
      num_temporal_layers_(0),
      num_spatial_layers_(0),
      num_active_spatial_layers_(0),
      first_active_layer_(0),
      // Field trial: whether turning a layer off forces a key frame.
      layer_deactivation_requires_key_frame_(absl::StartsWith(
          trials.Lookup("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation"),
          "Enabled")),
      is_svc_(false),
      inter_layer_pred_(InterLayerPredMode::kOn),
      external_ref_control_(false),  // Set in InitEncode because of tests.
      trusted_rate_controller_(
          RateControlSettings::ParseFromKeyValueConfig(&trials)
              .LibvpxVp9TrustedRateController()),
      layer_buffering_(false),
      full_superframe_drop_(true),
      first_frame_in_picture_(true),
      ss_info_needed_(false),
      force_all_active_layers_(false),
      is_flexible_mode_(false),
      variable_framerate_experiment_(ParseVariableFramerateConfig(trials)),
      variable_framerate_controller_(
          variable_framerate_experiment_.framerate_limit),
      quality_scaler_experiment_(ParseQualityScalerConfig(trials)),
      // External reference control is opt-out via field trial.
      external_ref_ctrl_(
          !absl::StartsWith(trials.Lookup("WebRTC-Vp9ExternalRefCtrl"),
                            "Disabled")),
      performance_flags_(ParsePerformanceFlagsFromTrials(trials)),
      num_steady_state_frames_(0),
      config_changed_(true) {
  codec_ = {};
  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
}
263 
// Frees all libvpx resources via Release(); safe even if InitEncode() was
// never called or already released.
LibvpxVp9Encoder::~LibvpxVp9Encoder() {
  Release();
}
267 
// FEC rates are not controllable for this encoder; the override is accepted
// to satisfy the VideoEncoder interface and intentionally ignored.
void LibvpxVp9Encoder::SetFecControllerOverride(FecControllerOverride*) {
  // Ignored.
}
271 
Release()272 int LibvpxVp9Encoder::Release() {
273   int ret_val = WEBRTC_VIDEO_CODEC_OK;
274 
275   if (encoder_ != nullptr) {
276     if (inited_) {
277       if (libvpx_->codec_destroy(encoder_)) {
278         ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
279       }
280     }
281     delete encoder_;
282     encoder_ = nullptr;
283   }
284   if (config_ != nullptr) {
285     delete config_;
286     config_ = nullptr;
287   }
288   if (raw_ != nullptr) {
289     libvpx_->img_free(raw_);
290     raw_ = nullptr;
291   }
292   inited_ = false;
293   return ret_val;
294 }
295 
// True when the application supplied explicit per-spatial-layer bitrates.
bool LibvpxVp9Encoder::ExplicitlyConfiguredSpatialLayers() const {
  // We check target_bitrate_bps of the 0th layer to see if the spatial layers
  // (i.e. bitrates) were explicitly configured.
  return codec_.spatialLayers[0].targetBitrate > 0;
}
301 
// Applies `bitrate_allocation` to the libvpx configuration: distributes the
// total bitrate over spatial/temporal layers, recomputes the active-layer
// range, and decides whether a key frame must be forced. Returns false when
// the layer configuration is invalid (missing scaling factors or an
// unsupported temporal layer count).
bool LibvpxVp9Encoder::SetSvcRates(
    const VideoBitrateAllocation& bitrate_allocation) {
  std::pair<size_t, size_t> current_layers =
      GetActiveLayers(current_bitrate_allocation_);
  std::pair<size_t, size_t> new_layers = GetActiveLayers(bitrate_allocation);

  // Without full inter-layer prediction a newly enabled layer has no valid
  // spatial reference, so activation must start from a key frame.
  const bool layer_activation_requires_key_frame =
      inter_layer_pred_ == InterLayerPredMode::kOff ||
      inter_layer_pred_ == InterLayerPredMode::kOnKeyPic;
  const bool lower_layers_enabled = new_layers.first < current_layers.first;
  const bool higher_layers_enabled = new_layers.second > current_layers.second;
  const bool disabled_layers = new_layers.first > current_layers.first ||
                               new_layers.second < current_layers.second;

  if (lower_layers_enabled ||
      (higher_layers_enabled && layer_activation_requires_key_frame) ||
      (disabled_layers && layer_deactivation_requires_key_frame_)) {
    force_key_frame_ = true;
  }

  if (current_layers != new_layers) {
    // Active-layer set changed: re-send the scalability structure (SS info).
    ss_info_needed_ = true;
  }

  config_->rc_target_bitrate = bitrate_allocation.get_sum_kbps();

  if (ExplicitlyConfiguredSpatialLayers()) {
    // Per-layer bitrates come straight from the allocation.
    for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
      const bool was_layer_active = (config_->ss_target_bitrate[sl_idx] > 0);
      config_->ss_target_bitrate[sl_idx] =
          bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000;

      for (size_t tl_idx = 0; tl_idx < num_temporal_layers_; ++tl_idx) {
        config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] =
            bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000;
      }

      if (!was_layer_active) {
        // Reset frame rate controller if layer is resumed after pause.
        framerate_controller_[sl_idx].Reset();
      }

      framerate_controller_[sl_idx].SetTargetRate(
          codec_.spatialLayers[sl_idx].maxFramerate);
    }
  } else {
    // No explicit configuration: split the total in proportion to each
    // layer's resolution scaling factor.
    float rate_ratio[VPX_MAX_LAYERS] = {0};
    float total = 0;
    for (int i = 0; i < num_spatial_layers_; ++i) {
      if (svc_params_.scaling_factor_num[i] <= 0 ||
          svc_params_.scaling_factor_den[i] <= 0) {
        RTC_LOG(LS_ERROR) << "Scaling factors not specified!";
        return false;
      }
      rate_ratio[i] = static_cast<float>(svc_params_.scaling_factor_num[i]) /
                      svc_params_.scaling_factor_den[i];
      total += rate_ratio[i];
    }

    for (int i = 0; i < num_spatial_layers_; ++i) {
      RTC_CHECK_GT(total, 0);
      config_->ss_target_bitrate[i] = static_cast<unsigned int>(
          config_->rc_target_bitrate * rate_ratio[i] / total);
      // layer_target_bitrate entries are cumulative over temporal layers;
      // fixed 2:1 (T2) and 2:1:1 (T3) splits are applied per spatial layer.
      if (num_temporal_layers_ == 1) {
        config_->layer_target_bitrate[i] = config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 2) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] * 2 / 3;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->ss_target_bitrate[i];
      } else if (num_temporal_layers_ == 3) {
        config_->layer_target_bitrate[i * num_temporal_layers_] =
            config_->ss_target_bitrate[i] / 2;
        config_->layer_target_bitrate[i * num_temporal_layers_ + 1] =
            config_->layer_target_bitrate[i * num_temporal_layers_] +
            (config_->ss_target_bitrate[i] / 4);
        config_->layer_target_bitrate[i * num_temporal_layers_ + 2] =
            config_->ss_target_bitrate[i];
      } else {
        RTC_LOG(LS_ERROR) << "Unsupported number of temporal layers: "
                          << num_temporal_layers_;
        return false;
      }

      framerate_controller_[i].SetTargetRate(codec_.maxFramerate);
    }
  }

  // Recompute the [first_active_layer_, num_active_spatial_layers_) range;
  // a gap (inactive middle layer) is not supported.
  num_active_spatial_layers_ = 0;
  first_active_layer_ = 0;
  bool seen_active_layer = false;
  bool expect_no_more_active_layers = false;
  for (int i = 0; i < num_spatial_layers_; ++i) {
    if (config_->ss_target_bitrate[i] > 0) {
      RTC_DCHECK(!expect_no_more_active_layers) << "Only middle layer is "
                                                   "deactivated.";
      if (!seen_active_layer) {
        first_active_layer_ = i;
      }
      num_active_spatial_layers_ = i + 1;
      seen_active_layer = true;
    } else {
      expect_no_more_active_layers = seen_active_layer;
    }
  }

  if (seen_active_layer && performance_flags_.use_per_layer_speed) {
    // Denoiser setting follows the topmost active layer's performance flags.
    bool denoiser_on =
        AllowDenoising() && codec_.VP9()->denoisingOn &&
        performance_flags_by_spatial_index_[num_active_spatial_layers_ - 1]
            .allow_denoising;
    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                           denoiser_on ? 1 : 0);
  }

  if (higher_layers_enabled && !force_key_frame_) {
    // Prohibit drop of all layers for the next frame, so newly enabled
    // layer would have a valid spatial reference.
    for (size_t i = 0; i < num_spatial_layers_; ++i) {
      svc_drop_frame_.framedrop_thresh[i] = 0;
    }
    force_all_active_layers_ = true;
  }

  if (svc_controller_) {
    for (int sid = 0; sid < num_spatial_layers_; ++sid) {
      // Bitrates in `layer_target_bitrate` are accumulated for each temporal
      // layer but in `VideoBitrateAllocation` they should be separated.
      int previous_bitrate_kbps = 0;
      for (int tid = 0; tid < num_temporal_layers_; ++tid) {
        int accumulated_bitrate_kbps =
            config_->layer_target_bitrate[sid * num_temporal_layers_ + tid];
        int single_layer_bitrate_kbps =
            accumulated_bitrate_kbps - previous_bitrate_kbps;
        RTC_DCHECK_GE(single_layer_bitrate_kbps, 0);
        current_bitrate_allocation_.SetBitrate(
            sid, tid, single_layer_bitrate_kbps * 1'000);
        previous_bitrate_kbps = accumulated_bitrate_kbps;
      }
    }
    svc_controller_->OnRatesUpdated(current_bitrate_allocation_);
  } else {
    current_bitrate_allocation_ = bitrate_allocation;
  }
  config_changed_ = true;
  return true;
}
449 
DisableSpatialLayer(int sid)450 void LibvpxVp9Encoder::DisableSpatialLayer(int sid) {
451   RTC_DCHECK_LT(sid, num_spatial_layers_);
452   if (config_->ss_target_bitrate[sid] == 0) {
453     return;
454   }
455   config_->ss_target_bitrate[sid] = 0;
456   for (int tid = 0; tid < num_temporal_layers_; ++tid) {
457     config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] = 0;
458   }
459   config_changed_ = true;
460 }
461 
EnableSpatialLayer(int sid)462 void LibvpxVp9Encoder::EnableSpatialLayer(int sid) {
463   RTC_DCHECK_LT(sid, num_spatial_layers_);
464   if (config_->ss_target_bitrate[sid] > 0) {
465     return;
466   }
467   for (int tid = 0; tid < num_temporal_layers_; ++tid) {
468     config_->layer_target_bitrate[sid * num_temporal_layers_ + tid] =
469         current_bitrate_allocation_.GetTemporalLayerSum(sid, tid) / 1000;
470   }
471   config_->ss_target_bitrate[sid] =
472       current_bitrate_allocation_.GetSpatialLayerSum(sid) / 1000;
473   RTC_DCHECK_GT(config_->ss_target_bitrate[sid], 0);
474   config_changed_ = true;
475 }
476 
SetActiveSpatialLayers()477 void LibvpxVp9Encoder::SetActiveSpatialLayers() {
478   // Svc controller may decide to skip a frame at certain spatial layer even
479   // when bitrate for it is non-zero, however libvpx uses configured bitrate as
480   // a signal which layers should be produced.
481   RTC_DCHECK(svc_controller_);
482   RTC_DCHECK(!layer_frames_.empty());
483   RTC_DCHECK(absl::c_is_sorted(
484       layer_frames_, [](const ScalableVideoController::LayerFrameConfig& lhs,
485                         const ScalableVideoController::LayerFrameConfig& rhs) {
486         return lhs.SpatialId() < rhs.SpatialId();
487       }));
488 
489   auto frame_it = layer_frames_.begin();
490   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
491     if (frame_it != layer_frames_.end() && frame_it->SpatialId() == sid) {
492       EnableSpatialLayer(sid);
493       ++frame_it;
494     } else {
495       DisableSpatialLayer(sid);
496     }
497   }
498 }
499 
// Applies a new bitrate allocation and framerate from the rate controller.
// Invalid calls (uninitialized encoder, encoder in error state, framerate
// below 1 fps) are logged and ignored.
void LibvpxVp9Encoder::SetRates(const RateControlParameters& parameters) {
  if (!inited_) {
    RTC_LOG(LS_WARNING) << "SetRates() called while uninitialized.";
    return;
  }
  if (encoder_->err) {
    RTC_LOG(LS_WARNING) << "Encoder in error state: " << encoder_->err;
    return;
  }
  if (parameters.framerate_fps < 1.0) {
    RTC_LOG(LS_WARNING) << "Unsupported framerate: "
                        << parameters.framerate_fps;
    return;
  }

  // Round to the nearest integer fps.
  codec_.maxFramerate = static_cast<uint32_t>(parameters.framerate_fps + 0.5);

  bool res = SetSvcRates(parameters.bitrate);
  RTC_DCHECK(res) << "Failed to set new bitrate allocation";
  config_changed_ = true;
}
521 
// TODO(eladalon): s/inst/codec_settings/g.
// Validates `inst`, (re)creates the libvpx encoder/config/image objects and
// fills the static encoder configuration (profile, rate control, temporal
// layering). Finishes by delegating to InitAndSetControlSettings().
int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
                                 const Settings& settings) {
  if (inst == nullptr) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->maxFramerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Allow zero to represent an unspecified maxBitRate
  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->width < 1 || inst->height < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (settings.number_of_cores < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->VP9().numberOfTemporalLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // libvpx probably does not support more than 3 spatial layers.
  if (inst->VP9().numberOfSpatialLayers > 3) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // Remember the previous image format so a profile-0 re-init keeps using
  // whatever pixel format was in use before Release() frees `raw_`.
  absl::optional<vpx_img_fmt_t> previous_img_fmt =
      raw_ ? absl::make_optional<vpx_img_fmt_t>(raw_->fmt) : absl::nullopt;

  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (encoder_ == nullptr) {
    encoder_ = new vpx_codec_ctx_t;
    memset(encoder_, 0, sizeof(*encoder_));
  }
  if (config_ == nullptr) {
    config_ = new vpx_codec_enc_cfg_t;
    memset(config_, 0, sizeof(*config_));
  }
  timestamp_ = 0;
  if (&codec_ != inst) {
    codec_ = *inst;
  }
  memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));

  force_key_frame_ = true;
  pics_since_key_ = 0;

  // Layer counts and inter-layer prediction either come from an explicit
  // ScalabilityMode or from the legacy VP9-specific codec settings.
  absl::optional<ScalabilityMode> scalability_mode = inst->GetScalabilityMode();
  if (scalability_mode.has_value()) {
    // Use settings from `ScalabilityMode` identifier.
    RTC_LOG(LS_INFO) << "Create scalability structure "
                     << ScalabilityModeToString(*scalability_mode);
    svc_controller_ = CreateScalabilityStructure(*scalability_mode);
    if (!svc_controller_) {
      RTC_LOG(LS_WARNING) << "Failed to create scalability structure.";
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
    }
    ScalableVideoController::StreamLayersConfig info =
        svc_controller_->StreamConfig();
    num_spatial_layers_ = info.num_spatial_layers;
    num_temporal_layers_ = info.num_temporal_layers;
    inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode);
  } else {
    num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
    RTC_DCHECK_GT(num_spatial_layers_, 0);
    num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
    if (num_temporal_layers_ == 0) {
      num_temporal_layers_ = 1;
    }
    inter_layer_pred_ = inst->VP9().interLayerPred;
    svc_controller_ = CreateVp9ScalabilityStructure(*inst);
  }

  framerate_controller_ = std::vector<FramerateControllerDeprecated>(
      num_spatial_layers_, FramerateControllerDeprecated(codec_.maxFramerate));

  is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);

  // Populate encoder configuration with default values.
  if (libvpx_->codec_enc_config_default(vpx_codec_vp9_cx(), config_, 0)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

  // Image format and bit depth follow the negotiated VP9 profile.
  vpx_img_fmt img_fmt = VPX_IMG_FMT_NONE;
  unsigned int bits_for_storage = 8;
  switch (profile_) {
    case VP9Profile::kProfile0:
      img_fmt = previous_img_fmt.value_or(VPX_IMG_FMT_I420);
      bits_for_storage = 8;
      config_->g_bit_depth = VPX_BITS_8;
      config_->g_profile = 0;
      config_->g_input_bit_depth = 8;
      break;
    case VP9Profile::kProfile1:
      // Encoding of profile 1 is not implemented. It would require extended
      // support for I444, I422, and I440 buffers.
      RTC_DCHECK_NOTREACHED();
      break;
    case VP9Profile::kProfile2:
      img_fmt = VPX_IMG_FMT_I42016;
      bits_for_storage = 16;
      config_->g_bit_depth = VPX_BITS_10;
      config_->g_profile = 2;
      config_->g_input_bit_depth = 10;
      break;
    case VP9Profile::kProfile3:
      // Encoding of profile 3 is not implemented.
      RTC_DCHECK_NOTREACHED();
      break;
  }

  // Creating a wrapper to the image - setting image data to nullptr. Actual
  // pointer will be set in encode. Setting align to 1, as it is meaningless
  // (actual memory is not allocated).
  raw_ = libvpx_->img_wrap(nullptr, img_fmt, codec_.width, codec_.height, 1,
                           nullptr);
  raw_->bit_depth = bits_for_storage;

  config_->g_w = codec_.width;
  config_->g_h = codec_.height;
  config_->rc_target_bitrate = inst->startBitrate;  // in kbit/s
  config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
  // Setting the time base of the codec.
  config_->g_timebase.num = 1;
  config_->g_timebase.den = 90000;
  config_->g_lag_in_frames = 0;  // 0- no frame lagging
  config_->g_threads = 1;
  // Rate control settings.
  config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0;
  config_->rc_end_usage = VPX_CBR;
  config_->g_pass = VPX_RC_ONE_PASS;
  // Screenshare content tolerates a higher QP floor.
  config_->rc_min_quantizer =
      codec_.mode == VideoCodecMode::kScreensharing ? 8 : 2;
  config_->rc_max_quantizer = 52;
  config_->rc_undershoot_pct = 50;
  config_->rc_overshoot_pct = 50;
  config_->rc_buf_initial_sz = 500;
  config_->rc_buf_optimal_sz = 600;
  config_->rc_buf_sz = 1000;
  // Set the maximum target size of any key-frame.
  rc_max_intra_target_ = MaxIntraTarget(config_->rc_buf_optimal_sz);
  // Key-frame interval is enforced manually by this wrapper.
  config_->kf_mode = VPX_KF_DISABLED;
  // TODO(webm:1592): work-around for libvpx issue, as it can still
  // put some key-frames at will even in VPX_KF_DISABLED kf_mode.
  config_->kf_max_dist = inst->VP9().keyFrameInterval;
  config_->kf_min_dist = config_->kf_max_dist;
  if (quality_scaler_experiment_.enabled) {
    // In that experiment webrtc wide quality scaler is used instead of libvpx
    // internal scaler.
    config_->rc_resize_allowed = 0;
  } else {
    config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
  }
  // Determine number of threads based on the image size and #cores.
  config_->g_threads =
      NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);

  is_flexible_mode_ = inst->VP9().flexibleMode;

  if (num_spatial_layers_ > 1 &&
      codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
    RTC_LOG(LS_ERROR) << "Flexible mode is required for screenshare with "
                         "several spatial layers";
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  // External reference control is required for different frame rate on spatial
  // layers because libvpx generates rtp incompatible references in this case.
  external_ref_control_ = external_ref_ctrl_ ||
                          (num_spatial_layers_ > 1 &&
                           codec_.mode == VideoCodecMode::kScreensharing) ||
                          inter_layer_pred_ == InterLayerPredMode::kOn;

  // Configure the temporal layering pattern (GOF + libvpx decimators).
  if (num_temporal_layers_ == 1) {
    gof_.SetGofInfoVP9(kTemporalStructureMode1);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
    config_->ts_number_layers = 1;
    config_->ts_rate_decimator[0] = 1;
    config_->ts_periodicity = 1;
    config_->ts_layer_id[0] = 0;
  } else if (num_temporal_layers_ == 2) {
    gof_.SetGofInfoVP9(kTemporalStructureMode2);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0101;
    config_->ts_number_layers = 2;
    config_->ts_rate_decimator[0] = 2;
    config_->ts_rate_decimator[1] = 1;
    config_->ts_periodicity = 2;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 1;
  } else if (num_temporal_layers_ == 3) {
    gof_.SetGofInfoVP9(kTemporalStructureMode3);
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
    config_->ts_number_layers = 3;
    config_->ts_rate_decimator[0] = 4;
    config_->ts_rate_decimator[1] = 2;
    config_->ts_rate_decimator[2] = 1;
    config_->ts_periodicity = 4;
    config_->ts_layer_id[0] = 0;
    config_->ts_layer_id[1] = 2;
    config_->ts_layer_id[2] = 1;
    config_->ts_layer_id[3] = 2;
  } else {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  if (external_ref_control_) {
    config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
    if (num_temporal_layers_ > 1 && num_spatial_layers_ > 1 &&
        codec_.mode == VideoCodecMode::kScreensharing) {
      // External reference control for several temporal layers with different
      // frame rates on spatial layers is not implemented yet.
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
    }
  }
  ref_buf_ = {};

  return InitAndSetControlSettings(inst);
}
745 
NumberOfThreads(int width,int height,int number_of_cores)746 int LibvpxVp9Encoder::NumberOfThreads(int width,
747                                       int height,
748                                       int number_of_cores) {
749   // Keep the number of encoder threads equal to the possible number of column
750   // tiles, which is (1, 2, 4, 8). See comments below for VP9E_SET_TILE_COLUMNS.
751   if (width * height >= 1280 * 720 && number_of_cores > 4) {
752     return 4;
753   } else if (width * height >= 640 * 360 && number_of_cores > 2) {
754     return 2;
755   } else {
756 // Use 2 threads for low res on ARM.
757 #if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || \
758     defined(WEBRTC_ANDROID)
759     if (width * height >= 320 * 180 && number_of_cores > 2) {
760       return 2;
761     }
762 #endif
763     // 1 thread less than VGA.
764     return 1;
765   }
766 }
767 
// One-time encoder setup: finalizes per-layer QP limits and SVC scaling
// factors, computes the initial bitrate allocation, initializes the libvpx
// encoder instance, and issues all static codec_control() settings.
// Returns a WEBRTC_VIDEO_CODEC_* status code.
int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
  // Set QP-min/max per spatial and temporal layer.
  int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
  for (int i = 0; i < tot_num_layers; ++i) {
    svc_params_.max_quantizers[i] = config_->rc_max_quantizer;
    svc_params_.min_quantizers[i] = config_->rc_min_quantizer;
  }
  config_->ss_number_layers = num_spatial_layers_;
  if (svc_controller_) {
    // Scaling factors come straight from the SVC controller's stream config.
    auto stream_config = svc_controller_->StreamConfig();
    for (int i = 0; i < stream_config.num_spatial_layers; ++i) {
      svc_params_.scaling_factor_num[i] = stream_config.scaling_factor_num[i];
      svc_params_.scaling_factor_den[i] = stream_config.scaling_factor_den[i];
    }
  } else if (ExplicitlyConfiguredSpatialLayers()) {
    // Derive each layer's scale factor from the configured layer sizes and
    // validate that the configuration is expressible in libvpx terms.
    for (int i = 0; i < num_spatial_layers_; ++i) {
      const auto& layer = codec_.spatialLayers[i];
      RTC_CHECK_GT(layer.width, 0);
      const int scale_factor = codec_.width / layer.width;
      RTC_DCHECK_GT(scale_factor, 0);

      // Ensure scaler factor is integer.
      if (scale_factor * layer.width != codec_.width) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      // Ensure scale factor is the same in both dimensions.
      if (scale_factor * layer.height != codec_.height) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      // Ensure scale factor is power of two.
      const bool is_pow_of_two = (scale_factor & (scale_factor - 1)) == 0;
      if (!is_pow_of_two) {
        return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
      }

      svc_params_.scaling_factor_num[i] = 1;
      svc_params_.scaling_factor_den[i] = scale_factor;

      RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
      RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
      if (i > 0) {
        // Frame rate of high spatial layer is supposed to be equal or higher
        // than frame rate of low spatial layer.
        RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
                      codec_.spatialLayers[i - 1].maxFramerate);
      }
    }
  } else {
    int scaling_factor_num = 256;
    for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
      // 1:2 scaling in each dimension.
      // NOTE(review): scaling_factor_num is never halved inside this loop, so
      // every layer is assigned 256/256 (1:1), which contradicts the comment
      // above. Verify against upstream whether a `scaling_factor_num /= 2;`
      // step was dropped intentionally (this branch may be unreachable when an
      // SVC controller is always present).
      svc_params_.scaling_factor_num[i] = scaling_factor_num;
      svc_params_.scaling_factor_den[i] = 256;
    }
  }

  UpdatePerformanceFlags();
  RTC_DCHECK_EQ(performance_flags_by_spatial_index_.size(),
                static_cast<size_t>(num_spatial_layers_));

  // Compute and apply the initial per-layer bitrate allocation from the
  // configured start bitrate and frame rate.
  SvcRateAllocator init_allocator(codec_);
  current_bitrate_allocation_ =
      init_allocator.Allocate(VideoBitrateAllocationParameters(
          inst->startBitrate * 1000, inst->maxFramerate));
  if (!SetSvcRates(current_bitrate_allocation_)) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  const vpx_codec_err_t rv = libvpx_->codec_enc_init(
      encoder_, vpx_codec_vp9_cx(), config_,
      config_->g_bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Init error: " << libvpx_->codec_err_to_string(rv);
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

  if (performance_flags_.use_per_layer_speed) {
    for (int si = 0; si < num_spatial_layers_; ++si) {
      svc_params_.speed_per_layer[si] =
          performance_flags_by_spatial_index_[si].base_layer_speed;
      svc_params_.loopfilter_ctrl[si] =
          performance_flags_by_spatial_index_[si].deblock_mode;
    }
    // Denoising is enabled only if also allowed for the top spatial layer.
    bool denoiser_on =
        AllowDenoising() && inst->VP9().denoisingOn &&
        performance_flags_by_spatial_index_[num_spatial_layers_ - 1]
            .allow_denoising;
    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                           denoiser_on ? 1 : 0);
  }

  libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                         rc_max_intra_target_);
  // AQ mode 3 is cyclic refresh; 0 disables adaptive quantization.
  libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE,
                         inst->VP9().adaptiveQpMode ? 3 : 0);

  libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
  libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);

  if (is_svc_) {
    libvpx_->codec_control(encoder_, VP9E_SET_SVC, 1);
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
  }
  if (!is_svc_ || !performance_flags_.use_per_layer_speed) {
    // Single global speed setting: use the value configured for the highest
    // resolution (last entry).
    libvpx_->codec_control(
        encoder_, VP8E_SET_CPUUSED,
        performance_flags_by_spatial_index_.rbegin()->base_layer_speed);
  }

  if (num_spatial_layers_ > 1) {
    // Map the WebRTC inter-layer prediction mode onto libvpx's numeric enum.
    switch (inter_layer_pred_) {
      case InterLayerPredMode::kOn:
        libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
        break;
      case InterLayerPredMode::kOff:
        libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1);
        break;
      case InterLayerPredMode::kOnKeyPic:
        libvpx_->codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2);
        break;
      default:
        RTC_DCHECK_NOTREACHED();
    }

    memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
    const bool reverse_constrained_drop_mode =
        inter_layer_pred_ == InterLayerPredMode::kOn &&
        codec_.mode == VideoCodecMode::kScreensharing &&
        num_spatial_layers_ > 1;
    if (reverse_constrained_drop_mode) {
      // Screenshare dropping mode: drop a layer only together with all lower
      // layers. This ensures that drops on lower layers won't reduce frame-rate
      // for higher layers and reference structure is RTP-compatible.
#if 0
      // CONSTRAINED_FROM_ABOVE_DROP is not defined in the available version of
      // libvpx
      svc_drop_frame_.framedrop_mode = CONSTRAINED_FROM_ABOVE_DROP;
#else
      // NOTE(review): with the older libvpx this branch hard-aborts the whole
      // process at runtime. It is reachable whenever kOn inter-layer
      // prediction is combined with screensharing and multiple spatial
      // layers; consider returning WEBRTC_VIDEO_CODEC_ERR_PARAMETER from
      // InitEncode instead of calling abort() — confirm with the owners.
      abort();
#endif
      svc_drop_frame_.max_consec_drop = 5;
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
      // No buffering is needed because the highest layer is always present in
      // all frames in CONSTRAINED_FROM_ABOVE drop mode.
      layer_buffering_ = false;
    } else {
      // Configure encoder to drop entire superframe whenever it needs to drop
      // a layer. This mode is preferred over per-layer dropping which causes
      // quality flickering and is not compatible with RTP non-flexible mode.
      svc_drop_frame_.framedrop_mode =
          full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
      // Buffering is needed only for constrained layer drop, as it's not clear
      // which frame is the last.
      layer_buffering_ = !full_superframe_drop_;
      svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    }
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                           &svc_drop_frame_);
  }

  // Register callback for getting each spatial layer.
  vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
      LibvpxVp9Encoder::EncoderOutputCodedPacketCallback,
      reinterpret_cast<void*>(this)};
  libvpx_->codec_control(encoder_, VP9E_REGISTER_CX_CALLBACK,
                         reinterpret_cast<void*>(&cbp));

  // Control function to set the number of column tiles in encoding a frame, in
  // log2 unit: e.g., 0 = 1 tile column, 1 = 2 tile columns, 2 = 4 tile columns.
  // The number tile columns will be capped by the encoder based on image size
  // (minimum width of tile column is 256 pixels, maximum is 4096).
  libvpx_->codec_control(encoder_, VP9E_SET_TILE_COLUMNS,
                         static_cast<int>((config_->g_threads >> 1)));

  // Turn on row-based multithreading.
  libvpx_->codec_control(encoder_, VP9E_SET_ROW_MT, 1);

  if (AllowDenoising() && !performance_flags_.use_per_layer_speed) {
    libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
                           inst->VP9().denoisingOn ? 1 : 0);
  }

  if (codec_.mode == VideoCodecMode::kScreensharing) {
    // Adjust internal parameters to screen content.
    libvpx_->codec_control(encoder_, VP9E_SET_TUNE_CONTENT, 1);
  }
  // Enable encoder skip of static/low content blocks.
  libvpx_->codec_control(encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
  inited_ = true;
  config_changed_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}
967 
MaxIntraTarget(uint32_t optimal_buffer_size)968 uint32_t LibvpxVp9Encoder::MaxIntraTarget(uint32_t optimal_buffer_size) {
969   // Set max to the optimal buffer level (normalized by target BR),
970   // and scaled by a scale_par.
971   // Max target size = scale_par * optimal_buffer_size * targetBR[Kbps].
972   // This value is presented in percentage of perFrameBw:
973   // perFrameBw = targetBR[Kbps] * 1000 / framerate.
974   // The target in % is as follows:
975   float scale_par = 0.5;
976   uint32_t target_pct =
977       optimal_buffer_size * scale_par * codec_.maxFramerate / 10;
978   // Don't go below 3 times the per frame bandwidth.
979   const uint32_t min_intra_size = 300;
980   return (target_pct < min_intra_size) ? min_intra_size : target_pct;
981 }
982 
// Encodes one input frame, possibly producing several spatial layers within a
// single superframe. `frame_types` (for the single supported stream) may
// request a key frame. Encoded data is delivered asynchronously through the
// registered packet callback; returns a WEBRTC_VIDEO_CODEC_* status code.
int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
                             const std::vector<VideoFrameType>* frame_types) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (encoded_complete_callback_ == nullptr) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (num_active_spatial_layers_ == 0) {
    // All spatial layers are disabled, return without encoding anything.
    return WEBRTC_VIDEO_CODEC_OK;
  }

  // We only support one stream at the moment.
  if (frame_types && !frame_types->empty()) {
    if ((*frame_types)[0] == VideoFrameType::kVideoFrameKey) {
      force_key_frame_ = true;
    }
  }

  // Periodic key frame per the configured keyFrameInterval.
  if (pics_since_key_ + 1 ==
      static_cast<size_t>(codec_.VP9()->keyFrameInterval)) {
    force_key_frame_ = true;
  }

  if (svc_controller_) {
    // Ask the SVC controller which layer frames to produce for this picture.
    layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_);
    if (layer_frames_.empty()) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    if (layer_frames_.front().IsKeyframe()) {
      force_key_frame_ = true;
    }
  }

  vpx_svc_layer_id_t layer_id = {0};
  if (!force_key_frame_) {
    const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
    layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];

    if (codec_.mode == VideoCodecMode::kScreensharing) {
      const uint32_t frame_timestamp_ms =
          1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;

      // To ensure that several rate-limiters with different limits don't
      // interfere, they must be queried in order of increasing limit.

      bool use_steady_state_limiter =
          variable_framerate_experiment_.enabled &&
          input_image.update_rect().IsEmpty() &&
          num_steady_state_frames_ >=
              variable_framerate_experiment_.frames_before_steady_state;

      // Need to check all frame limiters, even if lower layers are disabled,
      // because variable frame-rate limiter should be checked after the first
      // layer. It's easier to overwrite active layers after, then check all
      // cases.
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        // layer_id.spatial_layer_id stays in lock-step with sl_idx here: every
        // iteration that continues the loop also increments it below.
        const float layer_fps =
            framerate_controller_[layer_id.spatial_layer_id].GetTargetRate();
        // Use steady state rate-limiter at the correct place.
        if (use_steady_state_limiter &&
            layer_fps > variable_framerate_experiment_.framerate_limit - 1e-9) {
          if (variable_framerate_controller_.DropFrame(frame_timestamp_ms)) {
            // Setting the id past the last active layer drops the picture.
            layer_id.spatial_layer_id = num_active_spatial_layers_;
          }
          // Break always: if rate limiter triggered frame drop, no need to
          // continue; otherwise, the rate is less than the next limiters.
          break;
        }
        if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
          ++layer_id.spatial_layer_id;
        } else {
          break;
        }
      }

      if (use_steady_state_limiter &&
          layer_id.spatial_layer_id < num_active_spatial_layers_) {
        variable_framerate_controller_.AddFrame(frame_timestamp_ms);
      }
    }

    if (force_all_active_layers_) {
      layer_id.spatial_layer_id = first_active_layer_;
      force_all_active_layers_ = false;
    }

    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
      // Drop entire picture.
      return WEBRTC_VIDEO_CODEC_OK;
    }
  }

  // Need to set temporal layer id on ALL layers, even disabled ones.
  // Otherwise libvpx might produce frames on a disabled layer:
  // http://crbug.com/1051476
  for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
    layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id;
  }

  if (layer_id.spatial_layer_id < first_active_layer_) {
    layer_id.spatial_layer_id = first_active_layer_;
  }

  if (svc_controller_) {
    // The controller's frame config overrides the GOF-derived layer ids.
    layer_id.spatial_layer_id = layer_frames_.front().SpatialId();
    layer_id.temporal_layer_id = layer_frames_.front().TemporalId();
    for (const auto& layer : layer_frames_) {
      layer_id.temporal_layer_id_per_spatial[layer.SpatialId()] =
          layer.TemporalId();
    }
    SetActiveSpatialLayers();
  }

  if (is_svc_ && performance_flags_.use_per_layer_speed) {
    // Update speed settings that might depend on temporal index.
    bool speed_updated = false;
    for (int sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) {
      const int target_speed =
          layer_id.temporal_layer_id_per_spatial[sl_idx] == 0
              ? performance_flags_by_spatial_index_[sl_idx].base_layer_speed
              : performance_flags_by_spatial_index_[sl_idx].high_layer_speed;
      if (svc_params_.speed_per_layer[sl_idx] != target_speed) {
        svc_params_.speed_per_layer[sl_idx] = target_speed;
        speed_updated = true;
      }
    }
    if (speed_updated) {
      libvpx_->codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_);
    }
  }

  libvpx_->codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);

  if (num_spatial_layers_ > 1) {
    // Update frame dropping settings as they may change on per-frame basis.
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                           &svc_drop_frame_);
  }

  if (config_changed_) {
    if (libvpx_->codec_enc_config_set(encoder_, config_)) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }

    if (!performance_flags_.use_per_layer_speed) {
      // Not setting individual speeds per layer, find the highest active
      // resolution instead and base the speed on that.
      for (int i = num_spatial_layers_ - 1; i >= 0; --i) {
        if (config_->ss_target_bitrate[i] > 0) {
          int width = (svc_params_.scaling_factor_num[i] * config_->g_w) /
                      svc_params_.scaling_factor_den[i];
          int height = (svc_params_.scaling_factor_num[i] * config_->g_h) /
                       svc_params_.scaling_factor_den[i];
          // settings_by_resolution is keyed by pixel count; std::prev of
          // lower_bound() picks the entry for the largest key not exceeding
          // this layer's pixel count.
          int speed =
              std::prev(performance_flags_.settings_by_resolution.lower_bound(
                            width * height))
                  ->second.base_layer_speed;
          libvpx_->codec_control(encoder_, VP8E_SET_CPUUSED, speed);
          break;
        }
      }
    }
    config_changed_ = false;
  }

  RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
  RTC_DCHECK_EQ(input_image.height(), raw_->d_h);

  // Set input image for use in the callback.
  // This was necessary since you need some information from input_image.
  // You can save only the necessary information (such as timestamp) instead of
  // doing this.
  input_image_ = &input_image;

  // In case we need to map the buffer, `mapped_buffer` is used to keep it alive
  // through reference counting until after encoding has finished.
  rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer;
  const I010BufferInterface* i010_buffer;
  rtc::scoped_refptr<const I010BufferInterface> i010_copy;
  switch (profile_) {
    case VP9Profile::kProfile0: {
      mapped_buffer =
          PrepareBufferForProfile0(input_image.video_frame_buffer());
      if (!mapped_buffer) {
        return WEBRTC_VIDEO_CODEC_ERROR;
      }
      break;
    }
    case VP9Profile::kProfile1: {
      // Profile 1 is not supported by this encoder wrapper.
      RTC_DCHECK_NOTREACHED();
      break;
    }
    case VP9Profile::kProfile2: {
      // We can inject kI010 frames directly for encode. All other formats
      // should be converted to it.
      switch (input_image.video_frame_buffer()->type()) {
        case VideoFrameBuffer::Type::kI010: {
          i010_buffer = input_image.video_frame_buffer()->GetI010();
          break;
        }
        default: {
          auto i420_buffer = input_image.video_frame_buffer()->ToI420();
          if (!i420_buffer) {
            RTC_LOG(LS_ERROR) << "Failed to convert "
                              << VideoFrameBufferTypeToString(
                                     input_image.video_frame_buffer()->type())
                              << " image to I420. Can't encode frame.";
            return WEBRTC_VIDEO_CODEC_ERROR;
          }
          i010_copy = I010Buffer::Copy(*i420_buffer);
          i010_buffer = i010_copy.get();
        }
      }
      // I010 samples are 16 bits wide, so byte strides are 2x the
      // per-sample strides reported by the buffer.
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataY()));
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataU()));
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(
          reinterpret_cast<const uint8_t*>(i010_buffer->DataV()));
      raw_->stride[VPX_PLANE_Y] = i010_buffer->StrideY() * 2;
      raw_->stride[VPX_PLANE_U] = i010_buffer->StrideU() * 2;
      raw_->stride[VPX_PLANE_V] = i010_buffer->StrideV() * 2;
      break;
    }
    case VP9Profile::kProfile3: {
      // Profile 3 is not supported by this encoder wrapper.
      RTC_DCHECK_NOTREACHED();
      break;
    }
  }

  vpx_enc_frame_flags_t flags = 0;
  if (force_key_frame_) {
    flags = VPX_EFLAG_FORCE_KF;
  }

  if (svc_controller_) {
    vpx_svc_ref_frame_config_t ref_config = Vp9References(layer_frames_);
    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                           &ref_config);
  } else if (external_ref_control_) {
    vpx_svc_ref_frame_config_t ref_config =
        SetReferences(force_key_frame_, layer_id.spatial_layer_id);

    if (VideoCodecMode::kScreensharing == codec_.mode) {
      for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
        // Per-layer frame duration in 90 kHz RTP ticks, derived from the
        // layer's target frame rate (capped by the codec's max frame rate).
        ref_config.duration[sl_idx] = static_cast<int64_t>(
            90000 / (std::min(static_cast<float>(codec_.maxFramerate),
                              framerate_controller_[sl_idx].GetTargetRate())));
      }
    }

    libvpx_->codec_control(encoder_, VP9E_SET_SVC_REF_FRAME_CONFIG,
                           &ref_config);
  }

  first_frame_in_picture_ = true;

  // TODO(ssilkin): Frame duration should be specified per spatial layer
  // since their frame rate can be different. For now calculate frame duration
  // based on target frame rate of the highest spatial layer, which frame rate
  // is supposed to be equal or higher than frame rate of low spatial layers.
  // Also, timestamp should represent actual time passed since previous frame
  // (not 'expected' time). Then rate controller can drain buffer more
  // accurately.
  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
  float target_framerate_fps =
      (codec_.mode == VideoCodecMode::kScreensharing)
          ? std::min(static_cast<float>(codec_.maxFramerate),
                     framerate_controller_[num_active_spatial_layers_ - 1]
                         .GetTargetRate())
          : codec_.maxFramerate;
  uint32_t duration = static_cast<uint32_t>(90000 / target_framerate_fps);
  const vpx_codec_err_t rv = libvpx_->codec_encode(
      encoder_, raw_, timestamp_, duration, flags, VPX_DL_REALTIME);
  if (rv != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Encoding error: " << libvpx_->codec_err_to_string(rv)
                      << "\n"
                         "Details: "
                      << libvpx_->codec_error(encoder_) << "\n"
                      << libvpx_->codec_error_detail(encoder_);
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  timestamp_ += duration;

  if (layer_buffering_) {
    // Deliver the buffered last spatial layer now that the whole superframe
    // has been encoded.
    const bool end_of_picture = true;
    DeliverBufferedFrame(end_of_picture);
  }

  return WEBRTC_VIDEO_CODEC_OK;
}
1277 
// Fills the VP9-specific (and, when an SVC controller is used, the
// codec-agnostic) parts of `codec_specific` for the encoded packet `pkt`.
// Writes the spatial/temporal layer indices to `spatial_idx`/`temporal_idx`
// (absl::nullopt when the corresponding dimension has a single layer).
// Returns false if the encoder produced a frame for a layer that was not
// requested from the SVC controller.
bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                             absl::optional<int>* spatial_idx,
                                             absl::optional<int>* temporal_idx,
                                             const vpx_codec_cx_pkt& pkt) {
  RTC_CHECK(codec_specific != nullptr);
  codec_specific->codecType = kVideoCodecVP9;
  CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);

  vp9_info->first_frame_in_picture = first_frame_in_picture_;
  vp9_info->flexible_mode = is_flexible_mode_;

  // Count pictures since the last key picture; only the first spatial layer
  // frame of a picture advances the counter.
  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
    pics_since_key_ = 0;
  } else if (first_frame_in_picture_) {
    ++pics_since_key_;
  }

  vpx_svc_layer_id_t layer_id = {0};
  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  // Can't have keyframe with non-zero temporal layer.
  RTC_DCHECK(pics_since_key_ != 0 || layer_id.temporal_layer_id == 0);

  RTC_CHECK_GT(num_temporal_layers_, 0);
  RTC_CHECK_GT(num_active_spatial_layers_, 0);
  if (num_temporal_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.temporal_layer_id, 0);
    vp9_info->temporal_idx = kNoTemporalIdx;
    *temporal_idx = absl::nullopt;
  } else {
    vp9_info->temporal_idx = layer_id.temporal_layer_id;
    *temporal_idx = layer_id.temporal_layer_id;
  }
  if (num_active_spatial_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
    *spatial_idx = absl::nullopt;
  } else {
    *spatial_idx = layer_id.spatial_layer_id;
  }

  const bool is_key_pic = (pics_since_key_ == 0);
  const bool is_inter_layer_pred_allowed =
      (inter_layer_pred_ == InterLayerPredMode::kOn ||
       (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic));

  // Always set inter_layer_predicted to true on high layer frame if inter-layer
  // prediction (ILP) is allowed even if encoder didn't actually use it.
  // Setting inter_layer_predicted to false would allow receiver to decode high
  // layer frame without decoding low layer frame. If that would happen (e.g.
  // if low layer frame is lost) then receiver won't be able to decode next high
  // layer frame which uses ILP.
  vp9_info->inter_layer_predicted =
      first_frame_in_picture_ ? false : is_inter_layer_pred_allowed;

  // Mark all low spatial layer frames as references (not just frames of
  // active low spatial layers) if inter-layer prediction is enabled since
  // these frames are indirect references of high spatial layer, which can
  // later be enabled without key frame.
  vp9_info->non_ref_for_inter_layer_pred =
      !is_inter_layer_pred_allowed ||
      layer_id.spatial_layer_id + 1 == num_spatial_layers_;

  // Always populate this, so that the packetizer can properly set the marker
  // bit.
  vp9_info->num_spatial_layers = num_active_spatial_layers_;
  vp9_info->first_active_layer = first_active_layer_;

  vp9_info->num_ref_pics = 0;
  FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted,
                       vp9_info);
  if (vp9_info->flexible_mode) {
    vp9_info->gof_idx = kNoGofIdx;
    if (!svc_controller_) {
      if (num_temporal_layers_ == 1) {
        vp9_info->temporal_up_switch = true;
      } else {
        // In flexible mode with > 1 temporal layer but no SVC controller we
        // can't technically determine if a frame is an upswitch point, use
        // gof-based data as proxy for now.
        // TODO(sprang): Remove once SVC controller is the only choice.
        vp9_info->gof_idx =
            static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
        vp9_info->temporal_up_switch =
            gof_.temporal_up_switch[vp9_info->gof_idx];
      }
    }
  } else {
    vp9_info->gof_idx =
        static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
    vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
    RTC_DCHECK(vp9_info->num_ref_pics == gof_.num_ref_pics[vp9_info->gof_idx] ||
               vp9_info->num_ref_pics == 0);
  }

  vp9_info->inter_pic_predicted = (!is_key_pic && vp9_info->num_ref_pics > 0);

  // Write SS on key frame of independently coded spatial layers and on base
  // temporal/spatial layer frame if number of layers changed without issuing
  // of key picture (inter-layer prediction is enabled).
  const bool is_key_frame = is_key_pic && !vp9_info->inter_layer_predicted;
  if (is_key_frame || (ss_info_needed_ && layer_id.temporal_layer_id == 0 &&
                       layer_id.spatial_layer_id == first_active_layer_)) {
    vp9_info->ss_data_available = true;
    vp9_info->spatial_layer_resolution_present = true;
    // Signal disabled layers.
    for (size_t i = 0; i < first_active_layer_; ++i) {
      vp9_info->width[i] = 0;
      vp9_info->height[i] = 0;
    }
    for (size_t i = first_active_layer_; i < num_active_spatial_layers_; ++i) {
      vp9_info->width[i] = codec_.width * svc_params_.scaling_factor_num[i] /
                           svc_params_.scaling_factor_den[i];
      vp9_info->height[i] = codec_.height * svc_params_.scaling_factor_num[i] /
                            svc_params_.scaling_factor_den[i];
    }
    if (vp9_info->flexible_mode) {
      vp9_info->gof.num_frames_in_gof = 0;
    } else {
      vp9_info->gof.CopyGofInfoVP9(gof_);
    }

    ss_info_needed_ = false;
  } else {
    vp9_info->ss_data_available = false;
  }

  first_frame_in_picture_ = false;

  // Populate codec-agnostic section in the codec specific structure.
  if (svc_controller_) {
    // Find the controller-requested config matching this spatial layer.
    auto it = absl::c_find_if(
        layer_frames_,
        [&](const ScalableVideoController::LayerFrameConfig& config) {
          return config.SpatialId() == layer_id.spatial_layer_id;
        });
    if (it == layer_frames_.end()) {
      RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S"
                        << layer_id.spatial_layer_id << "T"
                        << layer_id.temporal_layer_id
                        << " that wasn't requested.";
      return false;
    }
    codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it);
    if (is_key_frame) {
      codec_specific->template_structure =
          svc_controller_->DependencyStructure();
      auto& resolutions = codec_specific->template_structure->resolutions;
      resolutions.resize(num_spatial_layers_);
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        resolutions[sid] = RenderResolution(
            /*width=*/codec_.width * svc_params_.scaling_factor_num[sid] /
                svc_params_.scaling_factor_den[sid],
            /*height=*/codec_.height * svc_params_.scaling_factor_num[sid] /
                svc_params_.scaling_factor_den[sid]);
      }
    }
    if (is_flexible_mode_) {
      // Populate data for legacy temporal-upswitch state.
      // We can switch up to a higher temporal layer only if all temporal layers
      // higher than this (within the current spatial layer) are switch points.
      vp9_info->temporal_up_switch = true;
      for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_;
           ++i) {
        // Assumes decode targets are always ordered first by spatial then by
        // temporal id.
        size_t dti_index =
            (layer_id.spatial_layer_id * num_temporal_layers_) + i;
        vp9_info->temporal_up_switch &=
            (codec_specific->generic_frame_info
                 ->decode_target_indications[dti_index] ==
             DecodeTargetIndication::kSwitch);
      }
    }
  }
  return true;
}
1454 
FillReferenceIndices(const vpx_codec_cx_pkt & pkt,const size_t pic_num,const bool inter_layer_predicted,CodecSpecificInfoVP9 * vp9_info)1455 void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
1456                                             const size_t pic_num,
1457                                             const bool inter_layer_predicted,
1458                                             CodecSpecificInfoVP9* vp9_info) {
1459   vpx_svc_layer_id_t layer_id = {0};
1460   libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
1461 
1462   const bool is_key_frame =
1463       (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
1464 
1465   std::vector<RefFrameBuffer> ref_buf_list;
1466 
1467   if (is_svc_) {
1468     vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
1469     libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
1470                            &enc_layer_conf);
1471     char ref_buf_flags[] = "00000000";
1472     // There should be one character per buffer + 1 termination '\0'.
1473     static_assert(sizeof(ref_buf_flags) == kNumVp9Buffers + 1);
1474 
1475     if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
1476       const size_t fb_idx =
1477           enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
1478       RTC_DCHECK_LT(fb_idx, ref_buf_.size());
1479       if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
1480                     ref_buf_[fb_idx]) == ref_buf_list.end()) {
1481         ref_buf_list.push_back(ref_buf_[fb_idx]);
1482         ref_buf_flags[fb_idx] = '1';
1483       }
1484     }
1485 
1486     if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
1487       const size_t fb_idx =
1488           enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
1489       RTC_DCHECK_LT(fb_idx, ref_buf_.size());
1490       if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
1491                     ref_buf_[fb_idx]) == ref_buf_list.end()) {
1492         ref_buf_list.push_back(ref_buf_[fb_idx]);
1493         ref_buf_flags[fb_idx] = '1';
1494       }
1495     }
1496 
1497     if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
1498       const size_t fb_idx =
1499           enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
1500       RTC_DCHECK_LT(fb_idx, ref_buf_.size());
1501       if (std::find(ref_buf_list.begin(), ref_buf_list.end(),
1502                     ref_buf_[fb_idx]) == ref_buf_list.end()) {
1503         ref_buf_list.push_back(ref_buf_[fb_idx]);
1504         ref_buf_flags[fb_idx] = '1';
1505       }
1506     }
1507 
1508     RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
1509                         << layer_id.spatial_layer_id << " tl "
1510                         << layer_id.temporal_layer_id << " refered buffers "
1511                         << ref_buf_flags;
1512 
1513   } else if (!is_key_frame) {
1514     RTC_DCHECK_EQ(num_spatial_layers_, 1);
1515     RTC_DCHECK_EQ(num_temporal_layers_, 1);
1516     // In non-SVC mode encoder doesn't provide reference list. Assume each frame
1517     // refers previous one, which is stored in buffer 0.
1518     ref_buf_list.push_back(ref_buf_[0]);
1519   }
1520 
1521   std::vector<size_t> ref_pid_list;
1522 
1523   vp9_info->num_ref_pics = 0;
1524   for (const RefFrameBuffer& ref_buf : ref_buf_list) {
1525     RTC_DCHECK_LE(ref_buf.pic_num, pic_num);
1526     if (ref_buf.pic_num < pic_num) {
1527       if (inter_layer_pred_ != InterLayerPredMode::kOn) {
1528         // RTP spec limits temporal prediction to the same spatial layer.
1529         // It is safe to ignore this requirement if inter-layer prediction is
1530         // enabled for all frames when all base frames are relayed to receiver.
1531         RTC_DCHECK_EQ(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
1532       } else {
1533         RTC_DCHECK_LE(ref_buf.spatial_layer_id, layer_id.spatial_layer_id);
1534       }
1535       RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id);
1536 
1537       // Encoder may reference several spatial layers on the same previous
1538       // frame in case if some spatial layers are skipped on the current frame.
1539       // We shouldn't put duplicate references as it may break some old
1540       // clients and isn't RTP compatible.
1541       if (std::find(ref_pid_list.begin(), ref_pid_list.end(),
1542                     ref_buf.pic_num) != ref_pid_list.end()) {
1543         continue;
1544       }
1545       ref_pid_list.push_back(ref_buf.pic_num);
1546 
1547       const size_t p_diff = pic_num - ref_buf.pic_num;
1548       RTC_DCHECK_LE(p_diff, 127UL);
1549 
1550       vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
1551       ++vp9_info->num_ref_pics;
1552     } else {
1553       RTC_DCHECK(inter_layer_predicted);
1554       // RTP spec only allows to use previous spatial layer for inter-layer
1555       // prediction.
1556       RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
1557     }
1558   }
1559 }
1560 
UpdateReferenceBuffers(const vpx_codec_cx_pkt & pkt,const size_t pic_num)1561 void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
1562                                               const size_t pic_num) {
1563   vpx_svc_layer_id_t layer_id = {0};
1564   libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
1565 
1566   RefFrameBuffer frame_buf = {.pic_num = pic_num,
1567                               .spatial_layer_id = layer_id.spatial_layer_id,
1568                               .temporal_layer_id = layer_id.temporal_layer_id};
1569 
1570   if (is_svc_) {
1571     vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
1572     libvpx_->codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG,
1573                            &enc_layer_conf);
1574     const int update_buffer_slot =
1575         enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id];
1576 
1577     for (size_t i = 0; i < ref_buf_.size(); ++i) {
1578       if (update_buffer_slot & (1 << i)) {
1579         ref_buf_[i] = frame_buf;
1580       }
1581     }
1582 
1583     RTC_LOG(LS_VERBOSE) << "Frame " << pic_num << " sl "
1584                         << layer_id.spatial_layer_id << " tl "
1585                         << layer_id.temporal_layer_id << " updated buffers "
1586                         << (update_buffer_slot & (1 << 0) ? 1 : 0)
1587                         << (update_buffer_slot & (1 << 1) ? 1 : 0)
1588                         << (update_buffer_slot & (1 << 2) ? 1 : 0)
1589                         << (update_buffer_slot & (1 << 3) ? 1 : 0)
1590                         << (update_buffer_slot & (1 << 4) ? 1 : 0)
1591                         << (update_buffer_slot & (1 << 5) ? 1 : 0)
1592                         << (update_buffer_slot & (1 << 6) ? 1 : 0)
1593                         << (update_buffer_slot & (1 << 7) ? 1 : 0);
1594   } else {
1595     RTC_DCHECK_EQ(num_spatial_layers_, 1);
1596     RTC_DCHECK_EQ(num_temporal_layers_, 1);
1597     // In non-svc mode encoder doesn't provide reference list. Assume each frame
1598     // is reference and stored in buffer 0.
1599     ref_buf_[0] = frame_buf;
1600   }
1601 }
1602 
vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences(
    bool is_key_pic,
    int first_active_spatial_layer_id) {
  // Builds the per-spatial-layer reference/update configuration handed to
  // libvpx for the current picture, based on the group-of-frames (GOF)
  // structure in `gof_`.
  // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs.
  RTC_DCHECK_LE(gof_.num_frames_in_gof, 4);

  vpx_svc_ref_frame_config_t ref_config;
  memset(&ref_config, 0, sizeof(ref_config));

  // Buffers reserved per spatial layer for temporal references (at least 1).
  const size_t num_temporal_refs = std::max(1, num_temporal_layers_ - 1);
  const bool is_inter_layer_pred_allowed =
      inter_layer_pred_ == InterLayerPredMode::kOn ||
      (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic);
  absl::optional<int> last_updated_buf_idx;

  // Put temporal reference to LAST and spatial reference to GOLDEN. Update
  // frame buffer (i.e. store encoded frame) if current frame is a temporal
  // reference (i.e. it belongs to a low temporal layer) or it is a spatial
  // reference. In later case, always store spatial reference in the last
  // reference frame buffer.
  // For the case of 3 temporal and 3 spatial layers we need 6 frame buffers
  // for temporal references plus 1 buffer for spatial reference. 7 buffers
  // in total.

  for (int sl_idx = first_active_spatial_layer_id;
       sl_idx < num_active_spatial_layers_; ++sl_idx) {
    // Position within the GOF selects which buffer is referenced/updated.
    const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1;
    const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof;

    if (!is_key_pic) {
      // Set up temporal reference.
      const int buf_idx = sl_idx * num_temporal_refs + kRefBufIdx[gof_idx];

      // Last reference frame buffer is reserved for spatial reference. It is
      // not supposed to be used for temporal prediction.
      RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);

      const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
      // Incorrect spatial layer may be in the buffer due to a key-frame.
      const bool same_spatial_layer =
          ref_buf_[buf_idx].spatial_layer_id == sl_idx;
      bool correct_pid = false;
      if (is_flexible_mode_) {
        correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff;
      } else {
        // Below code assumes single temporal reference.
        RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);
        correct_pid = pid_diff == gof_.pid_diff[gof_idx][0];
      }

      if (same_spatial_layer && correct_pid) {
        ref_config.lst_fb_idx[sl_idx] = buf_idx;
        ref_config.reference_last[sl_idx] = 1;
      } else {
        // This reference doesn't match with one specified by GOF. This can
        // only happen if spatial layer is enabled dynamically without key
        // frame. Spatial prediction is supposed to be enabled in this case.
        RTC_DCHECK(is_inter_layer_pred_allowed &&
                   sl_idx > first_active_spatial_layer_id);
      }
    }

    if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) {
      // Set up spatial reference.
      RTC_DCHECK(last_updated_buf_idx);
      ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx;
      ref_config.reference_golden[sl_idx] = 1;
    } else {
      RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 ||
                 sl_idx == first_active_spatial_layer_id ||
                 inter_layer_pred_ == InterLayerPredMode::kOff);
    }

    last_updated_buf_idx.reset();

    // Decide whether/where to store the current frame: temporal references
    // (frames below the top temporal layer) go into a GOF-determined slot;
    // otherwise a spatial reference goes into the reserved last buffer.
    if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 ||
        num_temporal_layers_ == 1) {
      last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx];

      // Ensure last frame buffer is not used for temporal prediction (it is
      // reserved for spatial reference).
      RTC_DCHECK_LT(*last_updated_buf_idx, kNumVp9Buffers - 1);
    } else if (is_inter_layer_pred_allowed) {
      last_updated_buf_idx = kNumVp9Buffers - 1;
    }

    if (last_updated_buf_idx) {
      ref_config.update_buffer_slot[sl_idx] = 1 << *last_updated_buf_idx;
    }
  }

  return ref_config;
}
1696 
void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
  // Handles one compressed-frame packet from libvpx: copies the bitstream
  // into `encoded_image_`, fills codec-specific info, updates reference
  // buffer bookkeeping and (depending on buffering mode) delivers the frame.
  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);

  if (pkt->data.frame.sz == 0) {
    // Ignore dropped frame.
    return;
  }

  vpx_svc_layer_id_t layer_id = {0};
  libvpx_->codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);

  if (layer_buffering_) {
    // Deliver buffered low spatial layer frame. Must happen before
    // `encoded_image_` is overwritten with the new packet below.
    const bool end_of_picture = false;
    DeliverBufferedFrame(end_of_picture);
  }

  // Copy the bitstream out of the packet; presumably the packet buffer is
  // only valid until the next encoder call — TODO confirm against libvpx.
  encoded_image_.SetEncodedData(EncodedImageBuffer::Create(
      static_cast<const uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz));

  codec_specific_ = {};
  absl::optional<int> spatial_index;
  absl::optional<int> temporal_index;
  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index,
                             *pkt)) {
    // Drop the frame.
    encoded_image_.set_size(0);
    return;
  }
  encoded_image_.SetSpatialIndex(spatial_index);
  encoded_image_.SetTemporalIndex(temporal_index);

  // A frame only counts as a key frame if it also doesn't depend on a lower
  // spatial layer via inter-layer prediction.
  const bool is_key_frame =
      ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) &&
      !codec_specific_.codecSpecific.VP9.inter_layer_predicted;

  // Ensure encoder issued key frame on request.
  RTC_DCHECK(is_key_frame || !force_key_frame_);

  // Check if encoded frame is a key frame.
  encoded_image_._frameType = VideoFrameType::kVideoFrameDelta;
  if (is_key_frame) {
    encoded_image_._frameType = VideoFrameType::kVideoFrameKey;
    force_key_frame_ = false;
  }

  // Record which reference buffers this frame updated; `pics_since_key_` is
  // used as the picture number.
  UpdateReferenceBuffers(*pkt, pics_since_key_);

  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_.size());
  encoded_image_.SetTimestamp(input_image_->timestamp());
  encoded_image_.SetColorSpace(input_image_->color_space());
  encoded_image_._encodedHeight =
      pkt->data.frame.height[layer_id.spatial_layer_id];
  encoded_image_._encodedWidth =
      pkt->data.frame.width[layer_id.spatial_layer_id];
  int qp = -1;
  libvpx_->codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
  encoded_image_.qp_ = qp;

  if (!layer_buffering_) {
    // Without layer buffering, deliver immediately; the picture ends once
    // the top active spatial layer has been encoded.
    const bool end_of_picture = encoded_image_.SpatialIndex().value_or(0) + 1 ==
                                num_active_spatial_layers_;
    DeliverBufferedFrame(end_of_picture);
  }
}
1762 
DeliverBufferedFrame(bool end_of_picture)1763 void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
1764   if (encoded_image_.size() > 0) {
1765     if (num_spatial_layers_ > 1) {
1766       // Restore frame dropping settings, as dropping may be temporary forbidden
1767       // due to dynamically enabled layers.
1768       for (size_t i = 0; i < num_spatial_layers_; ++i) {
1769         svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
1770       }
1771     }
1772 
1773     codec_specific_.end_of_picture = end_of_picture;
1774 
1775     encoded_complete_callback_->OnEncodedImage(encoded_image_,
1776                                                &codec_specific_);
1777 
1778     if (codec_.mode == VideoCodecMode::kScreensharing) {
1779       const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
1780       const uint32_t frame_timestamp_ms =
1781           1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
1782       framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);
1783 
1784       const size_t steady_state_size = SteadyStateSize(
1785           spatial_idx, codec_specific_.codecSpecific.VP9.temporal_idx);
1786 
1787       // Only frames on spatial layers, which may be limited in a steady state
1788       // are considered for steady state detection.
1789       if (framerate_controller_[spatial_idx].GetTargetRate() >
1790           variable_framerate_experiment_.framerate_limit + 1e-9) {
1791         if (encoded_image_.qp_ <=
1792                 variable_framerate_experiment_.steady_state_qp &&
1793             encoded_image_.size() <= steady_state_size) {
1794           ++num_steady_state_frames_;
1795         } else {
1796           num_steady_state_frames_ = 0;
1797         }
1798       }
1799     }
1800     encoded_image_.set_size(0);
1801   }
1802 }
1803 
RegisterEncodeCompleteCallback(EncodedImageCallback * callback)1804 int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
1805     EncodedImageCallback* callback) {
1806   encoded_complete_callback_ = callback;
1807   return WEBRTC_VIDEO_CODEC_OK;
1808 }
1809 
GetEncoderInfo() const1810 VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
1811   EncoderInfo info;
1812   info.supports_native_handle = false;
1813   info.implementation_name = "libvpx";
1814   if (quality_scaler_experiment_.enabled && inited_ &&
1815       codec_.VP9().automaticResizeOn) {
1816     info.scaling_settings = VideoEncoder::ScalingSettings(
1817         quality_scaler_experiment_.low_qp, quality_scaler_experiment_.high_qp);
1818   } else {
1819     info.scaling_settings = VideoEncoder::ScalingSettings::kOff;
1820   }
1821   info.has_trusted_rate_controller = trusted_rate_controller_;
1822   info.is_hardware_accelerated = false;
1823   if (inited_) {
1824     // Find the max configured fps of any active spatial layer.
1825     float max_fps = 0.0;
1826     for (size_t si = 0; si < num_spatial_layers_; ++si) {
1827       if (codec_.spatialLayers[si].active &&
1828           codec_.spatialLayers[si].maxFramerate > max_fps) {
1829         max_fps = codec_.spatialLayers[si].maxFramerate;
1830       }
1831     }
1832 
1833     for (size_t si = 0; si < num_spatial_layers_; ++si) {
1834       info.fps_allocation[si].clear();
1835       if (!codec_.spatialLayers[si].active) {
1836         continue;
1837       }
1838 
1839       // This spatial layer may already use a fraction of the total frame rate.
1840       const float sl_fps_fraction =
1841           codec_.spatialLayers[si].maxFramerate / max_fps;
1842       for (size_t ti = 0; ti < num_temporal_layers_; ++ti) {
1843         const uint32_t decimator =
1844             num_temporal_layers_ <= 1 ? 1 : config_->ts_rate_decimator[ti];
1845         RTC_DCHECK_GT(decimator, 0);
1846         info.fps_allocation[si].push_back(
1847             rtc::saturated_cast<uint8_t>(EncoderInfo::kMaxFramerateFraction *
1848                                          (sl_fps_fraction / decimator)));
1849       }
1850     }
1851     if (profile_ == VP9Profile::kProfile0) {
1852       info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420,
1853                                       VideoFrameBuffer::Type::kNV12};
1854     }
1855   }
1856   if (!encoder_info_override_.resolution_bitrate_limits().empty()) {
1857     info.resolution_bitrate_limits =
1858         encoder_info_override_.resolution_bitrate_limits();
1859   }
1860   return info;
1861 }
1862 
SteadyStateSize(int sid,int tid)1863 size_t LibvpxVp9Encoder::SteadyStateSize(int sid, int tid) {
1864   const size_t bitrate_bps = current_bitrate_allocation_.GetBitrate(
1865       sid, tid == kNoTemporalIdx ? 0 : tid);
1866   const float fps = (codec_.mode == VideoCodecMode::kScreensharing)
1867                         ? std::min(static_cast<float>(codec_.maxFramerate),
1868                                    framerate_controller_[sid].GetTargetRate())
1869                         : codec_.maxFramerate;
1870   return static_cast<size_t>(
1871       bitrate_bps / (8 * fps) *
1872           (100 -
1873            variable_framerate_experiment_.steady_state_undershoot_percentage) /
1874           100 +
1875       0.5);
1876 }
1877 
1878 // static
1879 LibvpxVp9Encoder::VariableFramerateExperiment
ParseVariableFramerateConfig(const FieldTrialsView & trials)1880 LibvpxVp9Encoder::ParseVariableFramerateConfig(const FieldTrialsView& trials) {
1881   FieldTrialFlag enabled = FieldTrialFlag("Enabled");
1882   FieldTrialParameter<double> framerate_limit("min_fps", 5.0);
1883   FieldTrialParameter<int> qp("min_qp", 32);
1884   FieldTrialParameter<int> undershoot_percentage("undershoot", 30);
1885   FieldTrialParameter<int> frames_before_steady_state(
1886       "frames_before_steady_state", 5);
1887   ParseFieldTrial({&enabled, &framerate_limit, &qp, &undershoot_percentage,
1888                    &frames_before_steady_state},
1889                   trials.Lookup("WebRTC-VP9VariableFramerateScreenshare"));
1890   VariableFramerateExperiment config;
1891   config.enabled = enabled.Get();
1892   config.framerate_limit = framerate_limit.Get();
1893   config.steady_state_qp = qp.Get();
1894   config.steady_state_undershoot_percentage = undershoot_percentage.Get();
1895   config.frames_before_steady_state = frames_before_steady_state.Get();
1896 
1897   return config;
1898 }
1899 
1900 // static
LibvpxVp9Encoder::QualityScalerExperiment
LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) {
  // Parses the "WebRTC-VP9QualityScaler" field trial. The scaler is enabled
  // by default and turned off with the "Disabled" flag.
  FieldTrialFlag disabled = FieldTrialFlag("Disabled");
  FieldTrialParameter<int> low_qp("low_qp", kLowVp9QpThreshold);
  // NOTE(review): the key "hihg_qp" is misspelled, but it is likely
  // load-bearing — deployed field-trial strings presumably use this exact
  // spelling, so renaming it would silently fall back to the default.
  // Confirm before fixing.
  FieldTrialParameter<int> high_qp("hihg_qp", kHighVp9QpThreshold);
  ParseFieldTrial({&disabled, &low_qp, &high_qp},
                  trials.Lookup("WebRTC-VP9QualityScaler"));
  QualityScalerExperiment config;
  config.enabled = !disabled.Get();
  RTC_LOG(LS_INFO) << "Webrtc quality scaler for vp9 is "
                   << (config.enabled ? "enabled." : "disabled");
  config.low_qp = low_qp.Get();
  config.high_qp = high_qp.Get();

  return config;
}
1917 
UpdatePerformanceFlags()1918 void LibvpxVp9Encoder::UpdatePerformanceFlags() {
1919   flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution;
1920   if (codec_.GetVideoEncoderComplexity() ==
1921       VideoCodecComplexity::kComplexityLow) {
1922     // For low tier devices, always use speed 9. Only disable upper
1923     // layer deblocking below QCIF.
1924     params_by_resolution[0] = {.base_layer_speed = 9,
1925                                .high_layer_speed = 9,
1926                                .deblock_mode = 1,
1927                                .allow_denoising = true};
1928     params_by_resolution[352 * 288] = {.base_layer_speed = 9,
1929                                        .high_layer_speed = 9,
1930                                        .deblock_mode = 0,
1931                                        .allow_denoising = true};
1932   } else {
1933     params_by_resolution = performance_flags_.settings_by_resolution;
1934   }
1935 
1936   const auto find_speed = [&](int min_pixel_count) {
1937     RTC_DCHECK(!params_by_resolution.empty());
1938     auto it = params_by_resolution.upper_bound(min_pixel_count);
1939     return std::prev(it)->second;
1940   };
1941   performance_flags_by_spatial_index_.clear();
1942 
1943   if (is_svc_) {
1944     for (int si = 0; si < num_spatial_layers_; ++si) {
1945       performance_flags_by_spatial_index_.push_back(find_speed(
1946           codec_.spatialLayers[si].width * codec_.spatialLayers[si].height));
1947     }
1948   } else {
1949     performance_flags_by_spatial_index_.push_back(
1950         find_speed(codec_.width * codec_.height));
1951   }
1952 }
1953 
1954 // static
1955 LibvpxVp9Encoder::PerformanceFlags
ParsePerformanceFlagsFromTrials(const FieldTrialsView & trials)1956 LibvpxVp9Encoder::ParsePerformanceFlagsFromTrials(
1957     const FieldTrialsView& trials) {
1958   struct Params : public PerformanceFlags::ParameterSet {
1959     int min_pixel_count = 0;
1960   };
1961 
1962   FieldTrialStructList<Params> trials_list(
1963       {FieldTrialStructMember("min_pixel_count",
1964                               [](Params* p) { return &p->min_pixel_count; }),
1965        FieldTrialStructMember("high_layer_speed",
1966                               [](Params* p) { return &p->high_layer_speed; }),
1967        FieldTrialStructMember("base_layer_speed",
1968                               [](Params* p) { return &p->base_layer_speed; }),
1969        FieldTrialStructMember("deblock_mode",
1970                               [](Params* p) { return &p->deblock_mode; }),
1971        FieldTrialStructMember("denoiser",
1972                               [](Params* p) { return &p->allow_denoising; })},
1973       {});
1974 
1975   FieldTrialFlag per_layer_speed("use_per_layer_speed");
1976 
1977   ParseFieldTrial({&trials_list, &per_layer_speed},
1978                   trials.Lookup("WebRTC-VP9-PerformanceFlags"));
1979 
1980   PerformanceFlags flags;
1981   flags.use_per_layer_speed = per_layer_speed.Get();
1982 
1983   constexpr int kMinSpeed = 1;
1984   constexpr int kMaxSpeed = 9;
1985   for (auto& f : trials_list.Get()) {
1986     if (f.base_layer_speed < kMinSpeed || f.base_layer_speed > kMaxSpeed ||
1987         f.high_layer_speed < kMinSpeed || f.high_layer_speed > kMaxSpeed ||
1988         f.deblock_mode < 0 || f.deblock_mode > 2) {
1989       RTC_LOG(LS_WARNING) << "Ignoring invalid performance flags: "
1990                           << "min_pixel_count = " << f.min_pixel_count
1991                           << ", high_layer_speed = " << f.high_layer_speed
1992                           << ", base_layer_speed = " << f.base_layer_speed
1993                           << ", deblock_mode = " << f.deblock_mode;
1994       continue;
1995     }
1996     flags.settings_by_resolution[f.min_pixel_count] = f;
1997   }
1998 
1999   if (flags.settings_by_resolution.empty()) {
2000     return GetDefaultPerformanceFlags();
2001   }
2002 
2003   return flags;
2004 }
2005 
2006 // static
LibvpxVp9Encoder::PerformanceFlags
LibvpxVp9Encoder::GetDefaultPerformanceFlags() {
  // Built-in speed/deblocking defaults used when the
  // "WebRTC-VP9-PerformanceFlags" field trial supplies no valid settings.
  // Entries are keyed by minimum pixel count.
  PerformanceFlags flags;
  flags.use_per_layer_speed = true;
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
  // Speed 8 on all layers for all resolutions.
  flags.settings_by_resolution[0] = {.base_layer_speed = 8,
                                     .high_layer_speed = 8,
                                     .deblock_mode = 0,
                                     .allow_denoising = true};
#else

  // For smaller resolutions, use lower speed setting for the temporal base
  // layer (get some coding gain at the cost of increased encoding complexity).
  // Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and
  // disable deblocking for upper-most temporal layers.
  flags.settings_by_resolution[0] = {.base_layer_speed = 5,
                                     .high_layer_speed = 8,
                                     .deblock_mode = 1,
                                     .allow_denoising = true};

  // Use speed 7 for QCIF and above.
  // Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and
  // enable deblocking for all temporal layers.
  flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7,
                                             .high_layer_speed = 8,
                                             .deblock_mode = 0,
                                             .allow_denoising = true};

  // For very high resolution (1080p and up), turn the speed all the way up
  // since this is very CPU intensive. Also disable denoising to save CPU, at
  // these resolutions denoising appear less effective and hopefully you also
  // have a less noisy video source at this point.
  flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9,
                                               .high_layer_speed = 9,
                                               .deblock_mode = 0,
                                               .allow_denoising = false};

#endif
  return flags;
}
2048 
MaybeRewrapRawWithFormat(const vpx_img_fmt fmt)2049 void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) {
2050   if (!raw_) {
2051     raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1,
2052                              nullptr);
2053   } else if (raw_->fmt != fmt) {
2054     RTC_LOG(LS_INFO) << "Switching VP9 encoder pixel format to "
2055                      << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420");
2056     libvpx_->img_free(raw_);
2057     raw_ = libvpx_->img_wrap(nullptr, fmt, codec_.width, codec_.height, 1,
2058                              nullptr);
2059   }
2060   // else no-op since the image is already in the right format.
2061 }
2062 
rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
    rtc::scoped_refptr<VideoFrameBuffer> buffer) {
  // Maps (or converts) `buffer` into a pixel format supported by VP9
  // profile 0 (I420 or NV12) and points `raw_`'s planes at its data.
  // Returns the buffer whose memory `raw_` aliases, or an empty refptr on
  // conversion failure — the returned buffer must stay alive while `raw_`
  // is in use.
  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
      supported_formats = {VideoFrameBuffer::Type::kI420,
                           VideoFrameBuffer::Type::kNV12};

  rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
  if (buffer->type() != VideoFrameBuffer::Type::kNative) {
    // `buffer` is already mapped.
    mapped_buffer = buffer;
  } else {
    // Attempt to map to one of the supported formats.
    mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
  }
  if (!mapped_buffer ||
      (absl::c_find(supported_formats, mapped_buffer->type()) ==
           supported_formats.end() &&
       mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
    // Unknown pixel format or unable to map, convert to I420 and prepare that
    // buffer instead to ensure Scale() is safe to use.
    auto converted_buffer = buffer->ToI420();
    if (!converted_buffer) {
      RTC_LOG(LS_ERROR) << "Failed to convert "
                        << VideoFrameBufferTypeToString(buffer->type())
                        << " image to I420. Can't encode frame.";
      return {};
    }
    RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
              converted_buffer->type() == VideoFrameBuffer::Type::kI420A);

    // Because `buffer` had to be converted, use `converted_buffer` instead.
    buffer = mapped_buffer = converted_buffer;
  }

  // Prepare `raw_` from `mapped_buffer`.
  switch (mapped_buffer->type()) {
    case VideoFrameBuffer::Type::kI420:
    case VideoFrameBuffer::Type::kI420A: {
      // I420A's alpha plane is ignored; Y/U/V layout matches I420.
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420);
      const I420BufferInterface* i420_buffer = mapped_buffer->GetI420();
      RTC_DCHECK(i420_buffer);
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
      raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
      raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
      break;
    }
    case VideoFrameBuffer::Type::kNV12: {
      MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12);
      const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
      RTC_DCHECK(nv12_buffer);
      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY());
      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV());
      // NV12 interleaves U and V: the V "plane" is the UV plane offset by
      // one byte, sharing the UV stride.
      raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1;
      raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
      raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
      raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
      break;
    }
    default:
      RTC_DCHECK_NOTREACHED();
  }
  return mapped_buffer;
}
2129 
2130 }  // namespace webrtc
2131 
2132 #endif  // RTC_ENABLE_VP9
2133