• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "cast/standalone_sender/streaming_vp8_encoder.h"
6 
7 #include <stdint.h>
8 #include <string.h>
9 #include <vpx/vp8cx.h>
10 
11 #include <chrono>
12 #include <cmath>
13 #include <utility>
14 
15 #include "cast/streaming/encoded_frame.h"
16 #include "cast/streaming/environment.h"
17 #include "cast/streaming/sender.h"
18 #include "util/chrono_helpers.h"
19 #include "util/osp_logging.h"
20 #include "util/saturate_cast.h"
21 
22 namespace openscreen {
23 namespace cast {
24 
25 // TODO(https://crbug.com/openscreen/123): Fix the declarations and then remove
26 // this:
27 using openscreen::operator<<;  // For std::chrono::duration pretty-printing.
28 
29 namespace {
30 
31 constexpr int kBytesPerKilobyte = 1024;
32 
33 // Lower and upper bounds to the frame duration passed to vpx_codec_encode(), to
34 // ensure sanity. Note that the upper-bound is especially important in cases
35 // where the video paused for some lengthy amount of time.
36 constexpr Clock::duration kMinFrameDuration = milliseconds(1);
37 constexpr Clock::duration kMaxFrameDuration = milliseconds(125);
38 
39 // Highest/lowest allowed encoding speed set to the encoder. The valid range is
40 // [4, 16], but experiments show that with speed higher than 12, the saving of
41 // the encoding time is not worth the dropping of the quality. And, with speed
42 // lower than 6, the increasing amount of quality is not worth the increasing
43 // amount of encoding time.
44 constexpr int kHighestEncodingSpeed = 12;
45 constexpr int kLowestEncodingSpeed = 6;
46 
47 // This is the equivalent change in encoding speed per one quantizer step.
48 constexpr double kEquivalentEncodingSpeedStepPerQuantizerStep = 1 / 20.0;
49 
50 }  // namespace
51 
StreamingVp8Encoder(const Parameters & params,TaskRunner * task_runner,Sender * sender)52 StreamingVp8Encoder::StreamingVp8Encoder(const Parameters& params,
53                                          TaskRunner* task_runner,
54                                          Sender* sender)
55     : params_(params),
56       main_task_runner_(task_runner),
57       sender_(sender),
58       ideal_speed_setting_(kHighestEncodingSpeed),
59       encode_thread_([this] { ProcessWorkUnitsUntilTimeToQuit(); }) {
60   OSP_DCHECK_LE(1, params_.num_encode_threads);
61   OSP_DCHECK_LE(kMinQuantizer, params_.min_quantizer);
62   OSP_DCHECK_LE(params_.min_quantizer, params_.max_cpu_saver_quantizer);
63   OSP_DCHECK_LE(params_.max_cpu_saver_quantizer, params_.max_quantizer);
64   OSP_DCHECK_LE(params_.max_quantizer, kMaxQuantizer);
65   OSP_DCHECK_LT(0.0, params_.max_time_utilization);
66   OSP_DCHECK_LE(params_.max_time_utilization, 1.0);
67   OSP_DCHECK(main_task_runner_);
68   OSP_DCHECK(sender_);
69 
70   const auto result =
71       vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config_, 0);
72   OSP_CHECK_EQ(result, VPX_CODEC_OK);
73 
74   // This is set to non-zero in ConfigureForNewFrameSize() later, to flag that
75   // the encoder has been initialized.
76   config_.g_threads = 0;
77 
78   // Set the timebase to match that of openscreen::Clock::duration.
79   config_.g_timebase.num = Clock::duration::period::num;
80   config_.g_timebase.den = Clock::duration::period::den;
81 
82   // |g_pass| and |g_lag_in_frames| must be "one pass" and zero, respectively,
83   // because of the way the libvpx API is used.
84   config_.g_pass = VPX_RC_ONE_PASS;
85   config_.g_lag_in_frames = 0;
86 
87   // Rate control settings.
88   config_.rc_dropframe_thresh = 0;  // The encoder may not drop any frames.
89   config_.rc_resize_allowed = 0;
90   config_.rc_end_usage = VPX_CBR;
91   config_.rc_target_bitrate = target_bitrate_ / kBytesPerKilobyte;
92   config_.rc_min_quantizer = params_.min_quantizer;
93   config_.rc_max_quantizer = params_.max_quantizer;
94 
95   // The reasons for the values chosen here (rc_*shoot_pct and rc_buf_*_sz) are
96   // lost in history. They were brought-over from the legacy Chrome Cast
97   // Streaming Sender implemenation.
98   config_.rc_undershoot_pct = 100;
99   config_.rc_overshoot_pct = 15;
100   config_.rc_buf_initial_sz = 500;
101   config_.rc_buf_optimal_sz = 600;
102   config_.rc_buf_sz = 1000;
103 
104   config_.kf_mode = VPX_KF_DISABLED;
105 }
106 
~StreamingVp8Encoder()107 StreamingVp8Encoder::~StreamingVp8Encoder() {
108   {
109     std::unique_lock<std::mutex> lock(mutex_);
110     target_bitrate_ = 0;
111     cv_.notify_one();
112   }
113   encode_thread_.join();
114 }
115 
GetTargetBitrate() const116 int StreamingVp8Encoder::GetTargetBitrate() const {
117   // Note: No need to lock the |mutex_| since this method should be called on
118   // the same thread as SetTargetBitrate().
119   return target_bitrate_;
120 }
121 
SetTargetBitrate(int new_bitrate)122 void StreamingVp8Encoder::SetTargetBitrate(int new_bitrate) {
123   // Ensure that, when bps is converted to kbps downstream, that the encoder
124   // bitrate will not be zero.
125   new_bitrate = std::max(new_bitrate, kBytesPerKilobyte);
126 
127   std::unique_lock<std::mutex> lock(mutex_);
128   // Only assign the new target bitrate if |target_bitrate_| has not yet been
129   // used to signal the |encode_thread_| to end.
130   if (target_bitrate_ > 0) {
131     target_bitrate_ = new_bitrate;
132   }
133 }
134 
EncodeAndSend(const VideoFrame & frame,Clock::time_point reference_time,std::function<void (Stats)> stats_callback)135 void StreamingVp8Encoder::EncodeAndSend(
136     const VideoFrame& frame,
137     Clock::time_point reference_time,
138     std::function<void(Stats)> stats_callback) {
139   WorkUnit work_unit;
140 
141   // TODO(miu): The |VideoFrame| struct should provide the media timestamp,
142   // instead of this code inferring it from the reference timestamps, since: 1)
143   // the video capturer's clock may tick at a different rate than the system
144   // clock; and 2) to reduce jitter.
145   if (start_time_ == Clock::time_point::min()) {
146     start_time_ = reference_time;
147     work_unit.rtp_timestamp = RtpTimeTicks();
148   } else {
149     work_unit.rtp_timestamp = RtpTimeTicks::FromTimeSinceOrigin(
150         reference_time - start_time_, sender_->rtp_timebase());
151     if (work_unit.rtp_timestamp <= last_enqueued_rtp_timestamp_) {
152       OSP_LOG_WARN << "VIDEO[" << sender_->ssrc()
153                    << "] Dropping: RTP timestamp is not monotonically "
154                       "increasing from last frame.";
155       return;
156     }
157   }
158   if (sender_->GetInFlightMediaDuration(work_unit.rtp_timestamp) >
159       sender_->GetMaxInFlightMediaDuration()) {
160     OSP_LOG_WARN << "VIDEO[" << sender_->ssrc()
161                  << "] Dropping: In-flight media duration would be too high.";
162     return;
163   }
164 
165   Clock::duration frame_duration = frame.duration;
166   if (frame_duration <= Clock::duration::zero()) {
167     // The caller did not provide the frame duration in |frame|.
168     if (reference_time == start_time_) {
169       // Use the max for the first frame so libvpx will spend extra effort on
170       // its quality.
171       frame_duration = kMaxFrameDuration;
172     } else {
173       // Use the actual amount of time between the current and previous frame as
174       // a prediction for the next frame's duration.
175       frame_duration =
176           (work_unit.rtp_timestamp - last_enqueued_rtp_timestamp_)
177               .ToDuration<Clock::duration>(sender_->rtp_timebase());
178     }
179   }
180   work_unit.duration =
181       std::max(std::min(frame_duration, kMaxFrameDuration), kMinFrameDuration);
182 
183   last_enqueued_rtp_timestamp_ = work_unit.rtp_timestamp;
184 
185   work_unit.image = CloneAsVpxImage(frame);
186   work_unit.reference_time = reference_time;
187   work_unit.stats_callback = std::move(stats_callback);
188   const bool force_key_frame = sender_->NeedsKeyFrame();
189   {
190     std::unique_lock<std::mutex> lock(mutex_);
191     needs_key_frame_ |= force_key_frame;
192     encode_queue_.push(std::move(work_unit));
193     cv_.notify_one();
194   }
195 }
196 
DestroyEncoder()197 void StreamingVp8Encoder::DestroyEncoder() {
198   OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
199 
200   if (is_encoder_initialized()) {
201     vpx_codec_destroy(&encoder_);
202     // Flag that the encoder is not initialized. See header comments for
203     // is_encoder_initialized().
204     config_.g_threads = 0;
205   }
206 }
207 
ProcessWorkUnitsUntilTimeToQuit()208 void StreamingVp8Encoder::ProcessWorkUnitsUntilTimeToQuit() {
209   OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
210 
211   for (;;) {
212     WorkUnitWithResults work_unit{};
213     bool force_key_frame;
214     int target_bitrate;
215     {
216       std::unique_lock<std::mutex> lock(mutex_);
217       if (target_bitrate_ <= 0) {
218         break;  // Time to end this thread.
219       }
220       if (encode_queue_.empty()) {
221         cv_.wait(lock);
222         if (encode_queue_.empty()) {
223           continue;
224         }
225       }
226       static_cast<WorkUnit&>(work_unit) = std::move(encode_queue_.front());
227       encode_queue_.pop();
228       force_key_frame = needs_key_frame_;
229       needs_key_frame_ = false;
230       target_bitrate = target_bitrate_;
231     }
232 
233     // Clock::now() is being called directly, instead of using a
234     // dependency-injected "now function," since actual wall time is being
235     // measured.
236     const Clock::time_point encode_start_time = Clock::now();
237     PrepareEncoder(work_unit.image->d_w, work_unit.image->d_h, target_bitrate);
238     EncodeFrame(force_key_frame, &work_unit);
239     ComputeFrameEncodeStats(Clock::now() - encode_start_time, target_bitrate,
240                             &work_unit);
241     UpdateSpeedSettingForNextFrame(work_unit.stats);
242 
243     main_task_runner_->PostTask(
244         [this, results = std::move(work_unit)]() mutable {
245           SendEncodedFrame(std::move(results));
246         });
247   }
248 
249   DestroyEncoder();
250 }
251 
PrepareEncoder(int width,int height,int target_bitrate)252 void StreamingVp8Encoder::PrepareEncoder(int width,
253                                          int height,
254                                          int target_bitrate) {
255   OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
256 
257   const int target_kbps = target_bitrate / kBytesPerKilobyte;
258 
259   // Translate the |ideal_speed_setting_| into the VP8E_SET_CPUUSED setting and
260   // the minimum quantizer to use.
261   int speed;
262   int min_quantizer;
263   if (ideal_speed_setting_ > kHighestEncodingSpeed) {
264     speed = kHighestEncodingSpeed;
265     const double remainder = ideal_speed_setting_ - speed;
266     min_quantizer = rounded_saturate_cast<int>(
267         remainder / kEquivalentEncodingSpeedStepPerQuantizerStep +
268         params_.min_quantizer);
269     min_quantizer = std::min(min_quantizer, params_.max_cpu_saver_quantizer);
270   } else {
271     speed = std::max(rounded_saturate_cast<int>(ideal_speed_setting_),
272                      kLowestEncodingSpeed);
273     min_quantizer = params_.min_quantizer;
274   }
275 
276   if (static_cast<int>(config_.g_w) != width ||
277       static_cast<int>(config_.g_h) != height) {
278     DestroyEncoder();
279   }
280 
281   if (!is_encoder_initialized()) {
282     config_.g_threads = params_.num_encode_threads;
283     config_.g_w = width;
284     config_.g_h = height;
285     config_.rc_target_bitrate = target_kbps;
286     config_.rc_min_quantizer = min_quantizer;
287 
288     encoder_ = {};
289     const vpx_codec_flags_t flags = 0;
290     const auto init_result =
291         vpx_codec_enc_init(&encoder_, vpx_codec_vp8_cx(), &config_, flags);
292     OSP_CHECK_EQ(init_result, VPX_CODEC_OK);
293 
294     // Raise the threshold for considering macroblocks as static. The default is
295     // zero, so this setting makes the encoder less sensitive to motion. This
296     // lowers the probability of needing to utilize more CPU to search for
297     // motion vectors.
298     const auto ctl_result =
299         vpx_codec_control(&encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
300     OSP_CHECK_EQ(ctl_result, VPX_CODEC_OK);
301 
302     // Ensure the speed will be set (below).
303     current_speed_setting_ = ~speed;
304   } else if (static_cast<int>(config_.rc_target_bitrate) != target_kbps ||
305              static_cast<int>(config_.rc_min_quantizer) != min_quantizer) {
306     config_.rc_target_bitrate = target_kbps;
307     config_.rc_min_quantizer = min_quantizer;
308     const auto update_config_result =
309         vpx_codec_enc_config_set(&encoder_, &config_);
310     OSP_CHECK_EQ(update_config_result, VPX_CODEC_OK);
311   }
312 
313   if (current_speed_setting_ != speed) {
314     // Pass the |speed| as a negative value to turn off VP8's automatic speed
315     // selection logic and force the exact setting.
316     const auto ctl_result =
317         vpx_codec_control(&encoder_, VP8E_SET_CPUUSED, -speed);
318     OSP_CHECK_EQ(ctl_result, VPX_CODEC_OK);
319     current_speed_setting_ = speed;
320   }
321 }
322 
EncodeFrame(bool force_key_frame,WorkUnitWithResults * work_unit)323 void StreamingVp8Encoder::EncodeFrame(bool force_key_frame,
324                                       WorkUnitWithResults* work_unit) {
325   OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
326 
327   // The presentation timestamp argument here is fixed to zero to force the
328   // encoder to base its single-frame bandwidth calculations entirely on
329   // |frame_duration| and the target bitrate setting.
330   const vpx_codec_pts_t pts = 0;
331   const vpx_enc_frame_flags_t flags = force_key_frame ? VPX_EFLAG_FORCE_KF : 0;
332   const auto encode_result =
333       vpx_codec_encode(&encoder_, work_unit->image.get(), pts,
334                        work_unit->duration.count(), flags, VPX_DL_REALTIME);
335   OSP_CHECK_EQ(encode_result, VPX_CODEC_OK);
336 
337   const vpx_codec_cx_pkt_t* pkt;
338   for (vpx_codec_iter_t iter = nullptr;;) {
339     pkt = vpx_codec_get_cx_data(&encoder_, &iter);
340     // vpx_codec_get_cx_data() returns null once the "iteration" is complete.
341     // However, that point should never be reached because a
342     // VPX_CODEC_CX_FRAME_PKT must be encountered before that.
343     OSP_CHECK(pkt);
344     if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
345       break;
346     }
347   }
348 
349   // A copy of the payload data is being made here. That's okay since it has to
350   // be copied at some point anyway, to be passed back to the main thread.
351   auto* const begin = static_cast<const uint8_t*>(pkt->data.frame.buf);
352   auto* const end = begin + pkt->data.frame.sz;
353   work_unit->payload.assign(begin, end);
354   work_unit->is_key_frame = !!(pkt->data.frame.flags & VPX_FRAME_IS_KEY);
355 }
356 
ComputeFrameEncodeStats(Clock::duration encode_wall_time,int target_bitrate,WorkUnitWithResults * work_unit)357 void StreamingVp8Encoder::ComputeFrameEncodeStats(
358     Clock::duration encode_wall_time,
359     int target_bitrate,
360     WorkUnitWithResults* work_unit) {
361   OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
362 
363   Stats& stats = work_unit->stats;
364 
365   // Note: stats.frame_id is set later, in SendEncodedFrame().
366   stats.rtp_timestamp = work_unit->rtp_timestamp;
367   stats.encode_wall_time = encode_wall_time;
368   stats.frame_duration = work_unit->duration;
369   stats.encoded_size = work_unit->payload.size();
370 
371   constexpr double kBytesPerBit = 1.0 / CHAR_BIT;
372   constexpr double kSecondsPerClockTick =
373       1.0 / Clock::to_duration(seconds(1)).count();
374   const double target_bytes_per_clock_tick =
375       target_bitrate * (kBytesPerBit * kSecondsPerClockTick);
376   stats.target_size = target_bytes_per_clock_tick * work_unit->duration.count();
377 
378   // The quantizer the encoder used. This is the result of the VP8 encoder
379   // taking a guess at what quantizer value would produce an encoded frame size
380   // as close to the target as possible.
381   const auto get_quantizer_result = vpx_codec_control(
382       &encoder_, VP8E_GET_LAST_QUANTIZER_64, &stats.quantizer);
383   OSP_CHECK_EQ(get_quantizer_result, VPX_CODEC_OK);
384 
385   // Now that the frame has been encoded and the number of bytes is known, the
386   // perfect quantizer value (i.e., the one that should have been used) can be
387   // determined.
388   stats.perfect_quantizer = stats.quantizer * stats.space_utilization();
389 }
390 
UpdateSpeedSettingForNextFrame(const Stats & stats)391 void StreamingVp8Encoder::UpdateSpeedSettingForNextFrame(const Stats& stats) {
392   OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
393 
394   // Combine the speed setting that was used to encode the last frame, and the
395   // quantizer the encoder chose into a single speed metric.
396   const double speed = current_speed_setting_ +
397                        kEquivalentEncodingSpeedStepPerQuantizerStep *
398                            std::max(0, stats.quantizer - params_.min_quantizer);
399 
400   // Like |Stats::perfect_quantizer|, this computes a "hindsight" speed setting
401   // for the last frame, one that may have potentially allowed for a
402   // better-quality quantizer choice by the encoder, while also keeping CPU
403   // utilization within budget.
404   const double perfect_speed =
405       speed * stats.time_utilization() / params_.max_time_utilization;
406 
407   // Update the ideal speed setting, to be used for the next frame. An
408   // exponentially-decaying weighted average is used here to smooth-out noise.
409   // The weight is based on the duration of the frame that was encoded.
410   constexpr Clock::duration kDecayHalfLife = milliseconds(120);
411   const double ticks = stats.frame_duration.count();
412   const double weight = ticks / (ticks + kDecayHalfLife.count());
413   ideal_speed_setting_ =
414       weight * perfect_speed + (1.0 - weight) * ideal_speed_setting_;
415   OSP_DCHECK(std::isfinite(ideal_speed_setting_));
416 }
417 
SendEncodedFrame(WorkUnitWithResults results)418 void StreamingVp8Encoder::SendEncodedFrame(WorkUnitWithResults results) {
419   OSP_DCHECK(main_task_runner_->IsRunningOnTaskRunner());
420 
421   EncodedFrame frame;
422   frame.frame_id = sender_->GetNextFrameId();
423   if (results.is_key_frame) {
424     frame.dependency = EncodedFrame::KEY_FRAME;
425     frame.referenced_frame_id = frame.frame_id;
426   } else {
427     frame.dependency = EncodedFrame::DEPENDS_ON_ANOTHER;
428     frame.referenced_frame_id = frame.frame_id - 1;
429   }
430   frame.rtp_timestamp = results.rtp_timestamp;
431   frame.reference_time = results.reference_time;
432   frame.data = absl::Span<uint8_t>(results.payload);
433 
434   if (sender_->EnqueueFrame(frame) != Sender::OK) {
435     // Since the frame will not be sent, the encoder's frame dependency chain
436     // has been broken. Force a key frame for the next frame.
437     std::unique_lock<std::mutex> lock(mutex_);
438     needs_key_frame_ = true;
439   }
440 
441   if (results.stats_callback) {
442     results.stats.frame_id = frame.frame_id;
443     results.stats_callback(results.stats);
444   }
445 }
446 
namespace {

// Copies |num_rows| rows of one image plane from |src| to |dst|.
//
// |src_stride| and |dst_stride| are the byte distances between the starts of
// consecutive rows in the source and destination buffers. When the strides
// match, the whole plane is copied with a single memcpy(). Otherwise each row
// is copied separately, transferring only min(src_stride, dst_stride) bytes
// per row so that neither buffer is overrun.
//
// Nothing is copied if |num_rows| is not positive, or if the number of bytes
// to copy per row is not positive.
void CopyPlane(const uint8_t* src,
               int src_stride,
               int num_rows,
               uint8_t* dst,
               int dst_stride) {
  if (num_rows <= 0) {
    return;
  }

  if (src_stride == dst_stride) {
    if (src_stride > 0) {
      // Compute the byte count in size_t: the product of two ints could
      // overflow, and a negative value would become a huge memcpy() length.
      const size_t total_bytes =
          static_cast<size_t>(src_stride) * static_cast<size_t>(num_rows);
      memcpy(dst, src, total_bytes);
    }
    return;
  }

  const int bytes_per_row = std::min(src_stride, dst_stride);
  if (bytes_per_row <= 0) {
    return;
  }
  for (int row = 0; row < num_rows; ++row) {
    memcpy(dst, src, static_cast<size_t>(bytes_per_row));
    dst += dst_stride;
    src += src_stride;
  }
}

}  // namespace
465 
466 // static
CloneAsVpxImage(const VideoFrame & frame)467 StreamingVp8Encoder::VpxImageUniquePtr StreamingVp8Encoder::CloneAsVpxImage(
468     const VideoFrame& frame) {
469   OSP_DCHECK_GE(frame.width, 0);
470   OSP_DCHECK_GE(frame.height, 0);
471   OSP_DCHECK_GE(frame.yuv_strides[0], 0);
472   OSP_DCHECK_GE(frame.yuv_strides[1], 0);
473   OSP_DCHECK_GE(frame.yuv_strides[2], 0);
474 
475   constexpr int kAlignment = 32;
476   VpxImageUniquePtr image(vpx_img_alloc(nullptr, VPX_IMG_FMT_I420, frame.width,
477                                         frame.height, kAlignment));
478   OSP_CHECK(image);
479 
480   CopyPlane(frame.yuv_planes[0], frame.yuv_strides[0], frame.height,
481             image->planes[VPX_PLANE_Y], image->stride[VPX_PLANE_Y]);
482   CopyPlane(frame.yuv_planes[1], frame.yuv_strides[1], (frame.height + 1) / 2,
483             image->planes[VPX_PLANE_U], image->stride[VPX_PLANE_U]);
484   CopyPlane(frame.yuv_planes[2], frame.yuv_strides[2], (frame.height + 1) / 2,
485             image->planes[VPX_PLANE_V], image->stride[VPX_PLANE_V]);
486 
487   return image;
488 }
489 
490 }  // namespace cast
491 }  // namespace openscreen
492