• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_
6 #define CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_
7 
8 #include <vpx/vpx_encoder.h>
9 #include <vpx/vpx_image.h>
10 
11 #include <algorithm>
12 #include <condition_variable>  // NOLINT
13 #include <functional>
14 #include <memory>
15 #include <mutex>
16 #include <queue>
17 #include <thread>
18 #include <vector>
19 
20 #include "absl/base/thread_annotations.h"
21 #include "cast/streaming/frame_id.h"
22 #include "cast/streaming/rtp_time.h"
23 #include "platform/api/task_runner.h"
24 #include "platform/api/time.h"
25 
26 namespace openscreen {
27 
28 class TaskRunner;
29 
30 namespace cast {
31 
32 class Sender;
33 
34 // Uses libvpx to encode VP8 video and streams it to a Sender. Includes
35 // extensive logic for fine-tuning the encoder parameters in real-time, to
36 // provide the best quality results given external, uncontrollable factors:
37 // CPU/network availability, and the complexity of the video frame content.
38 //
39 // Internally, a separate encode thread is created and used to prevent blocking
40 // the main thread while frames are being encoded. All public API methods are
41 // assumed to be called on the same sequence/thread as the main TaskRunner
42 // (injected via the constructor).
43 //
44 // Usage:
45 //
46 // 1. EncodeAndSend() is used to queue-up video frames for encoding and sending,
47 // which will be done on a best-effort basis.
48 //
49 // 2. The client is expected to call SetTargetBitrate() frequently based on its
50 // own bandwidth estimates and congestion control logic. In addition, a client
51 // may provide a callback for each frame's encode statistics, which can be used
52 // to further optimize the user experience. For example, the stats can be used
53 // as a signal to reduce the data volume (i.e., resolution and/or frame rate)
54 // coming from the video capture source.
55 class StreamingVp8Encoder {
56  public:
57   // Configurable parameters passed to the StreamingVp8Encoder constructor.
58   struct Parameters {
59     // Number of threads to parallelize frame encoding. This should be set based
60     // on the number of CPU cores available for encoding, but no more than 8.
61     int num_encode_threads =
62         std::min(std::max<int>(std::thread::hardware_concurrency(), 1), 8);
63 
64     // Best-quality quantizer (lower is better quality). Range: [0,63]
65     int min_quantizer = 4;
66 
67     // Worst-quality quantizer (lower is better quality). Range: [0,63]
68     int max_quantizer = 63;
69 
70     // Worst-quality quantizer to use when the CPU is extremely constrained.
71     // Range: [min_quantizer,max_quantizer]
72     int max_cpu_saver_quantizer = 25;
73 
74     // Maximum amount of wall-time a frame's encode can take, relative to the
75     // frame's duration, before the CPU-saver logic is activated. The default
76     // (70%) is appropriate for systems with four or more cores, but should be
77     // reduced (e.g., 50%) for systems with fewer than three cores.
78     //
79     // Example: For 30 FPS (continuous) video, the frame duration is ~33.3ms,
80     // and a value of 0.5 here would mean that the CPU-saver logic starts
81     // sacrificing quality when frame encodes start taking longer than ~16.7ms.
82     double max_time_utilization = 0.7;
83   };
84 
85   // Represents an input VideoFrame, passed to EncodeAndSend().
86   struct VideoFrame {
87     // Image width and height.
88     int width;
89     int height;
90 
91     // I420 format image pointers and row strides (the number of bytes between
92     // the start of successive rows). The pointers only need to remain valid
93     // until the EncodeAndSend() call returns.
94     const uint8_t* yuv_planes[3];
95     int yuv_strides[3];
96 
97     // How long this frame will be held before the next frame will be displayed,
98     // or zero if unknown. The frame duration is passed to the VP8 codec,
99     // affecting a number of important behaviors, including: per-frame
100     // bandwidth, CPU time spent encoding, temporal quality trade-offs, and
101     // key/golden/alt-ref frame generation intervals.
102     Clock::duration duration;
103   };
104 
105   // Performance statistics for a single frame's encode.
106   //
107   // For full details on how to use these stats in an end-to-end system, see:
108   // https://www.chromium.org/developers/design-documents/
109   //     auto-throttled-screen-capture-and-mirroring
110   // and https://source.chromium.org/chromium/chromium/src/+/master:
111   //     media/cast/sender/performance_metrics_overlay.h
112   struct Stats {
113     // The Cast Streaming ID that was assigned to the frame.
114     FrameId frame_id;
115 
116     // The RTP timestamp of the frame.
117     RtpTimeTicks rtp_timestamp;
118 
119     // How long the frame took to encode. This is wall time, not CPU time or
120     // some other load metric.
121     Clock::duration encode_wall_time;
122 
123     // The frame's predicted duration; or, the actual duration if it was
124     // provided in the VideoFrame.
125     Clock::duration frame_duration;
126 
127     // The encoded frame's size in bytes.
128     int encoded_size;
129 
130     // The average size of an encoded frame in bytes, having this
131     // |frame_duration| and current target bitrate.
132     double target_size;
133 
134     // The actual quantizer the VP8 encoder used, in the range [0,63].
135     int quantizer;
136 
137     // The "hindsight" quantizer value that would have produced the best quality
138     // encoding of the frame at the current target bitrate. The nominal range is
139     // [0.0,63.0]. If it is larger than 63.0, then it was impossible for VP8 to
140     // encode the frame within the current target bitrate (e.g., too much
141     // "entropy" in the image, or too low a target bitrate).
142     double perfect_quantizer;
143 
144     // Utilization feedback metrics. The nominal range for each of these is
145     // [0.0,1.0] where 1.0 means "the entire budget available for the frame was
146     // exhausted." Going above 1.0 is okay for one or a few frames, since it's
147     // the average over many frames that matters before the system is considered
148     // "redlining."
149     //
150     // The max of these three provides an overall utilization control signal.
151     // The usual approach is for upstream control logic to increase/decrease the
152     // data volume (e.g., video resolution and/or frame rate) to maintain a good
153     // target point.
time_utilizationStats154     double time_utilization() const {
155       return static_cast<double>(encode_wall_time.count()) /
156              frame_duration.count();
157     }
space_utilizationStats158     double space_utilization() const { return encoded_size / target_size; }
entropy_utilizationStats159     double entropy_utilization() const {
160       return perfect_quantizer / kMaxQuantizer;
161     }
162   };
163 
164   StreamingVp8Encoder(const Parameters& params,
165                       TaskRunner* task_runner,
166                       Sender* sender);
167 
168   ~StreamingVp8Encoder();
169 
170   // Get/Set the target bitrate. This may be changed at any time, as frequently
171   // as desired, and it will take effect internally as soon as possible.
172   int GetTargetBitrate() const;
173   void SetTargetBitrate(int new_bitrate);
174 
175   // Encode |frame| using the VP8 encoder, assemble an EncodedFrame, and enqueue
176   // into the Sender. The frame may be dropped if too many frames are in-flight.
177   // If provided, the |stats_callback| is run after the frame is enqueued in the
178   // Sender (via the main TaskRunner).
179   void EncodeAndSend(const VideoFrame& frame,
180                      Clock::time_point reference_time,
181                      std::function<void(Stats)> stats_callback);
182 
183   static constexpr int kMinQuantizer = 0;
184   static constexpr int kMaxQuantizer = 63;
185 
186  private:
187   // Syntactic convenience to wrap the vpx_image_t alloc/free API in a smart
188   // pointer.
189   struct VpxImageDeleter {
operatorVpxImageDeleter190     void operator()(vpx_image_t* ptr) const { vpx_img_free(ptr); }
191   };
192   using VpxImageUniquePtr = std::unique_ptr<vpx_image_t, VpxImageDeleter>;
193 
194   // Represents the state of one frame encode. This is created in
195   // EncodeAndSend(), and passed to the encode thread via the |encode_queue_|.
196   struct WorkUnit {
197     VpxImageUniquePtr image;
198     Clock::duration duration;
199     Clock::time_point reference_time;
200     RtpTimeTicks rtp_timestamp;
201     std::function<void(Stats)> stats_callback;
202   };
203 
204   // Same as WorkUnit, but with additional fields to carry the encode results.
205   struct WorkUnitWithResults : public WorkUnit {
206     std::vector<uint8_t> payload;
207     bool is_key_frame;
208     Stats stats;
209   };
210 
is_encoder_initialized()211   bool is_encoder_initialized() const { return config_.g_threads != 0; }
212 
213   // Destroys the VP8 encoder context if it has been initialized.
214   void DestroyEncoder();
215 
216   // The procedure for the |encode_thread_| that loops, processing work units
217   // from the |encode_queue_| by calling Encode() until it's time to end the
218   // thread.
219   void ProcessWorkUnitsUntilTimeToQuit();
220 
221   // If the |encoder_| is live, attempt reconfiguration to allow it to encode
222   // frames at a new frame size, target bitrate, or "CPU encoding speed." If
223   // reconfiguration is not possible, destroy the existing instance and
224   // re-create a new |encoder_| instance.
225   void PrepareEncoder(int width, int height, int target_bitrate);
226 
227   // Wraps the complex libvpx vpx_codec_encode() call using inputs from
228   // |work_unit| and populating results there.
229   void EncodeFrame(bool force_key_frame, WorkUnitWithResults* work_unit);
230 
231   // Computes and populates |work_unit.stats| after the last call to
232   // EncodeFrame().
233   void ComputeFrameEncodeStats(Clock::duration encode_wall_time,
234                                int target_bitrate,
235                                WorkUnitWithResults* work_unit);
236 
237   // Updates the |ideal_speed_setting_|, to take effect with the next frame
238   // encode, based on the given performance |stats|.
239   void UpdateSpeedSettingForNextFrame(const Stats& stats);
240 
241   // Assembles and enqueues an EncodedFrame with the Sender on the main thread.
242   void SendEncodedFrame(WorkUnitWithResults results);
243 
244   // Allocates a vpx_image_t and copies the content from |frame| to it.
245   static VpxImageUniquePtr CloneAsVpxImage(const VideoFrame& frame);
246 
247   const Parameters params_;
248   TaskRunner* const main_task_runner_;
249   Sender* const sender_;
250 
251   // The reference time of the first frame passed to EncodeAndSend().
252   Clock::time_point start_time_ = Clock::time_point::min();
253 
254   // The RTP timestamp of the last frame that was pushed into the
255   // |encode_queue_| by EncodeAndSend(). This is used to check whether
256   // timestamps are monotonically increasing.
257   RtpTimeTicks last_enqueued_rtp_timestamp_;
258 
259   // Guards a few members shared by both the main and encode threads.
260   std::mutex mutex_;
261 
262   // Used by the encode thread to sleep until more work is available.
263   std::condition_variable cv_ ABSL_GUARDED_BY(mutex_);
264 
265   // These encode parameters not passed in the WorkUnit struct because it is
266   // desirable for them to be applied as soon as possible, with the very next
267   // WorkUnit popped from the |encode_queue_| on the encode thread, and not to
268   // wait until some later WorkUnit is processed.
269   bool needs_key_frame_ ABSL_GUARDED_BY(mutex_) = true;
270   int target_bitrate_ ABSL_GUARDED_BY(mutex_) = 2 << 20;  // Default: 2 Mbps.
271 
272   // The queue of frame encodes. The size of this queue is implicitly bounded by
273   // EncodeAndSend(), where it checks for the total in-flight media duration and
274   // maybe drops a frame.
275   std::queue<WorkUnit> encode_queue_ ABSL_GUARDED_BY(mutex_);
276 
277   // Current VP8 encoder configuration. Most of the fields are unchanging, and
278   // are populated in the ctor; but thereafter, only the encode thread accesses
279   // this struct.
280   //
281   // The speed setting is controlled via a separate libvpx API (see members
282   // below).
283   vpx_codec_enc_cfg_t config_{};
284 
285   // These represent the magnitude of the VP8 speed setting, where larger values
286   // (i.e., faster speed) request less CPU usage but will provide lower video
287   // quality. Only the encode thread accesses these.
288   double ideal_speed_setting_;  // A time-weighted average, from measurements.
289   int current_speed_setting_;   // Current |encoder_| speed setting.
290 
291   // libvpx VP8 encoder instance. Only the encode thread accesses this.
292   vpx_codec_ctx_t encoder_;
293 
294   // This member should be last in the class since the thread should not start
295   // until all above members have been initialized by the constructor.
296   std::thread encode_thread_;
297 };
298 
299 }  // namespace cast
300 }  // namespace openscreen
301 
302 #endif  // CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_
303