/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_
#define API_VIDEO_CODECS_VIDEO_ENCODER_H_

#include <stddef.h>
#include <stdint.h>

#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "api/fec_controller_override.h"
#include "api/units/data_rate.h"
#include "api/video/encoded_image.h"
#include "api/video/video_bitrate_allocation.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_frame.h"
#include "api/video_codecs/video_codec.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/rtc_export.h"

namespace webrtc {

// TODO(pbos): Expose these through a public (root) header or change these APIs.
// Forward declaration only; this header uses the type solely through pointers.
struct CodecSpecificInfo;

// Default value of VideoEncoder::ScalingSettings::min_pixels_per_frame,
// i.e. the pixel count of a 320x180 frame.
constexpr int kDefaultMinPixelsPerFrame = 320 * 180;

38 class RTC_EXPORT EncodedImageCallback {
39  public:
~EncodedImageCallback()40   virtual ~EncodedImageCallback() {}
41 
42   struct Result {
43     enum Error {
44       OK,
45 
46       // Failed to send the packet.
47       ERROR_SEND_FAILED,
48     };
49 
ResultResult50     explicit Result(Error error) : error(error) {}
ResultResult51     Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {}
52 
53     Error error;
54 
55     // Frame ID assigned to the frame. The frame ID should be the same as the ID
56     // seen by the receiver for this frame. RTP timestamp of the frame is used
57     // as frame ID when RTP is used to send video. Must be used only when
58     // error=OK.
59     uint32_t frame_id = 0;
60 
61     // Tells the encoder that the next frame is should be dropped.
62     bool drop_next_frame = false;
63   };
64 
65   // Used to signal the encoder about reason a frame is dropped.
66   // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate
67   // limiting purposes).
68   // kDroppedByEncoder - dropped by encoder's internal rate limiter.
69   // TODO(bugs.webrtc.org/10164): Delete this enum? It duplicates the more
70   // general VideoStreamEncoderObserver::DropReason. Also,
71   // kDroppedByMediaOptimizations is not produced by any encoder, but by
72   // VideoStreamEncoder.
73   enum class DropReason : uint8_t {
74     kDroppedByMediaOptimizations,
75     kDroppedByEncoder
76   };
77 
78   // Callback function which is called when an image has been encoded.
79   virtual Result OnEncodedImage(
80       const EncodedImage& encoded_image,
81       const CodecSpecificInfo* codec_specific_info) = 0;
82 
OnDroppedFrame(DropReason reason)83   virtual void OnDroppedFrame(DropReason reason) {}
84 };
85 
86 class RTC_EXPORT VideoEncoder {
87  public:
88   struct QpThresholds {
QpThresholdsQpThresholds89     QpThresholds(int l, int h) : low(l), high(h) {}
QpThresholdsQpThresholds90     QpThresholds() : low(-1), high(-1) {}
91     int low;
92     int high;
93   };
94 
95   // Quality scaling is enabled if thresholds are provided.
96   struct RTC_EXPORT ScalingSettings {
97    private:
98     // Private magic type for kOff, implicitly convertible to
99     // ScalingSettings.
100     struct KOff {};
101 
102    public:
103     // TODO(bugs.webrtc.org/9078): Since absl::optional should be trivially copy
104     // constructible, this magic value can likely be replaced by a constexpr
105     // ScalingSettings value.
106     static constexpr KOff kOff = {};
107 
108     ScalingSettings(int low, int high);
109     ScalingSettings(int low, int high, int min_pixels);
110     ScalingSettings(const ScalingSettings&);
111     ScalingSettings(KOff);  // NOLINT(runtime/explicit)
112     ~ScalingSettings();
113 
114     absl::optional<QpThresholds> thresholds;
115 
116     // We will never ask for a resolution lower than this.
117     // TODO(kthelgason): Lower this limit when better testing
118     // on MediaCodec and fallback implementations are in place.
119     // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206
120     int min_pixels_per_frame = kDefaultMinPixelsPerFrame;
121 
122    private:
123     // Private constructor; to get an object without thresholds, use
124     // the magic constant ScalingSettings::kOff.
125     ScalingSettings();
126   };
127 
128   // Bitrate limits for resolution.
129   struct ResolutionBitrateLimits {
ResolutionBitrateLimitsResolutionBitrateLimits130     ResolutionBitrateLimits(int frame_size_pixels,
131                             int min_start_bitrate_bps,
132                             int min_bitrate_bps,
133                             int max_bitrate_bps)
134         : frame_size_pixels(frame_size_pixels),
135           min_start_bitrate_bps(min_start_bitrate_bps),
136           min_bitrate_bps(min_bitrate_bps),
137           max_bitrate_bps(max_bitrate_bps) {}
138     // Size of video frame, in pixels, the bitrate thresholds are intended for.
139     int frame_size_pixels = 0;
140     // Recommended minimum bitrate to start encoding.
141     int min_start_bitrate_bps = 0;
142     // Recommended minimum bitrate.
143     int min_bitrate_bps = 0;
144     // Recommended maximum bitrate.
145     int max_bitrate_bps = 0;
146 
147     bool operator==(const ResolutionBitrateLimits& rhs) const;
148     bool operator!=(const ResolutionBitrateLimits& rhs) const {
149       return !(*this == rhs);
150     }
151   };
152 
153   // Struct containing metadata about the encoder implementing this interface.
154   struct RTC_EXPORT EncoderInfo {
155     static constexpr uint8_t kMaxFramerateFraction =
156         std::numeric_limits<uint8_t>::max();
157 
158     EncoderInfo();
159     EncoderInfo(const EncoderInfo&);
160 
161     ~EncoderInfo();
162 
163     std::string ToString() const;
164     bool operator==(const EncoderInfo& rhs) const;
165     bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); }
166 
167     // Any encoder implementation wishing to use the WebRTC provided
168     // quality scaler must populate this field.
169     ScalingSettings scaling_settings;
170 
171     // The width and height of the incoming video frames should be divisible
172     // by `requested_resolution_alignment`. If they are not, the encoder may
173     // drop the incoming frame.
174     // For example: With I420, this value would be a multiple of 2.
175     // Note that this field is unrelated to any horizontal or vertical stride
176     // requirements the encoder has on the incoming video frame buffers.
177     int requested_resolution_alignment;
178 
179     // Same as above but if true, each simulcast layer should also be divisible
180     // by `requested_resolution_alignment`.
181     // Note that scale factors `scale_resolution_down_by` may be adjusted so a
182     // common multiple is not too large to avoid largely cropped frames and
183     // possibly with an aspect ratio far from the original.
184     // Warning: large values of scale_resolution_down_by could be changed
185     // considerably, especially if `requested_resolution_alignment` is large.
186     bool apply_alignment_to_all_simulcast_layers;
187 
188     // If true, encoder supports working with a native handle (e.g. texture
189     // handle for hw codecs) rather than requiring a raw I420 buffer.
190     bool supports_native_handle;
191 
192     // The name of this particular encoder implementation, e.g. "libvpx".
193     std::string implementation_name;
194 
195     // If this field is true, the encoder rate controller must perform
196     // well even in difficult situations, and produce close to the specified
197     // target bitrate seen over a reasonable time window, drop frames if
198     // necessary in order to keep the rate correct, and react quickly to
199     // changing bitrate targets. If this method returns true, we disable the
200     // frame dropper in the media optimization module and rely entirely on the
201     // encoder to produce media at a bitrate that closely matches the target.
202     // Any overshooting may result in delay buildup. If this method returns
203     // false (default behavior), the media opt frame dropper will drop input
204     // frames if it suspect encoder misbehavior. Misbehavior is common,
205     // especially in hardware codecs. Disable media opt at your own risk.
206     bool has_trusted_rate_controller;
207 
208     // If this field is true, the encoder uses hardware support and different
209     // thresholds will be used in CPU adaptation.
210     bool is_hardware_accelerated;
211 
212     // For each spatial layer (simulcast stream or SVC layer), represented as an
213     // element in `fps_allocation` a vector indicates how many temporal layers
214     // the encoder is using for that spatial layer.
215     // For each spatial/temporal layer pair, the frame rate fraction is given as
216     // an 8bit unsigned integer where 0 = 0% and 255 = 100%.
217     //
218     // If the vector is empty for a given spatial layer, it indicates that frame
219     // rates are not defined and we can't count on any specific frame rate to be
220     // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic.
221     //
222     // The encoder may update this on a per-frame basis in response to both
223     // internal and external signals.
224     //
225     // Spatial layers are treated independently, but temporal layers are
226     // cumulative. For instance, if:
227     //   fps_allocation[0][0] = kFullFramerate / 2;
228     //   fps_allocation[0][1] = kFullFramerate;
229     // Then half of the frames are in the base layer and half is in TL1, but
230     // since TL1 is assumed to depend on the base layer, the frame rate is
231     // indicated as the full 100% for the top layer.
232     //
233     // Defaults to a single spatial layer containing a single temporal layer
234     // with a 100% frame rate fraction.
235     absl::InlinedVector<uint8_t, kMaxTemporalStreams>
236         fps_allocation[kMaxSpatialLayers];
237 
238     // Recommended bitrate limits for different resolutions.
239     std::vector<ResolutionBitrateLimits> resolution_bitrate_limits;
240 
241     // Obtains the limits from `resolution_bitrate_limits` that best matches the
242     // `frame_size_pixels`.
243     absl::optional<ResolutionBitrateLimits>
244     GetEncoderBitrateLimitsForResolution(int frame_size_pixels) const;
245 
246     // If true, this encoder has internal support for generating simulcast
247     // streams. Otherwise, an adapter class will be needed.
248     // Even if true, the config provided to InitEncode() might not be supported,
249     // in such case the encoder should return
250     // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
251     bool supports_simulcast;
252 
253     // The list of pixel formats preferred by the encoder. It is assumed that if
254     // the list is empty and supports_native_handle is false, then {I420} is the
255     // preferred pixel format. The order of the formats does not matter.
256     absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
257         preferred_pixel_formats;
258 
259     // Indicates whether or not QP value encoder writes into frame/slice/tile
260     // header can be interpreted as average frame/slice/tile QP.
261     absl::optional<bool> is_qp_trusted;
262   };
263 
264   struct RTC_EXPORT RateControlParameters {
265     RateControlParameters();
266     RateControlParameters(const VideoBitrateAllocation& bitrate,
267                           double framerate_fps);
268     RateControlParameters(const VideoBitrateAllocation& bitrate,
269                           double framerate_fps,
270                           DataRate bandwidth_allocation);
271     virtual ~RateControlParameters();
272 
273     // Target bitrate, per spatial/temporal layer.
274     // A target bitrate of 0bps indicates a layer should not be encoded at all.
275     VideoBitrateAllocation target_bitrate;
276     // Adjusted target bitrate, per spatial/temporal layer. May be lower or
277     // higher than the target depending on encoder behaviour.
278     VideoBitrateAllocation bitrate;
279     // Target framerate, in fps. A value <= 0.0 is invalid and should be
280     // interpreted as framerate target not available. In this case the encoder
281     // should fall back to the max framerate specified in `codec_settings` of
282     // the last InitEncode() call.
283     double framerate_fps;
284     // The network bandwidth available for video. This is at least
285     // `bitrate.get_sum_bps()`, but may be higher if the application is not
286     // network constrained.
287     DataRate bandwidth_allocation;
288 
289     bool operator==(const RateControlParameters& rhs) const;
290     bool operator!=(const RateControlParameters& rhs) const;
291   };
292 
293   struct LossNotification {
294     // The timestamp of the last decodable frame *prior* to the last received.
295     // (The last received - described below - might itself be decodable or not.)
296     uint32_t timestamp_of_last_decodable;
297     // The timestamp of the last received frame.
298     uint32_t timestamp_of_last_received;
299     // Describes whether the dependencies of the last received frame were
300     // all decodable.
301     // `false` if some dependencies were undecodable, `true` if all dependencies
302     // were decodable, and `nullopt` if the dependencies are unknown.
303     absl::optional<bool> dependencies_of_last_received_decodable;
304     // Describes whether the received frame was decodable.
305     // `false` if some dependency was undecodable or if some packet belonging
306     // to the last received frame was missed.
307     // `true` if all dependencies were decodable and all packets belonging
308     // to the last received frame were received.
309     // `nullopt` if no packet belonging to the last frame was missed, but the
310     // last packet in the frame was not yet received.
311     absl::optional<bool> last_received_decodable;
312   };
313 
314   // Negotiated capabilities which the VideoEncoder may expect the other
315   // side to use.
316   struct Capabilities {
CapabilitiesCapabilities317     explicit Capabilities(bool loss_notification)
318         : loss_notification(loss_notification) {}
319     bool loss_notification;
320   };
321 
322   struct Settings {
SettingsSettings323     Settings(const Capabilities& capabilities,
324              int number_of_cores,
325              size_t max_payload_size)
326         : capabilities(capabilities),
327           number_of_cores(number_of_cores),
328           max_payload_size(max_payload_size) {}
329 
330     Capabilities capabilities;
331     int number_of_cores;
332     size_t max_payload_size;
333   };
334 
335   static VideoCodecVP8 GetDefaultVp8Settings();
336   static VideoCodecVP9 GetDefaultVp9Settings();
337   static VideoCodecH264 GetDefaultH264Settings();
338 
~VideoEncoder()339   virtual ~VideoEncoder() {}
340 
341   // Set a FecControllerOverride, through which the encoder may override
342   // decisions made by FecController.
343   // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual.
344   virtual void SetFecControllerOverride(
345       FecControllerOverride* fec_controller_override);
346 
347   // Initialize the encoder with the information from the codecSettings
348   //
349   // Input:
350   //          - codec_settings    : Codec settings
351   //          - settings          : Settings affecting the encoding itself.
352   // Input for deprecated version:
353   //          - number_of_cores   : Number of cores available for the encoder
354   //          - max_payload_size  : The maximum size each payload is allowed
355   //                                to have. Usually MTU - overhead.
356   //
357   // Return value                  : Set bit rate if OK
358   //                                 <0 - Errors:
359   //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
360   //                                  WEBRTC_VIDEO_CODEC_ERR_SIZE
361   //                                  WEBRTC_VIDEO_CODEC_MEMORY
362   //                                  WEBRTC_VIDEO_CODEC_ERROR
363   // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting
364   // an announcement to discuss-webrtc, remove the three-parameters variant
365   // and make the two-parameters variant pure-virtual.
366   /* ABSL_DEPRECATED("bugs.webrtc.org/10720") */ virtual int32_t InitEncode(
367       const VideoCodec* codec_settings,
368       int32_t number_of_cores,
369       size_t max_payload_size);
370   virtual int InitEncode(const VideoCodec* codec_settings,
371                          const VideoEncoder::Settings& settings);
372 
373   // Register an encode complete callback object.
374   //
375   // Input:
376   //          - callback         : Callback object which handles encoded images.
377   //
378   // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
379   virtual int32_t RegisterEncodeCompleteCallback(
380       EncodedImageCallback* callback) = 0;
381 
382   // Free encoder memory.
383   // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
384   virtual int32_t Release() = 0;
385 
386   // Encode an image (as a part of a video stream). The encoded image
387   // will be returned to the user through the encode complete callback.
388   //
389   // Input:
390   //          - frame             : Image to be encoded
391   //          - frame_types       : Frame type to be generated by the encoder.
392   //
393   // Return value                 : WEBRTC_VIDEO_CODEC_OK if OK
394   //                                <0 - Errors:
395   //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
396   //                                  WEBRTC_VIDEO_CODEC_MEMORY
397   //                                  WEBRTC_VIDEO_CODEC_ERROR
398   virtual int32_t Encode(const VideoFrame& frame,
399                          const std::vector<VideoFrameType>* frame_types) = 0;
400 
401   // Sets rate control parameters: bitrate, framerate, etc. These settings are
402   // instantaneous (i.e. not moving averages) and should apply from now until
403   // the next call to SetRates().
404   virtual void SetRates(const RateControlParameters& parameters) = 0;
405 
406   // Inform the encoder when the packet loss rate changes.
407   //
408   // Input:   - packet_loss_rate  : The packet loss rate (0.0 to 1.0).
409   virtual void OnPacketLossRateUpdate(float packet_loss_rate);
410 
411   // Inform the encoder when the round trip time changes.
412   //
413   // Input:   - rtt_ms            : The new RTT, in milliseconds.
414   virtual void OnRttUpdate(int64_t rtt_ms);
415 
416   // Called when a loss notification is received.
417   virtual void OnLossNotification(const LossNotification& loss_notification);
418 
419   // Returns meta-data about the encoder, such as implementation name.
420   // The output of this method may change during runtime. For instance if a
421   // hardware encoder fails, it may fall back to doing software encoding using
422   // an implementation with different characteristics.
423   virtual EncoderInfo GetEncoderInfo() const;
424 };
}  // namespace webrtc
#endif  // API_VIDEO_CODECS_VIDEO_ENCODER_H_