1 /* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_ 12 #define API_VIDEO_CODECS_VIDEO_ENCODER_H_ 13 14 #include <limits> 15 #include <memory> 16 #include <string> 17 #include <vector> 18 19 #include "absl/container/inlined_vector.h" 20 #include "absl/types/optional.h" 21 #include "api/fec_controller_override.h" 22 #include "api/units/data_rate.h" 23 #include "api/video/encoded_image.h" 24 #include "api/video/video_bitrate_allocation.h" 25 #include "api/video/video_codec_constants.h" 26 #include "api/video/video_frame.h" 27 #include "api/video_codecs/video_codec.h" 28 #include "rtc_base/checks.h" 29 #include "rtc_base/system/rtc_export.h" 30 31 namespace webrtc { 32 33 // TODO(pbos): Expose these through a public (root) header or change these APIs. 34 struct CodecSpecificInfo; 35 36 constexpr int kDefaultMinPixelsPerFrame = 320 * 180; 37 38 class RTC_EXPORT EncodedImageCallback { 39 public: ~EncodedImageCallback()40 virtual ~EncodedImageCallback() {} 41 42 struct Result { 43 enum Error { 44 OK, 45 46 // Failed to send the packet. 47 ERROR_SEND_FAILED, 48 }; 49 ResultResult50 explicit Result(Error error) : error(error) {} ResultResult51 Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {} 52 53 Error error; 54 55 // Frame ID assigned to the frame. The frame ID should be the same as the ID 56 // seen by the receiver for this frame. RTP timestamp of the frame is used 57 // as frame ID when RTP is used to send video. Must be used only when 58 // error=OK. 59 uint32_t frame_id = 0; 60 61 // Tells the encoder that the next frame is should be dropped. 62 bool drop_next_frame = false; 63 }; 64 65 // Used to signal the encoder about reason a frame is dropped. 66 // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate 67 // limiting purposes). 68 // kDroppedByEncoder - dropped by encoder's internal rate limiter. 69 // TODO(bugs.webrtc.org/10164): Delete this enum? It duplicates the more 70 // general VideoStreamEncoderObserver::DropReason. Also, 71 // kDroppedByMediaOptimizations is not produced by any encoder, but by 72 // VideoStreamEncoder. 73 enum class DropReason : uint8_t { 74 kDroppedByMediaOptimizations, 75 kDroppedByEncoder 76 }; 77 78 // Callback function which is called when an image has been encoded. 79 virtual Result OnEncodedImage( 80 const EncodedImage& encoded_image, 81 const CodecSpecificInfo* codec_specific_info) = 0; 82 OnDroppedFrame(DropReason reason)83 virtual void OnDroppedFrame(DropReason reason) {} 84 }; 85 86 class RTC_EXPORT VideoEncoder { 87 public: 88 struct QpThresholds { QpThresholdsQpThresholds89 QpThresholds(int l, int h) : low(l), high(h) {} QpThresholdsQpThresholds90 QpThresholds() : low(-1), high(-1) {} 91 int low; 92 int high; 93 }; 94 95 // Quality scaling is enabled if thresholds are provided. 96 struct RTC_EXPORT ScalingSettings { 97 private: 98 // Private magic type for kOff, implicitly convertible to 99 // ScalingSettings. 100 struct KOff {}; 101 102 public: 103 // TODO(bugs.webrtc.org/9078): Since absl::optional should be trivially copy 104 // constructible, this magic value can likely be replaced by a constexpr 105 // ScalingSettings value. 106 static constexpr KOff kOff = {}; 107 108 ScalingSettings(int low, int high); 109 ScalingSettings(int low, int high, int min_pixels); 110 ScalingSettings(const ScalingSettings&); 111 ScalingSettings(KOff); // NOLINT(runtime/explicit) 112 ~ScalingSettings(); 113 114 absl::optional<QpThresholds> thresholds; 115 116 // We will never ask for a resolution lower than this. 117 // TODO(kthelgason): Lower this limit when better testing 118 // on MediaCodec and fallback implementations are in place. 119 // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206 120 int min_pixels_per_frame = kDefaultMinPixelsPerFrame; 121 122 private: 123 // Private constructor; to get an object without thresholds, use 124 // the magic constant ScalingSettings::kOff. 125 ScalingSettings(); 126 }; 127 128 // Bitrate limits for resolution. 129 struct ResolutionBitrateLimits { ResolutionBitrateLimitsResolutionBitrateLimits130 ResolutionBitrateLimits(int frame_size_pixels, 131 int min_start_bitrate_bps, 132 int min_bitrate_bps, 133 int max_bitrate_bps) 134 : frame_size_pixels(frame_size_pixels), 135 min_start_bitrate_bps(min_start_bitrate_bps), 136 min_bitrate_bps(min_bitrate_bps), 137 max_bitrate_bps(max_bitrate_bps) {} 138 // Size of video frame, in pixels, the bitrate thresholds are intended for. 139 int frame_size_pixels = 0; 140 // Recommended minimum bitrate to start encoding. 141 int min_start_bitrate_bps = 0; 142 // Recommended minimum bitrate. 143 int min_bitrate_bps = 0; 144 // Recommended maximum bitrate. 145 int max_bitrate_bps = 0; 146 147 bool operator==(const ResolutionBitrateLimits& rhs) const; 148 bool operator!=(const ResolutionBitrateLimits& rhs) const { 149 return !(*this == rhs); 150 } 151 }; 152 153 // Struct containing metadata about the encoder implementing this interface. 154 struct RTC_EXPORT EncoderInfo { 155 static constexpr uint8_t kMaxFramerateFraction = 156 std::numeric_limits<uint8_t>::max(); 157 158 EncoderInfo(); 159 EncoderInfo(const EncoderInfo&); 160 161 ~EncoderInfo(); 162 163 std::string ToString() const; 164 bool operator==(const EncoderInfo& rhs) const; 165 bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); } 166 167 // Any encoder implementation wishing to use the WebRTC provided 168 // quality scaler must populate this field. 169 ScalingSettings scaling_settings; 170 171 // The width and height of the incoming video frames should be divisible 172 // by `requested_resolution_alignment`. If they are not, the encoder may 173 // drop the incoming frame. 174 // For example: With I420, this value would be a multiple of 2. 175 // Note that this field is unrelated to any horizontal or vertical stride 176 // requirements the encoder has on the incoming video frame buffers. 177 int requested_resolution_alignment; 178 179 // Same as above but if true, each simulcast layer should also be divisible 180 // by `requested_resolution_alignment`. 181 // Note that scale factors `scale_resolution_down_by` may be adjusted so a 182 // common multiple is not too large to avoid largely cropped frames and 183 // possibly with an aspect ratio far from the original. 184 // Warning: large values of scale_resolution_down_by could be changed 185 // considerably, especially if `requested_resolution_alignment` is large. 186 bool apply_alignment_to_all_simulcast_layers; 187 188 // If true, encoder supports working with a native handle (e.g. texture 189 // handle for hw codecs) rather than requiring a raw I420 buffer. 190 bool supports_native_handle; 191 192 // The name of this particular encoder implementation, e.g. "libvpx". 193 std::string implementation_name; 194 195 // If this field is true, the encoder rate controller must perform 196 // well even in difficult situations, and produce close to the specified 197 // target bitrate seen over a reasonable time window, drop frames if 198 // necessary in order to keep the rate correct, and react quickly to 199 // changing bitrate targets. If this method returns true, we disable the 200 // frame dropper in the media optimization module and rely entirely on the 201 // encoder to produce media at a bitrate that closely matches the target. 202 // Any overshooting may result in delay buildup. If this method returns 203 // false (default behavior), the media opt frame dropper will drop input 204 // frames if it suspect encoder misbehavior. Misbehavior is common, 205 // especially in hardware codecs. Disable media opt at your own risk. 206 bool has_trusted_rate_controller; 207 208 // If this field is true, the encoder uses hardware support and different 209 // thresholds will be used in CPU adaptation. 210 bool is_hardware_accelerated; 211 212 // For each spatial layer (simulcast stream or SVC layer), represented as an 213 // element in `fps_allocation` a vector indicates how many temporal layers 214 // the encoder is using for that spatial layer. 215 // For each spatial/temporal layer pair, the frame rate fraction is given as 216 // an 8bit unsigned integer where 0 = 0% and 255 = 100%. 217 // 218 // If the vector is empty for a given spatial layer, it indicates that frame 219 // rates are not defined and we can't count on any specific frame rate to be 220 // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic. 221 // 222 // The encoder may update this on a per-frame basis in response to both 223 // internal and external signals. 224 // 225 // Spatial layers are treated independently, but temporal layers are 226 // cumulative. For instance, if: 227 // fps_allocation[0][0] = kFullFramerate / 2; 228 // fps_allocation[0][1] = kFullFramerate; 229 // Then half of the frames are in the base layer and half is in TL1, but 230 // since TL1 is assumed to depend on the base layer, the frame rate is 231 // indicated as the full 100% for the top layer. 232 // 233 // Defaults to a single spatial layer containing a single temporal layer 234 // with a 100% frame rate fraction. 235 absl::InlinedVector<uint8_t, kMaxTemporalStreams> 236 fps_allocation[kMaxSpatialLayers]; 237 238 // Recommended bitrate limits for different resolutions. 239 std::vector<ResolutionBitrateLimits> resolution_bitrate_limits; 240 241 // Obtains the limits from `resolution_bitrate_limits` that best matches the 242 // `frame_size_pixels`. 243 absl::optional<ResolutionBitrateLimits> 244 GetEncoderBitrateLimitsForResolution(int frame_size_pixels) const; 245 246 // If true, this encoder has internal support for generating simulcast 247 // streams. Otherwise, an adapter class will be needed. 248 // Even if true, the config provided to InitEncode() might not be supported, 249 // in such case the encoder should return 250 // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED. 251 bool supports_simulcast; 252 253 // The list of pixel formats preferred by the encoder. It is assumed that if 254 // the list is empty and supports_native_handle is false, then {I420} is the 255 // preferred pixel format. The order of the formats does not matter. 256 absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats> 257 preferred_pixel_formats; 258 259 // Indicates whether or not QP value encoder writes into frame/slice/tile 260 // header can be interpreted as average frame/slice/tile QP. 261 absl::optional<bool> is_qp_trusted; 262 }; 263 264 struct RTC_EXPORT RateControlParameters { 265 RateControlParameters(); 266 RateControlParameters(const VideoBitrateAllocation& bitrate, 267 double framerate_fps); 268 RateControlParameters(const VideoBitrateAllocation& bitrate, 269 double framerate_fps, 270 DataRate bandwidth_allocation); 271 virtual ~RateControlParameters(); 272 273 // Target bitrate, per spatial/temporal layer. 274 // A target bitrate of 0bps indicates a layer should not be encoded at all. 275 VideoBitrateAllocation target_bitrate; 276 // Adjusted target bitrate, per spatial/temporal layer. May be lower or 277 // higher than the target depending on encoder behaviour. 278 VideoBitrateAllocation bitrate; 279 // Target framerate, in fps. A value <= 0.0 is invalid and should be 280 // interpreted as framerate target not available. In this case the encoder 281 // should fall back to the max framerate specified in `codec_settings` of 282 // the last InitEncode() call. 283 double framerate_fps; 284 // The network bandwidth available for video. This is at least 285 // `bitrate.get_sum_bps()`, but may be higher if the application is not 286 // network constrained. 287 DataRate bandwidth_allocation; 288 289 bool operator==(const RateControlParameters& rhs) const; 290 bool operator!=(const RateControlParameters& rhs) const; 291 }; 292 293 struct LossNotification { 294 // The timestamp of the last decodable frame *prior* to the last received. 295 // (The last received - described below - might itself be decodable or not.) 296 uint32_t timestamp_of_last_decodable; 297 // The timestamp of the last received frame. 298 uint32_t timestamp_of_last_received; 299 // Describes whether the dependencies of the last received frame were 300 // all decodable. 301 // `false` if some dependencies were undecodable, `true` if all dependencies 302 // were decodable, and `nullopt` if the dependencies are unknown. 303 absl::optional<bool> dependencies_of_last_received_decodable; 304 // Describes whether the received frame was decodable. 305 // `false` if some dependency was undecodable or if some packet belonging 306 // to the last received frame was missed. 307 // `true` if all dependencies were decodable and all packets belonging 308 // to the last received frame were received. 309 // `nullopt` if no packet belonging to the last frame was missed, but the 310 // last packet in the frame was not yet received. 311 absl::optional<bool> last_received_decodable; 312 }; 313 314 // Negotiated capabilities which the VideoEncoder may expect the other 315 // side to use. 316 struct Capabilities { CapabilitiesCapabilities317 explicit Capabilities(bool loss_notification) 318 : loss_notification(loss_notification) {} 319 bool loss_notification; 320 }; 321 322 struct Settings { SettingsSettings323 Settings(const Capabilities& capabilities, 324 int number_of_cores, 325 size_t max_payload_size) 326 : capabilities(capabilities), 327 number_of_cores(number_of_cores), 328 max_payload_size(max_payload_size) {} 329 330 Capabilities capabilities; 331 int number_of_cores; 332 size_t max_payload_size; 333 }; 334 335 static VideoCodecVP8 GetDefaultVp8Settings(); 336 static VideoCodecVP9 GetDefaultVp9Settings(); 337 static VideoCodecH264 GetDefaultH264Settings(); 338 ~VideoEncoder()339 virtual ~VideoEncoder() {} 340 341 // Set a FecControllerOverride, through which the encoder may override 342 // decisions made by FecController. 343 // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual. 344 virtual void SetFecControllerOverride( 345 FecControllerOverride* fec_controller_override); 346 347 // Initialize the encoder with the information from the codecSettings 348 // 349 // Input: 350 // - codec_settings : Codec settings 351 // - settings : Settings affecting the encoding itself. 352 // Input for deprecated version: 353 // - number_of_cores : Number of cores available for the encoder 354 // - max_payload_size : The maximum size each payload is allowed 355 // to have. Usually MTU - overhead. 356 // 357 // Return value : Set bit rate if OK 358 // <0 - Errors: 359 // WEBRTC_VIDEO_CODEC_ERR_PARAMETER 360 // WEBRTC_VIDEO_CODEC_ERR_SIZE 361 // WEBRTC_VIDEO_CODEC_MEMORY 362 // WEBRTC_VIDEO_CODEC_ERROR 363 // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting 364 // an announcement to discuss-webrtc, remove the three-parameters variant 365 // and make the two-parameters variant pure-virtual. 366 /* ABSL_DEPRECATED("bugs.webrtc.org/10720") */ virtual int32_t InitEncode( 367 const VideoCodec* codec_settings, 368 int32_t number_of_cores, 369 size_t max_payload_size); 370 virtual int InitEncode(const VideoCodec* codec_settings, 371 const VideoEncoder::Settings& settings); 372 373 // Register an encode complete callback object. 374 // 375 // Input: 376 // - callback : Callback object which handles encoded images. 377 // 378 // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise. 379 virtual int32_t RegisterEncodeCompleteCallback( 380 EncodedImageCallback* callback) = 0; 381 382 // Free encoder memory. 383 // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise. 384 virtual int32_t Release() = 0; 385 386 // Encode an image (as a part of a video stream). The encoded image 387 // will be returned to the user through the encode complete callback. 388 // 389 // Input: 390 // - frame : Image to be encoded 391 // - frame_types : Frame type to be generated by the encoder. 392 // 393 // Return value : WEBRTC_VIDEO_CODEC_OK if OK 394 // <0 - Errors: 395 // WEBRTC_VIDEO_CODEC_ERR_PARAMETER 396 // WEBRTC_VIDEO_CODEC_MEMORY 397 // WEBRTC_VIDEO_CODEC_ERROR 398 virtual int32_t Encode(const VideoFrame& frame, 399 const std::vector<VideoFrameType>* frame_types) = 0; 400 401 // Sets rate control parameters: bitrate, framerate, etc. These settings are 402 // instantaneous (i.e. not moving averages) and should apply from now until 403 // the next call to SetRates(). 404 virtual void SetRates(const RateControlParameters& parameters) = 0; 405 406 // Inform the encoder when the packet loss rate changes. 407 // 408 // Input: - packet_loss_rate : The packet loss rate (0.0 to 1.0). 409 virtual void OnPacketLossRateUpdate(float packet_loss_rate); 410 411 // Inform the encoder when the round trip time changes. 412 // 413 // Input: - rtt_ms : The new RTT, in milliseconds. 414 virtual void OnRttUpdate(int64_t rtt_ms); 415 416 // Called when a loss notification is received. 417 virtual void OnLossNotification(const LossNotification& loss_notification); 418 419 // Returns meta-data about the encoder, such as implementation name. 420 // The output of this method may change during runtime. For instance if a 421 // hardware encoder fails, it may fall back to doing software encoding using 422 // an implementation with different characteristics. 423 virtual EncoderInfo GetEncoderInfo() const; 424 }; 425 } // namespace webrtc 426 #endif // API_VIDEO_CODECS_VIDEO_ENCODER_H_ 427