1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ 12 #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ 13 14 #include "webrtc/common_types.h" 15 #include "webrtc/typedefs.h" 16 17 /******************************************************/ 18 /* Quality Modes: Resolution and Robustness settings */ 19 /******************************************************/ 20 21 namespace webrtc { 22 struct VideoContentMetrics; 23 24 struct VCMResolutionScale { VCMResolutionScaleVCMResolutionScale25 VCMResolutionScale() 26 : codec_width(640), 27 codec_height(480), 28 frame_rate(30.0f), 29 spatial_width_fact(1.0f), 30 spatial_height_fact(1.0f), 31 temporal_fact(1.0f), 32 change_resolution_spatial(false), 33 change_resolution_temporal(false) {} 34 uint16_t codec_width; 35 uint16_t codec_height; 36 float frame_rate; 37 float spatial_width_fact; 38 float spatial_height_fact; 39 float temporal_fact; 40 bool change_resolution_spatial; 41 bool change_resolution_temporal; 42 }; 43 44 enum ImageType { 45 kQCIF = 0, // 176x144 46 kHCIF, // 264x216 = half(~3/4x3/4) CIF. 47 kQVGA, // 320x240 = quarter VGA. 48 kCIF, // 352x288 49 kHVGA, // 480x360 = half(~3/4x3/4) VGA. 50 kVGA, // 640x480 51 kQFULLHD, // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD. 52 kWHD, // 1280x720 53 kFULLHD, // 1920x1080 54 kNumImageTypes 55 }; 56 57 const uint32_t kSizeOfImageType[kNumImageTypes] = { 58 25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600}; 59 60 enum FrameRateLevelClass { 61 kFrameRateLow, 62 kFrameRateMiddle1, 63 kFrameRateMiddle2, 64 kFrameRateHigh 65 }; 66 67 enum ContentLevelClass { kLow, kHigh, kDefault }; 68 69 struct VCMContFeature { VCMContFeatureVCMContFeature70 VCMContFeature() : value(0.0f), level(kDefault) {} ResetVCMContFeature71 void Reset() { 72 value = 0.0f; 73 level = kDefault; 74 } 75 float value; 76 ContentLevelClass level; 77 }; 78 79 enum UpDownAction { kUpResolution, kDownResolution }; 80 81 enum SpatialAction { 82 kNoChangeSpatial, 83 kOneHalfSpatialUniform, // 3/4 x 3/4: 9/6 ~1/2 pixel reduction. 84 kOneQuarterSpatialUniform, // 1/2 x 1/2: 1/4 pixel reduction. 85 kNumModesSpatial 86 }; 87 88 enum TemporalAction { 89 kNoChangeTemporal, 90 kTwoThirdsTemporal, // 2/3 frame rate reduction 91 kOneHalfTemporal, // 1/2 frame rate reduction 92 kNumModesTemporal 93 }; 94 95 struct ResolutionAction { ResolutionActionResolutionAction96 ResolutionAction() : spatial(kNoChangeSpatial), temporal(kNoChangeTemporal) {} 97 SpatialAction spatial; 98 TemporalAction temporal; 99 }; 100 101 // Down-sampling factors for spatial (width and height), and temporal. 102 const float kFactorWidthSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f}; 103 104 const float kFactorHeightSpatial[kNumModesSpatial] = {1.0f, 4.0f / 3.0f, 2.0f}; 105 106 const float kFactorTemporal[kNumModesTemporal] = {1.0f, 1.5f, 2.0f}; 107 108 enum EncoderState { 109 kStableEncoding, // Low rate mis-match, stable buffer levels. 110 kStressedEncoding, // Significant over-shooting of target rate, 111 // Buffer under-flow, etc. 112 kEasyEncoding // Significant under-shooting of target rate. 113 }; 114 115 // QmMethod class: main class for resolution and robustness settings 116 117 class VCMQmMethod { 118 public: 119 VCMQmMethod(); 120 virtual ~VCMQmMethod(); 121 122 // Reset values 123 void ResetQM(); 124 virtual void Reset() = 0; 125 126 // Compute content class. 127 uint8_t ComputeContentClass(); 128 129 // Update with the content metrics. 130 void UpdateContent(const VideoContentMetrics* content_metrics); 131 132 // Compute spatial texture magnitude and level. 133 // Spatial texture is a spatial prediction error measure. 134 void ComputeSpatial(); 135 136 // Compute motion magnitude and level for NFD metric. 137 // NFD is normalized frame difference (normalized by spatial variance). 138 void ComputeMotionNFD(); 139 140 // Get the imageType (CIF, VGA, HD, etc) for the system width/height. 141 ImageType GetImageType(uint16_t width, uint16_t height); 142 143 // Return the closest image type. 144 ImageType FindClosestImageType(uint16_t width, uint16_t height); 145 146 // Get the frame rate level. 147 FrameRateLevelClass FrameRateLevel(float frame_rate); 148 149 protected: 150 // Content Data. 151 const VideoContentMetrics* content_metrics_; 152 153 // Encoder frame sizes and native frame sizes. 154 uint16_t width_; 155 uint16_t height_; 156 float user_frame_rate_; 157 uint16_t native_width_; 158 uint16_t native_height_; 159 float native_frame_rate_; 160 float aspect_ratio_; 161 // Image type and frame rate leve, for the current encoder resolution. 162 ImageType image_type_; 163 FrameRateLevelClass framerate_level_; 164 // Content class data. 165 VCMContFeature motion_; 166 VCMContFeature spatial_; 167 uint8_t content_class_; 168 bool init_; 169 }; 170 171 // Resolution settings class 172 173 class VCMQmResolution : public VCMQmMethod { 174 public: 175 VCMQmResolution(); 176 virtual ~VCMQmResolution(); 177 178 // Reset all quantities. 179 virtual void Reset(); 180 181 // Reset rate quantities and counters after every SelectResolution() call. 182 void ResetRates(); 183 184 // Reset down-sampling state. 185 void ResetDownSamplingState(); 186 187 // Get the encoder state. 188 EncoderState GetEncoderState(); 189 190 // Initialize after SetEncodingData in media_opt. 191 int Initialize(float bitrate, 192 float user_framerate, 193 uint16_t width, 194 uint16_t height, 195 int num_layers); 196 197 // Update the encoder frame size. 198 void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height); 199 200 // Update with actual bit rate (size of the latest encoded frame) 201 // and frame type, after every encoded frame. 202 void UpdateEncodedSize(size_t encoded_size); 203 204 // Update with new target bitrate, actual encoder sent rate, frame_rate, 205 // loss rate: every ~1 sec from SetTargetRates in media_opt. 206 void UpdateRates(float target_bitrate, 207 float encoder_sent_rate, 208 float incoming_framerate, 209 uint8_t packet_loss); 210 211 // Extract ST (spatio-temporal) resolution action. 212 // Inputs: qm: Reference to the quality modes pointer. 213 // Output: the spatial and/or temporal scale change. 214 int SelectResolution(VCMResolutionScale** qm); 215 216 private: 217 // Set the default resolution action. 218 void SetDefaultAction(); 219 220 // Compute rates for the selection of down-sampling action. 221 void ComputeRatesForSelection(); 222 223 // Compute the encoder state. 224 void ComputeEncoderState(); 225 226 // Return true if the action is to go back up in resolution. 227 bool GoingUpResolution(); 228 229 // Return true if the action is to go down in resolution. 230 bool GoingDownResolution(); 231 232 // Check the condition for going up in resolution by the scale factors: 233 // |facWidth|, |facHeight|, |facTemp|. 234 // |scaleFac| is a scale factor for the transition rate. 235 bool ConditionForGoingUp(float fac_width, 236 float fac_height, 237 float fac_temp, 238 float scale_fac); 239 240 // Get the bitrate threshold for the resolution action. 241 // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action. 242 // |scaleFac| is a scale factor for the transition rate. 243 float GetTransitionRate(float fac_width, 244 float fac_height, 245 float fac_temp, 246 float scale_fac); 247 248 // Update the down-sampling state. 249 void UpdateDownsamplingState(UpDownAction up_down); 250 251 // Update the codec frame size and frame rate. 252 void UpdateCodecResolution(); 253 254 // Return a state based on average target rate relative transition rate. 255 uint8_t RateClass(float transition_rate); 256 257 // Adjust the action selected from the table. 258 void AdjustAction(); 259 260 // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2. 261 void ConvertSpatialFractionalToWhole(); 262 263 // Returns true if the new frame sizes, under the selected spatial action, 264 // are of even size. 265 bool EvenFrameSize(); 266 267 // Insert latest down-sampling action into the history list. 268 void InsertLatestDownAction(); 269 270 // Remove the last (first element) down-sampling action from the list. 271 void RemoveLastDownAction(); 272 273 // Check constraints on the amount of down-sampling allowed. 274 void ConstrainAmountOfDownSampling(); 275 276 // For going up in resolution: pick spatial or temporal action, 277 // if both actions were separately selected. 278 void PickSpatialOrTemporal(); 279 280 // Select the directional (1x2 or 2x1) spatial down-sampling action. 281 void SelectSpatialDirectionMode(float transition_rate); 282 283 enum { kDownActionHistorySize = 10 }; 284 285 VCMResolutionScale* qm_; 286 // Encoder rate control parameters. 287 float target_bitrate_; 288 float incoming_framerate_; 289 float per_frame_bandwidth_; 290 float buffer_level_; 291 292 // Data accumulated every ~1sec from MediaOpt. 293 float sum_target_rate_; 294 float sum_incoming_framerate_; 295 float sum_rate_MM_; 296 float sum_rate_MM_sgn_; 297 float sum_packet_loss_; 298 // Counters. 299 uint32_t frame_cnt_; 300 uint32_t frame_cnt_delta_; 301 uint32_t update_rate_cnt_; 302 uint32_t low_buffer_cnt_; 303 304 // Resolution state parameters. 305 float state_dec_factor_spatial_; 306 float state_dec_factor_temporal_; 307 308 // Quantities used for selection. 309 float avg_target_rate_; 310 float avg_incoming_framerate_; 311 float avg_ratio_buffer_low_; 312 float avg_rate_mismatch_; 313 float avg_rate_mismatch_sgn_; 314 float avg_packet_loss_; 315 EncoderState encoder_state_; 316 ResolutionAction action_; 317 // Short history of the down-sampling actions from the Initialize() state. 318 // This is needed for going up in resolution. Since the total amount of 319 // down-sampling actions are constrained, the length of the list need not be 320 // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample. 321 ResolutionAction down_action_history_[kDownActionHistorySize]; 322 int num_layers_; 323 }; 324 325 // Robustness settings class. 326 327 class VCMQmRobustness : public VCMQmMethod { 328 public: 329 VCMQmRobustness(); 330 ~VCMQmRobustness(); 331 332 virtual void Reset(); 333 334 // Adjust FEC rate based on content: every ~1 sec from SetTargetRates. 335 // Returns an adjustment factor. 336 float AdjustFecFactor(uint8_t code_rate_delta, 337 float total_rate, 338 float framerate, 339 int64_t rtt_time, 340 uint8_t packet_loss); 341 342 // Set the UEP protection on/off. 343 bool SetUepProtection(uint8_t code_rate_delta, 344 float total_rate, 345 uint8_t packet_loss, 346 bool frame_type); 347 348 private: 349 // Previous state of network parameters. 350 float prev_total_rate_; 351 int64_t prev_rtt_time_; 352 uint8_t prev_packet_loss_; 353 uint8_t prev_code_rate_delta_; 354 }; 355 } // namespace webrtc 356 #endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ 357