1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ 12 #define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ 13 14 #include "webrtc/common_types.h" 15 #include "webrtc/typedefs.h" 16 17 /******************************************************/ 18 /* Quality Modes: Resolution and Robustness settings */ 19 /******************************************************/ 20 21 namespace webrtc { 22 struct VideoContentMetrics; 23 24 struct VCMResolutionScale { VCMResolutionScaleVCMResolutionScale25 VCMResolutionScale() 26 : codec_width(640), 27 codec_height(480), 28 frame_rate(30.0f), 29 spatial_width_fact(1.0f), 30 spatial_height_fact(1.0f), 31 temporal_fact(1.0f), 32 change_resolution_spatial(false), 33 change_resolution_temporal(false) { 34 } 35 uint16_t codec_width; 36 uint16_t codec_height; 37 float frame_rate; 38 float spatial_width_fact; 39 float spatial_height_fact; 40 float temporal_fact; 41 bool change_resolution_spatial; 42 bool change_resolution_temporal; 43 }; 44 45 enum ImageType { 46 kQCIF = 0, // 176x144 47 kHCIF, // 264x216 = half(~3/4x3/4) CIF. 48 kQVGA, // 320x240 = quarter VGA. 49 kCIF, // 352x288 50 kHVGA, // 480x360 = half(~3/4x3/4) VGA. 51 kVGA, // 640x480 52 kQFULLHD, // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD. 53 kWHD, // 1280x720 54 kFULLHD, // 1920x1080 55 kNumImageTypes 56 }; 57 58 const uint32_t kSizeOfImageType[kNumImageTypes] = 59 { 25344, 57024, 76800, 101376, 172800, 307200, 518400, 921600, 2073600 }; 60 61 enum FrameRateLevelClass { 62 kFrameRateLow, 63 kFrameRateMiddle1, 64 kFrameRateMiddle2, 65 kFrameRateHigh 66 }; 67 68 enum ContentLevelClass { 69 kLow, 70 kHigh, 71 kDefault 72 }; 73 74 struct VCMContFeature { VCMContFeatureVCMContFeature75 VCMContFeature() 76 : value(0.0f), 77 level(kDefault) { 78 } ResetVCMContFeature79 void Reset() { 80 value = 0.0f; 81 level = kDefault; 82 } 83 float value; 84 ContentLevelClass level; 85 }; 86 87 enum UpDownAction { 88 kUpResolution, 89 kDownResolution 90 }; 91 92 enum SpatialAction { 93 kNoChangeSpatial, 94 kOneHalfSpatialUniform, // 3/4 x 3/4: 9/6 ~1/2 pixel reduction. 95 kOneQuarterSpatialUniform, // 1/2 x 1/2: 1/4 pixel reduction. 96 kNumModesSpatial 97 }; 98 99 enum TemporalAction { 100 kNoChangeTemporal, 101 kTwoThirdsTemporal, // 2/3 frame rate reduction 102 kOneHalfTemporal, // 1/2 frame rate reduction 103 kNumModesTemporal 104 }; 105 106 struct ResolutionAction { ResolutionActionResolutionAction107 ResolutionAction() 108 : spatial(kNoChangeSpatial), 109 temporal(kNoChangeTemporal) { 110 } 111 SpatialAction spatial; 112 TemporalAction temporal; 113 }; 114 115 // Down-sampling factors for spatial (width and height), and temporal. 116 const float kFactorWidthSpatial[kNumModesSpatial] = 117 { 1.0f, 4.0f / 3.0f, 2.0f }; 118 119 const float kFactorHeightSpatial[kNumModesSpatial] = 120 { 1.0f, 4.0f / 3.0f, 2.0f }; 121 122 const float kFactorTemporal[kNumModesTemporal] = 123 { 1.0f, 1.5f, 2.0f }; 124 125 enum EncoderState { 126 kStableEncoding, // Low rate mis-match, stable buffer levels. 127 kStressedEncoding, // Significant over-shooting of target rate, 128 // Buffer under-flow, etc. 129 kEasyEncoding // Significant under-shooting of target rate. 130 }; 131 132 // QmMethod class: main class for resolution and robustness settings 133 134 class VCMQmMethod { 135 public: 136 VCMQmMethod(); 137 virtual ~VCMQmMethod(); 138 139 // Reset values 140 void ResetQM(); 141 virtual void Reset() = 0; 142 143 // Compute content class. 144 uint8_t ComputeContentClass(); 145 146 // Update with the content metrics. 147 void UpdateContent(const VideoContentMetrics* content_metrics); 148 149 // Compute spatial texture magnitude and level. 150 // Spatial texture is a spatial prediction error measure. 151 void ComputeSpatial(); 152 153 // Compute motion magnitude and level for NFD metric. 154 // NFD is normalized frame difference (normalized by spatial variance). 155 void ComputeMotionNFD(); 156 157 // Get the imageType (CIF, VGA, HD, etc) for the system width/height. 158 ImageType GetImageType(uint16_t width, uint16_t height); 159 160 // Return the closest image type. 161 ImageType FindClosestImageType(uint16_t width, uint16_t height); 162 163 // Get the frame rate level. 164 FrameRateLevelClass FrameRateLevel(float frame_rate); 165 166 protected: 167 // Content Data. 168 const VideoContentMetrics* content_metrics_; 169 170 // Encoder frame sizes and native frame sizes. 171 uint16_t width_; 172 uint16_t height_; 173 float user_frame_rate_; 174 uint16_t native_width_; 175 uint16_t native_height_; 176 float native_frame_rate_; 177 float aspect_ratio_; 178 // Image type and frame rate leve, for the current encoder resolution. 179 ImageType image_type_; 180 FrameRateLevelClass framerate_level_; 181 // Content class data. 182 VCMContFeature motion_; 183 VCMContFeature spatial_; 184 uint8_t content_class_; 185 bool init_; 186 }; 187 188 // Resolution settings class 189 190 class VCMQmResolution : public VCMQmMethod { 191 public: 192 VCMQmResolution(); 193 virtual ~VCMQmResolution(); 194 195 // Reset all quantities. 196 virtual void Reset(); 197 198 // Reset rate quantities and counters after every SelectResolution() call. 199 void ResetRates(); 200 201 // Reset down-sampling state. 202 void ResetDownSamplingState(); 203 204 // Get the encoder state. 205 EncoderState GetEncoderState(); 206 207 // Initialize after SetEncodingData in media_opt. 208 int Initialize(float bitrate, 209 float user_framerate, 210 uint16_t width, 211 uint16_t height, 212 int num_layers); 213 214 // Update the encoder frame size. 215 void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height); 216 217 // Update with actual bit rate (size of the latest encoded frame) 218 // and frame type, after every encoded frame. 219 void UpdateEncodedSize(int encoded_size, 220 FrameType encoded_frame_type); 221 222 // Update with new target bitrate, actual encoder sent rate, frame_rate, 223 // loss rate: every ~1 sec from SetTargetRates in media_opt. 224 void UpdateRates(float target_bitrate, 225 float encoder_sent_rate, 226 float incoming_framerate, 227 uint8_t packet_loss); 228 229 // Extract ST (spatio-temporal) resolution action. 230 // Inputs: qm: Reference to the quality modes pointer. 231 // Output: the spatial and/or temporal scale change. 232 int SelectResolution(VCMResolutionScale** qm); 233 234 private: 235 // Set the default resolution action. 236 void SetDefaultAction(); 237 238 // Compute rates for the selection of down-sampling action. 239 void ComputeRatesForSelection(); 240 241 // Compute the encoder state. 242 void ComputeEncoderState(); 243 244 // Return true if the action is to go back up in resolution. 245 bool GoingUpResolution(); 246 247 // Return true if the action is to go down in resolution. 248 bool GoingDownResolution(); 249 250 // Check the condition for going up in resolution by the scale factors: 251 // |facWidth|, |facHeight|, |facTemp|. 252 // |scaleFac| is a scale factor for the transition rate. 253 bool ConditionForGoingUp(float fac_width, 254 float fac_height, 255 float fac_temp, 256 float scale_fac); 257 258 // Get the bitrate threshold for the resolution action. 259 // The case |facWidth|=|facHeight|=|facTemp|==1 is for down-sampling action. 260 // |scaleFac| is a scale factor for the transition rate. 261 float GetTransitionRate(float fac_width, 262 float fac_height, 263 float fac_temp, 264 float scale_fac); 265 266 // Update the down-sampling state. 267 void UpdateDownsamplingState(UpDownAction up_down); 268 269 // Update the codec frame size and frame rate. 270 void UpdateCodecResolution(); 271 272 // Return a state based on average target rate relative transition rate. 273 uint8_t RateClass(float transition_rate); 274 275 // Adjust the action selected from the table. 276 void AdjustAction(); 277 278 // Covert 2 stages of 3/4 (=9/16) spatial decimation to 1/2. 279 void ConvertSpatialFractionalToWhole(); 280 281 // Returns true if the new frame sizes, under the selected spatial action, 282 // are of even size. 283 bool EvenFrameSize(); 284 285 // Insert latest down-sampling action into the history list. 286 void InsertLatestDownAction(); 287 288 // Remove the last (first element) down-sampling action from the list. 289 void RemoveLastDownAction(); 290 291 // Check constraints on the amount of down-sampling allowed. 292 void ConstrainAmountOfDownSampling(); 293 294 // For going up in resolution: pick spatial or temporal action, 295 // if both actions were separately selected. 296 void PickSpatialOrTemporal(); 297 298 // Select the directional (1x2 or 2x1) spatial down-sampling action. 299 void SelectSpatialDirectionMode(float transition_rate); 300 301 enum { kDownActionHistorySize = 10}; 302 303 VCMResolutionScale* qm_; 304 // Encoder rate control parameters. 305 float target_bitrate_; 306 float incoming_framerate_; 307 float per_frame_bandwidth_; 308 float buffer_level_; 309 310 // Data accumulated every ~1sec from MediaOpt. 311 float sum_target_rate_; 312 float sum_incoming_framerate_; 313 float sum_rate_MM_; 314 float sum_rate_MM_sgn_; 315 float sum_packet_loss_; 316 // Counters. 317 uint32_t frame_cnt_; 318 uint32_t frame_cnt_delta_; 319 uint32_t update_rate_cnt_; 320 uint32_t low_buffer_cnt_; 321 322 // Resolution state parameters. 323 float state_dec_factor_spatial_; 324 float state_dec_factor_temporal_; 325 326 // Quantities used for selection. 327 float avg_target_rate_; 328 float avg_incoming_framerate_; 329 float avg_ratio_buffer_low_; 330 float avg_rate_mismatch_; 331 float avg_rate_mismatch_sgn_; 332 float avg_packet_loss_; 333 EncoderState encoder_state_; 334 ResolutionAction action_; 335 // Short history of the down-sampling actions from the Initialize() state. 336 // This is needed for going up in resolution. Since the total amount of 337 // down-sampling actions are constrained, the length of the list need not be 338 // large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample. 339 ResolutionAction down_action_history_[kDownActionHistorySize]; 340 int num_layers_; 341 }; 342 343 // Robustness settings class. 344 345 class VCMQmRobustness : public VCMQmMethod { 346 public: 347 VCMQmRobustness(); 348 ~VCMQmRobustness(); 349 350 virtual void Reset(); 351 352 // Adjust FEC rate based on content: every ~1 sec from SetTargetRates. 353 // Returns an adjustment factor. 354 float AdjustFecFactor(uint8_t code_rate_delta, 355 float total_rate, 356 float framerate, 357 uint32_t rtt_time, 358 uint8_t packet_loss); 359 360 // Set the UEP protection on/off. 361 bool SetUepProtection(uint8_t code_rate_delta, 362 float total_rate, 363 uint8_t packet_loss, 364 bool frame_type); 365 366 private: 367 // Previous state of network parameters. 368 float prev_total_rate_; 369 uint32_t prev_rtt_time_; 370 uint8_t prev_packet_loss_; 371 uint8_t prev_code_rate_delta_; 372 }; 373 } // namespace webrtc 374 #endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_ 375