1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/video_coding/qm_select.h"
12
13 #include <math.h>
14
15 #include "webrtc/modules/include/module_common_types.h"
16 #include "webrtc/modules/video_coding/include/video_coding_defines.h"
17 #include "webrtc/modules/video_coding/internal_defines.h"
18 #include "webrtc/modules/video_coding/qm_select_data.h"
19 #include "webrtc/system_wrappers/include/trace.h"
20
21 namespace webrtc {
22
23 // QM-METHOD class
24
VCMQmMethod()25 VCMQmMethod::VCMQmMethod()
26 : content_metrics_(NULL),
27 width_(0),
28 height_(0),
29 user_frame_rate_(0.0f),
30 native_width_(0),
31 native_height_(0),
32 native_frame_rate_(0.0f),
33 image_type_(kVGA),
34 framerate_level_(kFrameRateHigh),
35 init_(false) {
36 ResetQM();
37 }
38
~VCMQmMethod()39 VCMQmMethod::~VCMQmMethod() {}
40
ResetQM()41 void VCMQmMethod::ResetQM() {
42 aspect_ratio_ = 1.0f;
43 motion_.Reset();
44 spatial_.Reset();
45 content_class_ = 0;
46 }
47
ComputeContentClass()48 uint8_t VCMQmMethod::ComputeContentClass() {
49 ComputeMotionNFD();
50 ComputeSpatial();
51 return content_class_ = 3 * motion_.level + spatial_.level;
52 }
53
UpdateContent(const VideoContentMetrics * contentMetrics)54 void VCMQmMethod::UpdateContent(const VideoContentMetrics* contentMetrics) {
55 content_metrics_ = contentMetrics;
56 }
57
ComputeMotionNFD()58 void VCMQmMethod::ComputeMotionNFD() {
59 if (content_metrics_) {
60 motion_.value = content_metrics_->motion_magnitude;
61 }
62 // Determine motion level.
63 if (motion_.value < kLowMotionNfd) {
64 motion_.level = kLow;
65 } else if (motion_.value > kHighMotionNfd) {
66 motion_.level = kHigh;
67 } else {
68 motion_.level = kDefault;
69 }
70 }
71
ComputeSpatial()72 void VCMQmMethod::ComputeSpatial() {
73 float spatial_err = 0.0;
74 float spatial_err_h = 0.0;
75 float spatial_err_v = 0.0;
76 if (content_metrics_) {
77 spatial_err = content_metrics_->spatial_pred_err;
78 spatial_err_h = content_metrics_->spatial_pred_err_h;
79 spatial_err_v = content_metrics_->spatial_pred_err_v;
80 }
81 // Spatial measure: take average of 3 prediction errors.
82 spatial_.value = (spatial_err + spatial_err_h + spatial_err_v) / 3.0f;
83
84 // Reduce thresholds for large scenes/higher pixel correlation.
85 float scale2 = image_type_ > kVGA ? kScaleTexture : 1.0;
86
87 if (spatial_.value > scale2 * kHighTexture) {
88 spatial_.level = kHigh;
89 } else if (spatial_.value < scale2 * kLowTexture) {
90 spatial_.level = kLow;
91 } else {
92 spatial_.level = kDefault;
93 }
94 }
95
GetImageType(uint16_t width,uint16_t height)96 ImageType VCMQmMethod::GetImageType(uint16_t width, uint16_t height) {
97 // Get the image type for the encoder frame size.
98 uint32_t image_size = width * height;
99 if (image_size == kSizeOfImageType[kQCIF]) {
100 return kQCIF;
101 } else if (image_size == kSizeOfImageType[kHCIF]) {
102 return kHCIF;
103 } else if (image_size == kSizeOfImageType[kQVGA]) {
104 return kQVGA;
105 } else if (image_size == kSizeOfImageType[kCIF]) {
106 return kCIF;
107 } else if (image_size == kSizeOfImageType[kHVGA]) {
108 return kHVGA;
109 } else if (image_size == kSizeOfImageType[kVGA]) {
110 return kVGA;
111 } else if (image_size == kSizeOfImageType[kQFULLHD]) {
112 return kQFULLHD;
113 } else if (image_size == kSizeOfImageType[kWHD]) {
114 return kWHD;
115 } else if (image_size == kSizeOfImageType[kFULLHD]) {
116 return kFULLHD;
117 } else {
118 // No exact match, find closet one.
119 return FindClosestImageType(width, height);
120 }
121 }
122
FindClosestImageType(uint16_t width,uint16_t height)123 ImageType VCMQmMethod::FindClosestImageType(uint16_t width, uint16_t height) {
124 float size = static_cast<float>(width * height);
125 float min = size;
126 int isel = 0;
127 for (int i = 0; i < kNumImageTypes; ++i) {
128 float dist = fabs(size - kSizeOfImageType[i]);
129 if (dist < min) {
130 min = dist;
131 isel = i;
132 }
133 }
134 return static_cast<ImageType>(isel);
135 }
136
FrameRateLevel(float avg_framerate)137 FrameRateLevelClass VCMQmMethod::FrameRateLevel(float avg_framerate) {
138 if (avg_framerate <= kLowFrameRate) {
139 return kFrameRateLow;
140 } else if (avg_framerate <= kMiddleFrameRate) {
141 return kFrameRateMiddle1;
142 } else if (avg_framerate <= kHighFrameRate) {
143 return kFrameRateMiddle2;
144 } else {
145 return kFrameRateHigh;
146 }
147 }
148
149 // RESOLUTION CLASS
150
VCMQmResolution()151 VCMQmResolution::VCMQmResolution() : qm_(new VCMResolutionScale()) {
152 Reset();
153 }
154
~VCMQmResolution()155 VCMQmResolution::~VCMQmResolution() {
156 delete qm_;
157 }
158
ResetRates()159 void VCMQmResolution::ResetRates() {
160 sum_target_rate_ = 0.0f;
161 sum_incoming_framerate_ = 0.0f;
162 sum_rate_MM_ = 0.0f;
163 sum_rate_MM_sgn_ = 0.0f;
164 sum_packet_loss_ = 0.0f;
165 buffer_level_ = kInitBufferLevel * target_bitrate_;
166 frame_cnt_ = 0;
167 frame_cnt_delta_ = 0;
168 low_buffer_cnt_ = 0;
169 update_rate_cnt_ = 0;
170 }
171
ResetDownSamplingState()172 void VCMQmResolution::ResetDownSamplingState() {
173 state_dec_factor_spatial_ = 1.0;
174 state_dec_factor_temporal_ = 1.0;
175 for (int i = 0; i < kDownActionHistorySize; i++) {
176 down_action_history_[i].spatial = kNoChangeSpatial;
177 down_action_history_[i].temporal = kNoChangeTemporal;
178 }
179 }
180
Reset()181 void VCMQmResolution::Reset() {
182 target_bitrate_ = 0.0f;
183 incoming_framerate_ = 0.0f;
184 buffer_level_ = 0.0f;
185 per_frame_bandwidth_ = 0.0f;
186 avg_target_rate_ = 0.0f;
187 avg_incoming_framerate_ = 0.0f;
188 avg_ratio_buffer_low_ = 0.0f;
189 avg_rate_mismatch_ = 0.0f;
190 avg_rate_mismatch_sgn_ = 0.0f;
191 avg_packet_loss_ = 0.0f;
192 encoder_state_ = kStableEncoding;
193 num_layers_ = 1;
194 ResetRates();
195 ResetDownSamplingState();
196 ResetQM();
197 }
198
GetEncoderState()199 EncoderState VCMQmResolution::GetEncoderState() {
200 return encoder_state_;
201 }
202
203 // Initialize state after re-initializing the encoder,
204 // i.e., after SetEncodingData() in mediaOpt.
Initialize(float bitrate,float user_framerate,uint16_t width,uint16_t height,int num_layers)205 int VCMQmResolution::Initialize(float bitrate,
206 float user_framerate,
207 uint16_t width,
208 uint16_t height,
209 int num_layers) {
210 if (user_framerate == 0.0f || width == 0 || height == 0) {
211 return VCM_PARAMETER_ERROR;
212 }
213 Reset();
214 target_bitrate_ = bitrate;
215 incoming_framerate_ = user_framerate;
216 UpdateCodecParameters(user_framerate, width, height);
217 native_width_ = width;
218 native_height_ = height;
219 native_frame_rate_ = user_framerate;
220 num_layers_ = num_layers;
221 // Initial buffer level.
222 buffer_level_ = kInitBufferLevel * target_bitrate_;
223 // Per-frame bandwidth.
224 per_frame_bandwidth_ = target_bitrate_ / user_framerate;
225 init_ = true;
226 return VCM_OK;
227 }
228
UpdateCodecParameters(float frame_rate,uint16_t width,uint16_t height)229 void VCMQmResolution::UpdateCodecParameters(float frame_rate,
230 uint16_t width,
231 uint16_t height) {
232 width_ = width;
233 height_ = height;
234 // |user_frame_rate| is the target frame rate for VPM frame dropper.
235 user_frame_rate_ = frame_rate;
236 image_type_ = GetImageType(width, height);
237 }
238
239 // Update rate data after every encoded frame.
UpdateEncodedSize(size_t encoded_size)240 void VCMQmResolution::UpdateEncodedSize(size_t encoded_size) {
241 frame_cnt_++;
242 // Convert to Kbps.
243 float encoded_size_kbits = 8.0f * static_cast<float>(encoded_size) / 1000.0f;
244
245 // Update the buffer level:
246 // Note this is not the actual encoder buffer level.
247 // |buffer_level_| is reset to an initial value after SelectResolution is
248 // called, and does not account for frame dropping by encoder or VCM.
249 buffer_level_ += per_frame_bandwidth_ - encoded_size_kbits;
250
251 // Counter for occurrences of low buffer level:
252 // low/negative values means encoder is likely dropping frames.
253 if (buffer_level_ <= kPercBufferThr * kInitBufferLevel * target_bitrate_) {
254 low_buffer_cnt_++;
255 }
256 }
257
258 // Update various quantities after SetTargetRates in MediaOpt.
UpdateRates(float target_bitrate,float encoder_sent_rate,float incoming_framerate,uint8_t packet_loss)259 void VCMQmResolution::UpdateRates(float target_bitrate,
260 float encoder_sent_rate,
261 float incoming_framerate,
262 uint8_t packet_loss) {
263 // Sum the target bitrate: this is the encoder rate from previous update
264 // (~1sec), i.e, before the update for next ~1sec.
265 sum_target_rate_ += target_bitrate_;
266 update_rate_cnt_++;
267
268 // Sum the received (from RTCP reports) packet loss rates.
269 sum_packet_loss_ += static_cast<float>(packet_loss / 255.0);
270
271 // Sum the sequence rate mismatch:
272 // Mismatch here is based on the difference between the target rate
273 // used (in previous ~1sec) and the average actual encoding rate measured
274 // at previous ~1sec.
275 float diff = target_bitrate_ - encoder_sent_rate;
276 if (target_bitrate_ > 0.0)
277 sum_rate_MM_ += fabs(diff) / target_bitrate_;
278 int sgnDiff = diff > 0 ? 1 : (diff < 0 ? -1 : 0);
279 // To check for consistent under(+)/over_shooting(-) of target rate.
280 sum_rate_MM_sgn_ += sgnDiff;
281
282 // Update with the current new target and frame rate:
283 // these values are ones the encoder will use for the current/next ~1sec.
284 target_bitrate_ = target_bitrate;
285 incoming_framerate_ = incoming_framerate;
286 sum_incoming_framerate_ += incoming_framerate_;
287 // Update the per_frame_bandwidth:
288 // this is the per_frame_bw for the current/next ~1sec.
289 per_frame_bandwidth_ = 0.0f;
290 if (incoming_framerate_ > 0.0f) {
291 per_frame_bandwidth_ = target_bitrate_ / incoming_framerate_;
292 }
293 }
294
295 // Select the resolution factors: frame size and frame rate change (qm scales).
296 // Selection is for going down in resolution, or for going back up
297 // (if a previous down-sampling action was taken).
298
299 // In the current version the following constraints are imposed:
300 // 1) We only allow for one action, either down or up, at a given time.
301 // 2) The possible down-sampling actions are: spatial by 1/2x1/2, 3/4x3/4;
302 // temporal/frame rate reduction by 1/2 and 2/3.
303 // 3) The action for going back up is the reverse of last (spatial or temporal)
304 // down-sampling action. The list of down-sampling actions from the
305 // Initialize() state are kept in |down_action_history_|.
306 // 4) The total amount of down-sampling (spatial and/or temporal) from the
307 // Initialize() state (native resolution) is limited by various factors.
SelectResolution(VCMResolutionScale ** qm)308 int VCMQmResolution::SelectResolution(VCMResolutionScale** qm) {
309 if (!init_) {
310 return VCM_UNINITIALIZED;
311 }
312 if (content_metrics_ == NULL) {
313 Reset();
314 *qm = qm_;
315 return VCM_OK;
316 }
317
318 // Check conditions on down-sampling state.
319 assert(state_dec_factor_spatial_ >= 1.0f);
320 assert(state_dec_factor_temporal_ >= 1.0f);
321 assert(state_dec_factor_spatial_ <= kMaxSpatialDown);
322 assert(state_dec_factor_temporal_ <= kMaxTempDown);
323 assert(state_dec_factor_temporal_ * state_dec_factor_spatial_ <=
324 kMaxTotalDown);
325
326 // Compute content class for selection.
327 content_class_ = ComputeContentClass();
328 // Compute various rate quantities for selection.
329 ComputeRatesForSelection();
330
331 // Get the encoder state.
332 ComputeEncoderState();
333
334 // Default settings: no action.
335 SetDefaultAction();
336 *qm = qm_;
337
338 // Check for going back up in resolution, if we have had some down-sampling
339 // relative to native state in Initialize().
340 if (down_action_history_[0].spatial != kNoChangeSpatial ||
341 down_action_history_[0].temporal != kNoChangeTemporal) {
342 if (GoingUpResolution()) {
343 *qm = qm_;
344 return VCM_OK;
345 }
346 }
347
348 // Check for going down in resolution.
349 if (GoingDownResolution()) {
350 *qm = qm_;
351 return VCM_OK;
352 }
353 return VCM_OK;
354 }
355
SetDefaultAction()356 void VCMQmResolution::SetDefaultAction() {
357 qm_->codec_width = width_;
358 qm_->codec_height = height_;
359 qm_->frame_rate = user_frame_rate_;
360 qm_->change_resolution_spatial = false;
361 qm_->change_resolution_temporal = false;
362 qm_->spatial_width_fact = 1.0f;
363 qm_->spatial_height_fact = 1.0f;
364 qm_->temporal_fact = 1.0f;
365 action_.spatial = kNoChangeSpatial;
366 action_.temporal = kNoChangeTemporal;
367 }
368
ComputeRatesForSelection()369 void VCMQmResolution::ComputeRatesForSelection() {
370 avg_target_rate_ = 0.0f;
371 avg_incoming_framerate_ = 0.0f;
372 avg_ratio_buffer_low_ = 0.0f;
373 avg_rate_mismatch_ = 0.0f;
374 avg_rate_mismatch_sgn_ = 0.0f;
375 avg_packet_loss_ = 0.0f;
376 if (frame_cnt_ > 0) {
377 avg_ratio_buffer_low_ =
378 static_cast<float>(low_buffer_cnt_) / static_cast<float>(frame_cnt_);
379 }
380 if (update_rate_cnt_ > 0) {
381 avg_rate_mismatch_ =
382 static_cast<float>(sum_rate_MM_) / static_cast<float>(update_rate_cnt_);
383 avg_rate_mismatch_sgn_ = static_cast<float>(sum_rate_MM_sgn_) /
384 static_cast<float>(update_rate_cnt_);
385 avg_target_rate_ = static_cast<float>(sum_target_rate_) /
386 static_cast<float>(update_rate_cnt_);
387 avg_incoming_framerate_ = static_cast<float>(sum_incoming_framerate_) /
388 static_cast<float>(update_rate_cnt_);
389 avg_packet_loss_ = static_cast<float>(sum_packet_loss_) /
390 static_cast<float>(update_rate_cnt_);
391 }
392 // For selection we may want to weight some quantities more heavily
393 // with the current (i.e., next ~1sec) rate values.
394 avg_target_rate_ =
395 kWeightRate * avg_target_rate_ + (1.0 - kWeightRate) * target_bitrate_;
396 avg_incoming_framerate_ = kWeightRate * avg_incoming_framerate_ +
397 (1.0 - kWeightRate) * incoming_framerate_;
398 // Use base layer frame rate for temporal layers: this will favor spatial.
399 assert(num_layers_ > 0);
400 framerate_level_ = FrameRateLevel(avg_incoming_framerate_ /
401 static_cast<float>(1 << (num_layers_ - 1)));
402 }
403
ComputeEncoderState()404 void VCMQmResolution::ComputeEncoderState() {
405 // Default.
406 encoder_state_ = kStableEncoding;
407
408 // Assign stressed state if:
409 // 1) occurrences of low buffer levels is high, or
410 // 2) rate mis-match is high, and consistent over-shooting by encoder.
411 if ((avg_ratio_buffer_low_ > kMaxBufferLow) ||
412 ((avg_rate_mismatch_ > kMaxRateMisMatch) &&
413 (avg_rate_mismatch_sgn_ < -kRateOverShoot))) {
414 encoder_state_ = kStressedEncoding;
415 }
416 // Assign easy state if:
417 // 1) rate mis-match is high, and
418 // 2) consistent under-shooting by encoder.
419 if ((avg_rate_mismatch_ > kMaxRateMisMatch) &&
420 (avg_rate_mismatch_sgn_ > kRateUnderShoot)) {
421 encoder_state_ = kEasyEncoding;
422 }
423 }
424
GoingUpResolution()425 bool VCMQmResolution::GoingUpResolution() {
426 // For going up, we check for undoing the previous down-sampling action.
427
428 float fac_width = kFactorWidthSpatial[down_action_history_[0].spatial];
429 float fac_height = kFactorHeightSpatial[down_action_history_[0].spatial];
430 float fac_temp = kFactorTemporal[down_action_history_[0].temporal];
431 // For going up spatially, we allow for going up by 3/4x3/4 at each stage.
432 // So if the last spatial action was 1/2x1/2 it would be undone in 2 stages.
433 // Modify the fac_width/height for this case.
434 if (down_action_history_[0].spatial == kOneQuarterSpatialUniform) {
435 fac_width = kFactorWidthSpatial[kOneQuarterSpatialUniform] /
436 kFactorWidthSpatial[kOneHalfSpatialUniform];
437 fac_height = kFactorHeightSpatial[kOneQuarterSpatialUniform] /
438 kFactorHeightSpatial[kOneHalfSpatialUniform];
439 }
440
441 // Check if we should go up both spatially and temporally.
442 if (down_action_history_[0].spatial != kNoChangeSpatial &&
443 down_action_history_[0].temporal != kNoChangeTemporal) {
444 if (ConditionForGoingUp(fac_width, fac_height, fac_temp,
445 kTransRateScaleUpSpatialTemp)) {
446 action_.spatial = down_action_history_[0].spatial;
447 action_.temporal = down_action_history_[0].temporal;
448 UpdateDownsamplingState(kUpResolution);
449 return true;
450 }
451 }
452 // Check if we should go up either spatially or temporally.
453 bool selected_up_spatial = false;
454 bool selected_up_temporal = false;
455 if (down_action_history_[0].spatial != kNoChangeSpatial) {
456 selected_up_spatial = ConditionForGoingUp(fac_width, fac_height, 1.0f,
457 kTransRateScaleUpSpatial);
458 }
459 if (down_action_history_[0].temporal != kNoChangeTemporal) {
460 selected_up_temporal =
461 ConditionForGoingUp(1.0f, 1.0f, fac_temp, kTransRateScaleUpTemp);
462 }
463 if (selected_up_spatial && !selected_up_temporal) {
464 action_.spatial = down_action_history_[0].spatial;
465 action_.temporal = kNoChangeTemporal;
466 UpdateDownsamplingState(kUpResolution);
467 return true;
468 } else if (!selected_up_spatial && selected_up_temporal) {
469 action_.spatial = kNoChangeSpatial;
470 action_.temporal = down_action_history_[0].temporal;
471 UpdateDownsamplingState(kUpResolution);
472 return true;
473 } else if (selected_up_spatial && selected_up_temporal) {
474 PickSpatialOrTemporal();
475 UpdateDownsamplingState(kUpResolution);
476 return true;
477 }
478 return false;
479 }
480
ConditionForGoingUp(float fac_width,float fac_height,float fac_temp,float scale_fac)481 bool VCMQmResolution::ConditionForGoingUp(float fac_width,
482 float fac_height,
483 float fac_temp,
484 float scale_fac) {
485 float estimated_transition_rate_up =
486 GetTransitionRate(fac_width, fac_height, fac_temp, scale_fac);
487 // Go back up if:
488 // 1) target rate is above threshold and current encoder state is stable, or
489 // 2) encoder state is easy (encoder is significantly under-shooting target).
490 if (((avg_target_rate_ > estimated_transition_rate_up) &&
491 (encoder_state_ == kStableEncoding)) ||
492 (encoder_state_ == kEasyEncoding)) {
493 return true;
494 } else {
495 return false;
496 }
497 }
498
GoingDownResolution()499 bool VCMQmResolution::GoingDownResolution() {
500 float estimated_transition_rate_down =
501 GetTransitionRate(1.0f, 1.0f, 1.0f, 1.0f);
502 float max_rate = kFrameRateFac[framerate_level_] * kMaxRateQm[image_type_];
503 // Resolution reduction if:
504 // (1) target rate is below transition rate, or
505 // (2) encoder is in stressed state and target rate below a max threshold.
506 if ((avg_target_rate_ < estimated_transition_rate_down) ||
507 (encoder_state_ == kStressedEncoding && avg_target_rate_ < max_rate)) {
508 // Get the down-sampling action: based on content class, and how low
509 // average target rate is relative to transition rate.
510 uint8_t spatial_fact =
511 kSpatialAction[content_class_ +
512 9 * RateClass(estimated_transition_rate_down)];
513 uint8_t temp_fact =
514 kTemporalAction[content_class_ +
515 9 * RateClass(estimated_transition_rate_down)];
516
517 switch (spatial_fact) {
518 case 4: {
519 action_.spatial = kOneQuarterSpatialUniform;
520 break;
521 }
522 case 2: {
523 action_.spatial = kOneHalfSpatialUniform;
524 break;
525 }
526 case 1: {
527 action_.spatial = kNoChangeSpatial;
528 break;
529 }
530 default: { assert(false); }
531 }
532 switch (temp_fact) {
533 case 3: {
534 action_.temporal = kTwoThirdsTemporal;
535 break;
536 }
537 case 2: {
538 action_.temporal = kOneHalfTemporal;
539 break;
540 }
541 case 1: {
542 action_.temporal = kNoChangeTemporal;
543 break;
544 }
545 default: { assert(false); }
546 }
547 // Only allow for one action (spatial or temporal) at a given time.
548 assert(action_.temporal == kNoChangeTemporal ||
549 action_.spatial == kNoChangeSpatial);
550
551 // Adjust cases not captured in tables, mainly based on frame rate, and
552 // also check for odd frame sizes.
553 AdjustAction();
554
555 // Update down-sampling state.
556 if (action_.spatial != kNoChangeSpatial ||
557 action_.temporal != kNoChangeTemporal) {
558 UpdateDownsamplingState(kDownResolution);
559 return true;
560 }
561 }
562 return false;
563 }
564
GetTransitionRate(float fac_width,float fac_height,float fac_temp,float scale_fac)565 float VCMQmResolution::GetTransitionRate(float fac_width,
566 float fac_height,
567 float fac_temp,
568 float scale_fac) {
569 ImageType image_type =
570 GetImageType(static_cast<uint16_t>(fac_width * width_),
571 static_cast<uint16_t>(fac_height * height_));
572
573 FrameRateLevelClass framerate_level =
574 FrameRateLevel(fac_temp * avg_incoming_framerate_);
575 // If we are checking for going up temporally, and this is the last
576 // temporal action, then use native frame rate.
577 if (down_action_history_[1].temporal == kNoChangeTemporal &&
578 fac_temp > 1.0f) {
579 framerate_level = FrameRateLevel(native_frame_rate_);
580 }
581
582 // The maximum allowed rate below which down-sampling is allowed:
583 // Nominal values based on image format (frame size and frame rate).
584 float max_rate = kFrameRateFac[framerate_level] * kMaxRateQm[image_type];
585
586 uint8_t image_class = image_type > kVGA ? 1 : 0;
587 uint8_t table_index = image_class * 9 + content_class_;
588 // Scale factor for down-sampling transition threshold:
589 // factor based on the content class and the image size.
590 float scaleTransRate = kScaleTransRateQm[table_index];
591 // Threshold bitrate for resolution action.
592 return static_cast<float>(scale_fac * scaleTransRate * max_rate);
593 }
594
UpdateDownsamplingState(UpDownAction up_down)595 void VCMQmResolution::UpdateDownsamplingState(UpDownAction up_down) {
596 if (up_down == kUpResolution) {
597 qm_->spatial_width_fact = 1.0f / kFactorWidthSpatial[action_.spatial];
598 qm_->spatial_height_fact = 1.0f / kFactorHeightSpatial[action_.spatial];
599 // If last spatial action was 1/2x1/2, we undo it in two steps, so the
600 // spatial scale factor in this first step is modified as (4.0/3.0 / 2.0).
601 if (action_.spatial == kOneQuarterSpatialUniform) {
602 qm_->spatial_width_fact = 1.0f *
603 kFactorWidthSpatial[kOneHalfSpatialUniform] /
604 kFactorWidthSpatial[kOneQuarterSpatialUniform];
605 qm_->spatial_height_fact =
606 1.0f * kFactorHeightSpatial[kOneHalfSpatialUniform] /
607 kFactorHeightSpatial[kOneQuarterSpatialUniform];
608 }
609 qm_->temporal_fact = 1.0f / kFactorTemporal[action_.temporal];
610 RemoveLastDownAction();
611 } else if (up_down == kDownResolution) {
612 ConstrainAmountOfDownSampling();
613 ConvertSpatialFractionalToWhole();
614 qm_->spatial_width_fact = kFactorWidthSpatial[action_.spatial];
615 qm_->spatial_height_fact = kFactorHeightSpatial[action_.spatial];
616 qm_->temporal_fact = kFactorTemporal[action_.temporal];
617 InsertLatestDownAction();
618 } else {
619 // This function should only be called if either the Up or Down action
620 // has been selected.
621 assert(false);
622 }
623 UpdateCodecResolution();
624 state_dec_factor_spatial_ = state_dec_factor_spatial_ *
625 qm_->spatial_width_fact *
626 qm_->spatial_height_fact;
627 state_dec_factor_temporal_ = state_dec_factor_temporal_ * qm_->temporal_fact;
628 }
629
UpdateCodecResolution()630 void VCMQmResolution::UpdateCodecResolution() {
631 if (action_.spatial != kNoChangeSpatial) {
632 qm_->change_resolution_spatial = true;
633 qm_->codec_width =
634 static_cast<uint16_t>(width_ / qm_->spatial_width_fact + 0.5f);
635 qm_->codec_height =
636 static_cast<uint16_t>(height_ / qm_->spatial_height_fact + 0.5f);
637 // Size should not exceed native sizes.
638 assert(qm_->codec_width <= native_width_);
639 assert(qm_->codec_height <= native_height_);
640 // New sizes should be multiple of 2, otherwise spatial should not have
641 // been selected.
642 assert(qm_->codec_width % 2 == 0);
643 assert(qm_->codec_height % 2 == 0);
644 }
645 if (action_.temporal != kNoChangeTemporal) {
646 qm_->change_resolution_temporal = true;
647 // Update the frame rate based on the average incoming frame rate.
648 qm_->frame_rate = avg_incoming_framerate_ / qm_->temporal_fact + 0.5f;
649 if (down_action_history_[0].temporal == 0) {
650 // When we undo the last temporal-down action, make sure we go back up
651 // to the native frame rate. Since the incoming frame rate may
652 // fluctuate over time, |avg_incoming_framerate_| scaled back up may
653 // be smaller than |native_frame rate_|.
654 qm_->frame_rate = native_frame_rate_;
655 }
656 }
657 }
658
RateClass(float transition_rate)659 uint8_t VCMQmResolution::RateClass(float transition_rate) {
660 return avg_target_rate_ < (kFacLowRate * transition_rate)
661 ? 0
662 : (avg_target_rate_ >= transition_rate ? 2 : 1);
663 }
664
665 // TODO(marpan): Would be better to capture these frame rate adjustments by
666 // extending the table data (qm_select_data.h).
AdjustAction()667 void VCMQmResolution::AdjustAction() {
668 // If the spatial level is default state (neither low or high), motion level
669 // is not high, and spatial action was selected, switch to 2/3 frame rate
670 // reduction if the average incoming frame rate is high.
671 if (spatial_.level == kDefault && motion_.level != kHigh &&
672 action_.spatial != kNoChangeSpatial &&
673 framerate_level_ == kFrameRateHigh) {
674 action_.spatial = kNoChangeSpatial;
675 action_.temporal = kTwoThirdsTemporal;
676 }
677 // If both motion and spatial level are low, and temporal down action was
678 // selected, switch to spatial 3/4x3/4 if the frame rate is not above the
679 // lower middle level (|kFrameRateMiddle1|).
680 if (motion_.level == kLow && spatial_.level == kLow &&
681 framerate_level_ <= kFrameRateMiddle1 &&
682 action_.temporal != kNoChangeTemporal) {
683 action_.spatial = kOneHalfSpatialUniform;
684 action_.temporal = kNoChangeTemporal;
685 }
686 // If spatial action is selected, and there has been too much spatial
687 // reduction already (i.e., 1/4), then switch to temporal action if the
688 // average frame rate is not low.
689 if (action_.spatial != kNoChangeSpatial &&
690 down_action_history_[0].spatial == kOneQuarterSpatialUniform &&
691 framerate_level_ != kFrameRateLow) {
692 action_.spatial = kNoChangeSpatial;
693 action_.temporal = kTwoThirdsTemporal;
694 }
695 // Never use temporal action if number of temporal layers is above 2.
696 if (num_layers_ > 2) {
697 if (action_.temporal != kNoChangeTemporal) {
698 action_.spatial = kOneHalfSpatialUniform;
699 }
700 action_.temporal = kNoChangeTemporal;
701 }
702 // If spatial action was selected, we need to make sure the frame sizes
703 // are multiples of two. Otherwise switch to 2/3 temporal.
704 if (action_.spatial != kNoChangeSpatial && !EvenFrameSize()) {
705 action_.spatial = kNoChangeSpatial;
706 // Only one action (spatial or temporal) is allowed at a given time, so need
707 // to check whether temporal action is currently selected.
708 action_.temporal = kTwoThirdsTemporal;
709 }
710 }
711
ConvertSpatialFractionalToWhole()712 void VCMQmResolution::ConvertSpatialFractionalToWhole() {
713 // If 3/4 spatial is selected, check if there has been another 3/4,
714 // and if so, combine them into 1/2. 1/2 scaling is more efficient than 9/16.
715 // Note we define 3/4x3/4 spatial as kOneHalfSpatialUniform.
716 if (action_.spatial == kOneHalfSpatialUniform) {
717 bool found = false;
718 int isel = kDownActionHistorySize;
719 for (int i = 0; i < kDownActionHistorySize; ++i) {
720 if (down_action_history_[i].spatial == kOneHalfSpatialUniform) {
721 isel = i;
722 found = true;
723 break;
724 }
725 }
726 if (found) {
727 action_.spatial = kOneQuarterSpatialUniform;
728 state_dec_factor_spatial_ =
729 state_dec_factor_spatial_ /
730 (kFactorWidthSpatial[kOneHalfSpatialUniform] *
731 kFactorHeightSpatial[kOneHalfSpatialUniform]);
732 // Check if switching to 1/2x1/2 (=1/4) spatial is allowed.
733 ConstrainAmountOfDownSampling();
734 if (action_.spatial == kNoChangeSpatial) {
735 // Not allowed. Go back to 3/4x3/4 spatial.
736 action_.spatial = kOneHalfSpatialUniform;
737 state_dec_factor_spatial_ =
738 state_dec_factor_spatial_ *
739 kFactorWidthSpatial[kOneHalfSpatialUniform] *
740 kFactorHeightSpatial[kOneHalfSpatialUniform];
741 } else {
742 // Switching is allowed. Remove 3/4x3/4 from the history, and update
743 // the frame size.
744 for (int i = isel; i < kDownActionHistorySize - 1; ++i) {
745 down_action_history_[i].spatial = down_action_history_[i + 1].spatial;
746 }
747 width_ = width_ * kFactorWidthSpatial[kOneHalfSpatialUniform];
748 height_ = height_ * kFactorHeightSpatial[kOneHalfSpatialUniform];
749 }
750 }
751 }
752 }
753
754 // Returns false if the new frame sizes, under the current spatial action,
755 // are not multiples of two.
EvenFrameSize()756 bool VCMQmResolution::EvenFrameSize() {
757 if (action_.spatial == kOneHalfSpatialUniform) {
758 if ((width_ * 3 / 4) % 2 != 0 || (height_ * 3 / 4) % 2 != 0) {
759 return false;
760 }
761 } else if (action_.spatial == kOneQuarterSpatialUniform) {
762 if ((width_ * 1 / 2) % 2 != 0 || (height_ * 1 / 2) % 2 != 0) {
763 return false;
764 }
765 }
766 return true;
767 }
768
InsertLatestDownAction()769 void VCMQmResolution::InsertLatestDownAction() {
770 if (action_.spatial != kNoChangeSpatial) {
771 for (int i = kDownActionHistorySize - 1; i > 0; --i) {
772 down_action_history_[i].spatial = down_action_history_[i - 1].spatial;
773 }
774 down_action_history_[0].spatial = action_.spatial;
775 }
776 if (action_.temporal != kNoChangeTemporal) {
777 for (int i = kDownActionHistorySize - 1; i > 0; --i) {
778 down_action_history_[i].temporal = down_action_history_[i - 1].temporal;
779 }
780 down_action_history_[0].temporal = action_.temporal;
781 }
782 }
783
RemoveLastDownAction()784 void VCMQmResolution::RemoveLastDownAction() {
785 if (action_.spatial != kNoChangeSpatial) {
786 // If the last spatial action was 1/2x1/2 we replace it with 3/4x3/4.
787 if (action_.spatial == kOneQuarterSpatialUniform) {
788 down_action_history_[0].spatial = kOneHalfSpatialUniform;
789 } else {
790 for (int i = 0; i < kDownActionHistorySize - 1; ++i) {
791 down_action_history_[i].spatial = down_action_history_[i + 1].spatial;
792 }
793 down_action_history_[kDownActionHistorySize - 1].spatial =
794 kNoChangeSpatial;
795 }
796 }
797 if (action_.temporal != kNoChangeTemporal) {
798 for (int i = 0; i < kDownActionHistorySize - 1; ++i) {
799 down_action_history_[i].temporal = down_action_history_[i + 1].temporal;
800 }
801 down_action_history_[kDownActionHistorySize - 1].temporal =
802 kNoChangeTemporal;
803 }
804 }
805
ConstrainAmountOfDownSampling()806 void VCMQmResolution::ConstrainAmountOfDownSampling() {
807 // Sanity checks on down-sampling selection:
808 // override the settings for too small image size and/or frame rate.
809 // Also check the limit on current down-sampling states.
810
811 float spatial_width_fact = kFactorWidthSpatial[action_.spatial];
812 float spatial_height_fact = kFactorHeightSpatial[action_.spatial];
813 float temporal_fact = kFactorTemporal[action_.temporal];
814 float new_dec_factor_spatial =
815 state_dec_factor_spatial_ * spatial_width_fact * spatial_height_fact;
816 float new_dec_factor_temp = state_dec_factor_temporal_ * temporal_fact;
817
818 // No spatial sampling if current frame size is too small, or if the
819 // amount of spatial down-sampling is above maximum spatial down-action.
820 if ((width_ * height_) <= kMinImageSize ||
821 new_dec_factor_spatial > kMaxSpatialDown) {
822 action_.spatial = kNoChangeSpatial;
823 new_dec_factor_spatial = state_dec_factor_spatial_;
824 }
825 // No frame rate reduction if average frame rate is below some point, or if
826 // the amount of temporal down-sampling is above maximum temporal down-action.
827 if (avg_incoming_framerate_ <= kMinFrameRate ||
828 new_dec_factor_temp > kMaxTempDown) {
829 action_.temporal = kNoChangeTemporal;
830 new_dec_factor_temp = state_dec_factor_temporal_;
831 }
832 // Check if the total (spatial-temporal) down-action is above maximum allowed,
833 // if so, disallow the current selected down-action.
834 if (new_dec_factor_spatial * new_dec_factor_temp > kMaxTotalDown) {
835 if (action_.spatial != kNoChangeSpatial) {
836 action_.spatial = kNoChangeSpatial;
837 } else if (action_.temporal != kNoChangeTemporal) {
838 action_.temporal = kNoChangeTemporal;
839 } else {
840 // We only allow for one action (spatial or temporal) at a given time, so
841 // either spatial or temporal action is selected when this function is
842 // called. If the selected action is disallowed from one of the above
843 // 2 prior conditions (on spatial & temporal max down-action), then this
844 // condition "total down-action > |kMaxTotalDown|" would not be entered.
845 assert(false);
846 }
847 }
848 }
849
PickSpatialOrTemporal()850 void VCMQmResolution::PickSpatialOrTemporal() {
851 // Pick the one that has had the most down-sampling thus far.
852 if (state_dec_factor_spatial_ > state_dec_factor_temporal_) {
853 action_.spatial = down_action_history_[0].spatial;
854 action_.temporal = kNoChangeTemporal;
855 } else {
856 action_.spatial = kNoChangeSpatial;
857 action_.temporal = down_action_history_[0].temporal;
858 }
859 }
860
861 // TODO(marpan): Update when we allow for directional spatial down-sampling.
SelectSpatialDirectionMode(float transition_rate)862 void VCMQmResolution::SelectSpatialDirectionMode(float transition_rate) {
863 // Default is 4/3x4/3
864 // For bit rates well below transitional rate, we select 2x2.
865 if (avg_target_rate_ < transition_rate * kRateRedSpatial2X2) {
866 qm_->spatial_width_fact = 2.0f;
867 qm_->spatial_height_fact = 2.0f;
868 }
869 // Otherwise check prediction errors and aspect ratio.
870 float spatial_err = 0.0f;
871 float spatial_err_h = 0.0f;
872 float spatial_err_v = 0.0f;
873 if (content_metrics_) {
874 spatial_err = content_metrics_->spatial_pred_err;
875 spatial_err_h = content_metrics_->spatial_pred_err_h;
876 spatial_err_v = content_metrics_->spatial_pred_err_v;
877 }
878
879 // Favor 1x2 if aspect_ratio is 16:9.
880 if (aspect_ratio_ >= 16.0f / 9.0f) {
881 // Check if 1x2 has lowest prediction error.
882 if (spatial_err_h < spatial_err && spatial_err_h < spatial_err_v) {
883 qm_->spatial_width_fact = 2.0f;
884 qm_->spatial_height_fact = 1.0f;
885 }
886 }
887 // Check for 4/3x4/3 selection: favor 2x2 over 1x2 and 2x1.
888 if (spatial_err < spatial_err_h * (1.0f + kSpatialErr2x2VsHoriz) &&
889 spatial_err < spatial_err_v * (1.0f + kSpatialErr2X2VsVert)) {
890 qm_->spatial_width_fact = 4.0f / 3.0f;
891 qm_->spatial_height_fact = 4.0f / 3.0f;
892 }
893 // Check for 2x1 selection.
894 if (spatial_err_v < spatial_err_h * (1.0f - kSpatialErrVertVsHoriz) &&
895 spatial_err_v < spatial_err * (1.0f - kSpatialErr2X2VsVert)) {
896 qm_->spatial_width_fact = 1.0f;
897 qm_->spatial_height_fact = 2.0f;
898 }
899 }
900
901 // ROBUSTNESS CLASS
902
VCMQmRobustness()903 VCMQmRobustness::VCMQmRobustness() {
904 Reset();
905 }
906
~VCMQmRobustness()907 VCMQmRobustness::~VCMQmRobustness() {}
908
Reset()909 void VCMQmRobustness::Reset() {
910 prev_total_rate_ = 0.0f;
911 prev_rtt_time_ = 0;
912 prev_packet_loss_ = 0;
913 prev_code_rate_delta_ = 0;
914 ResetQM();
915 }
916
917 // Adjust the FEC rate based on the content and the network state
918 // (packet loss rate, total rate/bandwidth, round trip time).
919 // Note that packetLoss here is the filtered loss value.
AdjustFecFactor(uint8_t code_rate_delta,float total_rate,float framerate,int64_t rtt_time,uint8_t packet_loss)920 float VCMQmRobustness::AdjustFecFactor(uint8_t code_rate_delta,
921 float total_rate,
922 float framerate,
923 int64_t rtt_time,
924 uint8_t packet_loss) {
925 // Default: no adjustment
926 float adjust_fec = 1.0f;
927 if (content_metrics_ == NULL) {
928 return adjust_fec;
929 }
930 // Compute class state of the content.
931 ComputeMotionNFD();
932 ComputeSpatial();
933
934 // TODO(marpan): Set FEC adjustment factor.
935
936 // Keep track of previous values of network state:
937 // adjustment may be also based on pattern of changes in network state.
938 prev_total_rate_ = total_rate;
939 prev_rtt_time_ = rtt_time;
940 prev_packet_loss_ = packet_loss;
941 prev_code_rate_delta_ = code_rate_delta;
942 return adjust_fec;
943 }
944
945 // Set the UEP (unequal-protection across packets) on/off for the FEC.
SetUepProtection(uint8_t code_rate_delta,float total_rate,uint8_t packet_loss,bool frame_type)946 bool VCMQmRobustness::SetUepProtection(uint8_t code_rate_delta,
947 float total_rate,
948 uint8_t packet_loss,
949 bool frame_type) {
950 // Default.
951 return false;
952 }
953 } // namespace webrtc
954