/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/video_coding/codecs/vp8/vp8_impl.h"

#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <algorithm>

// NOTE(ajm): Path provided by gyp.
#include "libyuv/scale.h"    // NOLINT
#include "libyuv/convert.h"  // NOLINT

#include "webrtc/base/checks.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/common.h"
#include "webrtc/common_types.h"
#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/video_coding/include/video_codec_interface.h"
#include "webrtc/modules/video_coding/codecs/vp8/include/vp8_common_types.h"
#include "webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h"
#include "webrtc/modules/video_coding/codecs/vp8/temporal_layers.h"
#include "webrtc/system_wrappers/include/tick_util.h"

namespace webrtc {
namespace {

enum { kVp8ErrorPropagationTh = 30 };
enum { kVp832ByteAlign = 32 };

// VP8 denoiser states.
enum denoiserState {
  kDenoiserOff,
  kDenoiserOnYOnly,
  kDenoiserOnYUV,
  kDenoiserOnYUVAggressive,
  // Adaptive mode defaults to kDenoiserOnYUV on key frame, but may switch
  // to kDenoiserOnYUVAggressive based on a computed noise metric.
  kDenoiserOnAdaptive
};

// Greatest common divisor.
int GCD(int a, int b) {
  int c = a % b;
  while (c != 0) {
    a = b;
    b = c;
    c = a % b;
  }
  return b;
}
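// Illustrative use: GCD(1280, 640) == 640. Assumes |b| != 0; callers pass
// validated, positive stream widths.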

std::vector<int> GetStreamBitratesKbps(const VideoCodec& codec,
                                       int bitrate_to_allocate_kbps) {
  if (codec.numberOfSimulcastStreams <= 1) {
    return std::vector<int>(1, bitrate_to_allocate_kbps);
  }

  std::vector<int> bitrates_kbps(codec.numberOfSimulcastStreams);
  // Allocate min -> target bitrates as long as we have bitrate to spend.
  size_t last_active_stream = 0;
  for (size_t i = 0; i < static_cast<size_t>(codec.numberOfSimulcastStreams) &&
                     bitrate_to_allocate_kbps >=
                         static_cast<int>(codec.simulcastStream[i].minBitrate);
       ++i) {
    last_active_stream = i;
    int allocated_bitrate_kbps =
        std::min(static_cast<int>(codec.simulcastStream[i].targetBitrate),
                 bitrate_to_allocate_kbps);
    bitrates_kbps[i] = allocated_bitrate_kbps;
    bitrate_to_allocate_kbps -= allocated_bitrate_kbps;
  }

  // Spend additional bits on the highest-quality active layer, up to max
  // bitrate.
  // TODO(pbos): Consider spending additional bits on last_active_stream-1 down
  // to 0 and not just the top layer when we have additional bitrate to spend.
  int allocated_bitrate_kbps = std::min(
      static_cast<int>(codec.simulcastStream[last_active_stream].maxBitrate -
                       bitrates_kbps[last_active_stream]),
      bitrate_to_allocate_kbps);
  bitrates_kbps[last_active_stream] += allocated_bitrate_kbps;
  bitrate_to_allocate_kbps -= allocated_bitrate_kbps;

  // Make sure we can always send something. Suspending below min bitrate is
  // controlled outside the codec implementation and is not overridden by this.
  if (bitrates_kbps[0] < static_cast<int>(codec.simulcastStream[0].minBitrate))
    bitrates_kbps[0] = static_cast<int>(codec.simulcastStream[0].minBitrate);

  return bitrates_kbps;
}
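// Worked example (illustrative numbers): two streams with {min, target, max}
// of {50, 150, 200} and {150, 500, 700} kbps. Allocating 600 kbps gives
// stream 0 its target (150), leaving 450, which meets stream 1's min, so
// stream 1 receives the remaining 450; nothing is left for the final top-up
// toward max, and the result is {150, 450}.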

uint32_t SumStreamMaxBitrate(int streams, const VideoCodec& codec) {
  uint32_t bitrate_sum = 0;
  for (int i = 0; i < streams; ++i) {
    bitrate_sum += codec.simulcastStream[i].maxBitrate;
  }
  return bitrate_sum;
}

int NumberOfStreams(const VideoCodec& codec) {
  int streams =
      codec.numberOfSimulcastStreams < 1 ? 1 : codec.numberOfSimulcastStreams;
  uint32_t simulcast_max_bitrate = SumStreamMaxBitrate(streams, codec);
  if (simulcast_max_bitrate == 0) {
    streams = 1;
  }
  return streams;
}

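// The last (highest-resolution) simulcast stream must match the codec's
// configured resolution, and every stream must share its aspect ratio;
// cross-multiplying width and height avoids floating-point division.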
bool ValidSimulcastResolutions(const VideoCodec& codec, int num_streams) {
  if (codec.width != codec.simulcastStream[num_streams - 1].width ||
      codec.height != codec.simulcastStream[num_streams - 1].height) {
    return false;
  }
  for (int i = 0; i < num_streams; ++i) {
    if (codec.width * codec.simulcastStream[i].height !=
        codec.height * codec.simulcastStream[i].width) {
      return false;
    }
  }
  return true;
}

int NumStreamsDisabled(const std::vector<bool>& streams) {
  int num_disabled = 0;
  for (bool stream : streams) {
    if (!stream)
      ++num_disabled;
  }
  return num_disabled;
}
}  // namespace

const float kTl1MaxTimeToDropFrames = 20.0f;

VP8EncoderImpl::VP8EncoderImpl()
    : encoded_complete_callback_(NULL),
      inited_(false),
      timestamp_(0),
      feedback_mode_(false),
      qp_max_(56),  // Setting for max quantizer.
      cpu_speed_default_(-6),
      rc_max_intra_target_(0),
      token_partitions_(VP8_ONE_TOKENPARTITION),
      down_scale_requested_(false),
      down_scale_bitrate_(0),
      tl0_frame_dropper_(),
      tl1_frame_dropper_(kTl1MaxTimeToDropFrames),
      key_frame_request_(kMaxSimulcastStreams, false),
      quality_scaler_enabled_(false) {
  uint32_t seed = static_cast<uint32_t>(TickTime::MillisecondTimestamp());
  srand(seed);

  picture_id_.reserve(kMaxSimulcastStreams);
  last_key_frame_picture_id_.reserve(kMaxSimulcastStreams);
  temporal_layers_.reserve(kMaxSimulcastStreams);
  raw_images_.reserve(kMaxSimulcastStreams);
  encoded_images_.reserve(kMaxSimulcastStreams);
  send_stream_.reserve(kMaxSimulcastStreams);
  cpu_speed_.assign(kMaxSimulcastStreams, -6);  // Set default to -6.
  encoders_.reserve(kMaxSimulcastStreams);
  configurations_.reserve(kMaxSimulcastStreams);
  downsampling_factors_.reserve(kMaxSimulcastStreams);
}

VP8EncoderImpl::~VP8EncoderImpl() {
  Release();
}

int VP8EncoderImpl::Release() {
  int ret_val = WEBRTC_VIDEO_CODEC_OK;

  while (!encoded_images_.empty()) {
    EncodedImage& image = encoded_images_.back();
    delete[] image._buffer;
    encoded_images_.pop_back();
  }
  while (!encoders_.empty()) {
    vpx_codec_ctx_t& encoder = encoders_.back();
    if (vpx_codec_destroy(&encoder)) {
      ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
    }
    encoders_.pop_back();
  }
  configurations_.clear();
  send_stream_.clear();
  cpu_speed_.clear();
  while (!raw_images_.empty()) {
    vpx_img_free(&raw_images_.back());
    raw_images_.pop_back();
  }
  while (!temporal_layers_.empty()) {
    delete temporal_layers_.back();
    temporal_layers_.pop_back();
  }
  inited_ = false;
  return ret_val;
}

int VP8EncoderImpl::SetRates(uint32_t new_bitrate_kbit,
                             uint32_t new_framerate) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (encoders_[0].err) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  if (new_framerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (codec_.maxBitrate > 0 && new_bitrate_kbit > codec_.maxBitrate) {
    new_bitrate_kbit = codec_.maxBitrate;
  }
  if (new_bitrate_kbit < codec_.minBitrate) {
    new_bitrate_kbit = codec_.minBitrate;
  }
  if (codec_.numberOfSimulcastStreams > 0 &&
      new_bitrate_kbit < codec_.simulcastStream[0].minBitrate) {
    new_bitrate_kbit = codec_.simulcastStream[0].minBitrate;
  }
  codec_.maxFramerate = new_framerate;

  if (encoders_.size() == 1) {
    // 1:1.
    // Calculate a rough limit for when to trigger a potential downscale.
    uint32_t k_pixels_per_frame = codec_.width * codec_.height / 1000;
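    // E.g. 640x480 is ~307 kpixels per frame, so a downscale is requested
    // once the target bitrate in kbps drops below that number.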
    // TODO(pwestin): we currently lack CAMA, this is a temporary fix to work
    // around the current limitations.
    // Only trigger keyframes if we are allowed to scale down.
    if (configurations_[0].rc_resize_allowed) {
      if (!down_scale_requested_) {
        if (k_pixels_per_frame > new_bitrate_kbit) {
          down_scale_requested_ = true;
          down_scale_bitrate_ = new_bitrate_kbit;
          key_frame_request_[0] = true;
        }
      } else {
        if (new_bitrate_kbit > (2 * down_scale_bitrate_) ||
            new_bitrate_kbit < (down_scale_bitrate_ / 2)) {
          down_scale_requested_ = false;
        }
      }
    }
  } else {
    // If we have more than 1 stream, reduce the qp_max for the low resolution
    // stream if frame rate is not too low. The trade-off with lower qp_max is
    // possibly more dropped frames, so we only do this if the frame rate is
    // above some threshold (base temporal layer is down to 1/4 for 3 layers).
    // We may want to condition this on bitrate later.
    if (new_framerate > 20) {
      configurations_[encoders_.size() - 1].rc_max_quantizer = 45;
    } else {
      // Go back to default value set in InitEncode.
      configurations_[encoders_.size() - 1].rc_max_quantizer = qp_max_;
    }
  }

  std::vector<int> stream_bitrates =
      GetStreamBitratesKbps(codec_, new_bitrate_kbit);
  size_t stream_idx = encoders_.size() - 1;
  for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) {
    if (encoders_.size() > 1)
      SetStreamState(stream_bitrates[stream_idx] > 0, stream_idx);

    unsigned int target_bitrate = stream_bitrates[stream_idx];
    unsigned int max_bitrate = codec_.maxBitrate;
    int framerate = new_framerate;
    // TODO(holmer): This is a temporary hack for screensharing, where we
    // interpret the startBitrate as the encoder target bitrate. This is
    // to allow for a different max bitrate, so if the codec can't meet
    // the target we still allow it to overshoot up to the max before dropping
    // frames. This hack should be improved.
    if (codec_.targetBitrate > 0 &&
        (codec_.codecSpecific.VP8.numberOfTemporalLayers == 2 ||
         codec_.simulcastStream[0].numberOfTemporalLayers == 2)) {
      int tl0_bitrate = std::min(codec_.targetBitrate, target_bitrate);
      max_bitrate = std::min(codec_.maxBitrate, target_bitrate);
      target_bitrate = tl0_bitrate;
    }
    configurations_[i].rc_target_bitrate = target_bitrate;
    temporal_layers_[stream_idx]->ConfigureBitrates(
        target_bitrate, max_bitrate, framerate, &configurations_[i]);
    if (vpx_codec_enc_config_set(&encoders_[i], &configurations_[i])) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }
  quality_scaler_.ReportFramerate(new_framerate);
  return WEBRTC_VIDEO_CODEC_OK;
}

const char* VP8EncoderImpl::ImplementationName() const {
  return "libvpx";
}

void VP8EncoderImpl::SetStreamState(bool send_stream, int stream_idx) {
  if (send_stream && !send_stream_[stream_idx]) {
    // Need a key frame if we have not sent this stream before.
    key_frame_request_[stream_idx] = true;
  }
  send_stream_[stream_idx] = send_stream;
}

void VP8EncoderImpl::SetupTemporalLayers(int num_streams,
                                         int num_temporal_layers,
                                         const VideoCodec& codec) {
  const Config default_options;
  const TemporalLayers::Factory& tl_factory =
      (codec.extra_options ? codec.extra_options : &default_options)
          ->Get<TemporalLayers::Factory>();
  if (num_streams == 1) {
    if (codec.mode == kScreensharing) {
      // Special mode when screensharing on a single stream.
      temporal_layers_.push_back(
          new ScreenshareLayers(num_temporal_layers, rand()));
    } else {
      temporal_layers_.push_back(
          tl_factory.Create(num_temporal_layers, rand()));
    }
  } else {
    for (int i = 0; i < num_streams; ++i) {
      // TODO(andresp): crash if layers is invalid.
      int layers = codec.simulcastStream[i].numberOfTemporalLayers;
      if (layers < 1)
        layers = 1;
      temporal_layers_.push_back(tl_factory.Create(layers, rand()));
    }
  }
}

int VP8EncoderImpl::InitEncode(const VideoCodec* inst,
                               int number_of_cores,
                               size_t /*maxPayloadSize*/) {
  if (inst == NULL) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->maxFramerate < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  // Allow zero to represent an unspecified maxBitRate.
  if (inst->maxBitrate > 0 && inst->startBitrate > inst->maxBitrate) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->width <= 1 || inst->height <= 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (number_of_cores < 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->codecSpecific.VP8.feedbackModeOn &&
      inst->numberOfSimulcastStreams > 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  if (inst->codecSpecific.VP8.automaticResizeOn &&
      inst->numberOfSimulcastStreams > 1) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
  int retVal = Release();
  if (retVal < 0) {
    return retVal;
  }

  int number_of_streams = NumberOfStreams(*inst);
  bool doing_simulcast = (number_of_streams > 1);

  if (doing_simulcast &&
      !ValidSimulcastResolutions(*inst, number_of_streams)) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

  int num_temporal_layers =
      doing_simulcast ? inst->simulcastStream[0].numberOfTemporalLayers
                      : inst->codecSpecific.VP8.numberOfTemporalLayers;

  // TODO(andresp): crash if num temporal layers is bananas.
  if (num_temporal_layers < 1)
    num_temporal_layers = 1;
  SetupTemporalLayers(number_of_streams, num_temporal_layers, *inst);

  feedback_mode_ = inst->codecSpecific.VP8.feedbackModeOn;

  timestamp_ = 0;
  codec_ = *inst;

  // Code expects simulcastStream resolutions to be correct, make sure they are
  // filled even when there are no simulcast layers.
  if (codec_.numberOfSimulcastStreams == 0) {
    codec_.simulcastStream[0].width = codec_.width;
    codec_.simulcastStream[0].height = codec_.height;
  }

  picture_id_.resize(number_of_streams);
  last_key_frame_picture_id_.resize(number_of_streams);
  encoded_images_.resize(number_of_streams);
  encoders_.resize(number_of_streams);
  configurations_.resize(number_of_streams);
  downsampling_factors_.resize(number_of_streams);
  raw_images_.resize(number_of_streams);
  send_stream_.resize(number_of_streams);
  send_stream_[0] = true;  // For non-simulcast case.
  cpu_speed_.resize(number_of_streams);
  std::fill(key_frame_request_.begin(), key_frame_request_.end(), false);

  int idx = number_of_streams - 1;
  for (int i = 0; i < (number_of_streams - 1); ++i, --idx) {
    int gcd = GCD(inst->simulcastStream[idx].width,
                  inst->simulcastStream[idx - 1].width);
    downsampling_factors_[i].num = inst->simulcastStream[idx].width / gcd;
    downsampling_factors_[i].den = inst->simulcastStream[idx - 1].width / gcd;
    send_stream_[i] = false;
  }
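  // E.g. with simulcast widths {320, 640, 1280} (lowest resolution first),
  // entry 0 describes the 1280 -> 640 step: GCD(1280, 640) = 640, giving
  // num = 2 and den = 1.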
  if (number_of_streams > 1) {
    send_stream_[number_of_streams - 1] = false;
    downsampling_factors_[number_of_streams - 1].num = 1;
    downsampling_factors_[number_of_streams - 1].den = 1;
  }
  for (int i = 0; i < number_of_streams; ++i) {
    // Random start; masked to 15 bits since that is the size of the VP8
    // payload header's extended PictureID field.
    picture_id_[i] = static_cast<uint16_t>(rand()) & 0x7FFF;  // NOLINT
    last_key_frame_picture_id_[i] = -1;
    // Allocate memory for encoded image.
    if (encoded_images_[i]._buffer != NULL) {
      delete[] encoded_images_[i]._buffer;
    }
    encoded_images_[i]._size =
        CalcBufferSize(kI420, codec_.width, codec_.height);
    encoded_images_[i]._buffer = new uint8_t[encoded_images_[i]._size];
    encoded_images_[i]._completeFrame = true;
  }
  // Populate encoder configuration with default values.
  if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &configurations_[0],
                                   0)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  // Set the time base of the codec.
  configurations_[0].g_timebase.num = 1;
  configurations_[0].g_timebase.den = 90000;
  configurations_[0].g_lag_in_frames = 0;  // 0 - no frame lagging.

  // Set the error resilience mode according to user settings.
  switch (inst->codecSpecific.VP8.resilience) {
    case kResilienceOff:
      // TODO(marpan): We should keep error resilience off for this mode,
      // independent of temporal layer settings, and make sure we set
      // |codecSpecific.VP8.resilience| = |kResilientStream| at higher level
      // code if we want to get error resilience on.
      configurations_[0].g_error_resilient = 1;
      break;
    case kResilientStream:
      configurations_[0].g_error_resilient = 1;  // TODO(holmer): Replace with
      // VPX_ERROR_RESILIENT_DEFAULT when we
      // drop support for libvpx 9.6.0.
      break;
    case kResilientFrames:
#ifdef INDEPENDENT_PARTITIONS
      configurations_[0].g_error_resilient =
          VPX_ERROR_RESILIENT_DEFAULT | VPX_ERROR_RESILIENT_PARTITIONS;
      break;
#else
      return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;  // Not supported.
#endif
  }

  // Rate control settings.
  configurations_[0].rc_dropframe_thresh =
      inst->codecSpecific.VP8.frameDroppingOn ? 30 : 0;
  configurations_[0].rc_end_usage = VPX_CBR;
  configurations_[0].g_pass = VPX_RC_ONE_PASS;
  // TODO(hellner): investigate why the following two lines produce
  // automaticResizeOn value of 3 when running
  // WebRtcVideoMediaChannelTest.GetStatsMultipleSendStreams inside the talk
  // framework.
  // configurations_[0].rc_resize_allowed =
  //     inst->codecSpecific.VP8.automaticResizeOn ? 1 : 0;
  configurations_[0].rc_resize_allowed = 0;
  // Handle resizing outside of libvpx when doing single-stream.
  if (inst->codecSpecific.VP8.automaticResizeOn && number_of_streams > 1) {
    configurations_[0].rc_resize_allowed = 1;
  }
  configurations_[0].rc_min_quantizer = 2;
  if (inst->qpMax >= configurations_[0].rc_min_quantizer) {
    qp_max_ = inst->qpMax;
  }
  configurations_[0].rc_max_quantizer = qp_max_;
  configurations_[0].rc_undershoot_pct = 100;
  configurations_[0].rc_overshoot_pct = 15;
  configurations_[0].rc_buf_initial_sz = 500;
  configurations_[0].rc_buf_optimal_sz = 600;
  configurations_[0].rc_buf_sz = 1000;

  // Set the maximum target size of any key-frame.
  rc_max_intra_target_ = MaxIntraTarget(configurations_[0].rc_buf_optimal_sz);

  if (feedback_mode_) {
    // Disable periodic key frames if we get feedback from the decoder
    // through SLI and RPSI.
    configurations_[0].kf_mode = VPX_KF_DISABLED;
  } else if (inst->codecSpecific.VP8.keyFrameInterval > 0) {
    configurations_[0].kf_mode = VPX_KF_AUTO;
    configurations_[0].kf_max_dist = inst->codecSpecific.VP8.keyFrameInterval;
  } else {
    configurations_[0].kf_mode = VPX_KF_DISABLED;
  }

  // Allow the user to set the complexity for the base stream.
  switch (inst->codecSpecific.VP8.complexity) {
    case kComplexityHigh:
      cpu_speed_[0] = -5;
      break;
    case kComplexityHigher:
      cpu_speed_[0] = -4;
      break;
    case kComplexityMax:
      cpu_speed_[0] = -3;
      break;
    default:
      cpu_speed_[0] = -6;
      break;
  }
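  // Note (assumed libvpx behavior): a larger |cpu_speed| magnitude makes
  // encoding faster at the cost of quality, so -5/-4/-3 spend progressively
  // more CPU than the -6 default.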
  cpu_speed_default_ = cpu_speed_[0];
  // Set encoding complexity (cpu_speed) based on resolution and/or platform.
  cpu_speed_[0] = SetCpuSpeed(inst->width, inst->height);
  for (int i = 1; i < number_of_streams; ++i) {
    cpu_speed_[i] =
        SetCpuSpeed(inst->simulcastStream[number_of_streams - 1 - i].width,
                    inst->simulcastStream[number_of_streams - 1 - i].height);
  }
  configurations_[0].g_w = inst->width;
  configurations_[0].g_h = inst->height;

  // Determine number of threads based on the image size and #cores.
  // TODO(fbarchard): Consider number of Simulcast layers.
  configurations_[0].g_threads = NumberOfThreads(
      configurations_[0].g_w, configurations_[0].g_h, number_of_cores);

  // Creating a wrapper to the image - setting image data to NULL.
  // Actual pointer will be set in encode. Setting align to 1, as it
  // is meaningless (no memory allocation is done here).
  vpx_img_wrap(&raw_images_[0], VPX_IMG_FMT_I420, inst->width, inst->height, 1,
               NULL);

  if (encoders_.size() == 1) {
    configurations_[0].rc_target_bitrate = inst->startBitrate;
    temporal_layers_[0]->ConfigureBitrates(inst->startBitrate, inst->maxBitrate,
                                           inst->maxFramerate,
                                           &configurations_[0]);
  } else {
    // Note that our stream order differs from the WebM tools: we keep the
    // lowest resolution at position 0, whereas they keep the highest
    // resolution at position 0.
    int stream_idx = encoders_.size() - 1;
    std::vector<int> stream_bitrates =
        GetStreamBitratesKbps(codec_, inst->startBitrate);
    SetStreamState(stream_bitrates[stream_idx] > 0, stream_idx);
    configurations_[0].rc_target_bitrate = stream_bitrates[stream_idx];
    temporal_layers_[stream_idx]->ConfigureBitrates(
        stream_bitrates[stream_idx], inst->maxBitrate, inst->maxFramerate,
        &configurations_[0]);
    --stream_idx;
    for (size_t i = 1; i < encoders_.size(); ++i, --stream_idx) {
      memcpy(&configurations_[i], &configurations_[0],
             sizeof(configurations_[0]));

      configurations_[i].g_w = inst->simulcastStream[stream_idx].width;
      configurations_[i].g_h = inst->simulcastStream[stream_idx].height;

      // Use 1 thread for lower resolutions.
      configurations_[i].g_threads = 1;

      // Setting alignment to 32 - as that ensures at least 16 for all
      // planes (32 for Y, 16 for U,V). Libvpx sets the requested stride for
      // the y plane, but only half of it to the u and v planes.
      vpx_img_alloc(&raw_images_[i], VPX_IMG_FMT_I420,
                    inst->simulcastStream[stream_idx].width,
                    inst->simulcastStream[stream_idx].height, kVp832ByteAlign);
      SetStreamState(stream_bitrates[stream_idx] > 0, stream_idx);
      configurations_[i].rc_target_bitrate = stream_bitrates[stream_idx];
      temporal_layers_[stream_idx]->ConfigureBitrates(
          stream_bitrates[stream_idx], inst->maxBitrate, inst->maxFramerate,
          &configurations_[i]);
    }
  }

  rps_.Init();
  // Disable both high-QP limits and framedropping. Both are handled by libvpx
  // internally.
  const int kDisabledBadQpThreshold = 64;
  quality_scaler_.Init(codec_.qpMax / QualityScaler::kDefaultLowQpDenominator,
                       kDisabledBadQpThreshold, false);
  quality_scaler_.ReportFramerate(codec_.maxFramerate);

  // Only apply scaling to improve quality for single-layer streams. The
  // scaling metrics use frame drops as a signal and are only applicable when
  // we drop frames.
  quality_scaler_enabled_ = encoders_.size() == 1 &&
                            configurations_[0].rc_dropframe_thresh > 0 &&
                            codec_.codecSpecific.VP8.automaticResizeOn;

  return InitAndSetControlSettings();
}

int VP8EncoderImpl::SetCpuSpeed(int width, int height) {
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64)
  // On mobile platforms, always use -12 to trade video quality for lower
  // CPU usage.
  return -12;
#else
  // For non-ARM, increase encoding complexity (i.e., use lower speed setting)
  // if resolution is below CIF. Otherwise, keep the default/user setting
  // (|cpu_speed_default_|) set on InitEncode via codecSpecific.VP8.complexity.
  if (width * height < 352 * 288)
    return (cpu_speed_default_ < -4) ? -4 : cpu_speed_default_;
  else
    return cpu_speed_default_;
#endif
}

int VP8EncoderImpl::NumberOfThreads(int width, int height, int cpus) {
  if (width * height >= 1920 * 1080 && cpus > 8) {
    return 8;  // 8 threads for 1080p on high perf machines.
  } else if (width * height > 1280 * 960 && cpus >= 6) {
    // 3 threads for 1080p.
    return 3;
  } else if (width * height > 640 * 480 && cpus >= 3) {
    // 2 threads for qHD/HD.
    return 2;
  } else {
    // 1 thread for VGA or less.
    return 1;
  }
}

int VP8EncoderImpl::InitAndSetControlSettings() {
  vpx_codec_flags_t flags = 0;
  flags |= VPX_CODEC_USE_OUTPUT_PARTITION;

  if (encoders_.size() > 1) {
    int error = vpx_codec_enc_init_multi(&encoders_[0], vpx_codec_vp8_cx(),
                                         &configurations_[0], encoders_.size(),
                                         flags, &downsampling_factors_[0]);
    if (error) {
      return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
    }
  } else {
    if (vpx_codec_enc_init(&encoders_[0], vpx_codec_vp8_cx(),
                           &configurations_[0], flags)) {
      return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
    }
  }
  // Enable denoising for the highest resolution stream, and for
  // the second highest resolution if we are doing more than 2
  // spatial layers/streams.
  // TODO(holmer): Investigate possibility of adding a libvpx API
  // for getting the denoised frame from the encoder and using that
  // when encoding lower resolution streams. Would it work with the
  // multi-res encoding feature?
  denoiserState denoiser_state = kDenoiserOnYOnly;
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64)
  denoiser_state = kDenoiserOnYOnly;
#else
  denoiser_state = kDenoiserOnAdaptive;
#endif
  vpx_codec_control(
      &encoders_[0], VP8E_SET_NOISE_SENSITIVITY,
      codec_.codecSpecific.VP8.denoisingOn ? denoiser_state : kDenoiserOff);
  if (encoders_.size() > 2) {
    vpx_codec_control(
        &encoders_[1], VP8E_SET_NOISE_SENSITIVITY,
        codec_.codecSpecific.VP8.denoisingOn ? denoiser_state : kDenoiserOff);
  }
  for (size_t i = 0; i < encoders_.size(); ++i) {
    // Allow more screen content to be detected as static.
    vpx_codec_control(&(encoders_[i]), VP8E_SET_STATIC_THRESHOLD,
                      codec_.mode == kScreensharing ? 300 : 1);
    vpx_codec_control(&(encoders_[i]), VP8E_SET_CPUUSED, cpu_speed_[i]);
    vpx_codec_control(&(encoders_[i]), VP8E_SET_TOKEN_PARTITIONS,
                      static_cast<vp8e_token_partitions>(token_partitions_));
    vpx_codec_control(&(encoders_[i]), VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      rc_max_intra_target_);
    // VP8E_SET_SCREEN_CONTENT_MODE 2 = screen content with more aggressive
    // rate control (drop frames on large target bitrate overshoot).
    vpx_codec_control(&(encoders_[i]), VP8E_SET_SCREEN_CONTENT_MODE,
                      codec_.mode == kScreensharing ? 2 : 0);
  }
  inited_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}

uint32_t VP8EncoderImpl::MaxIntraTarget(uint32_t optimalBuffersize) {
  // Set max to the optimal buffer level (normalized by target BR),
  // and scaled by a scalePar.
  // Max target size = scalePar * optimalBufferSize * targetBR[Kbps].
  // This value is expressed as a percentage of perFrameBw:
  // perFrameBw = targetBR[Kbps] * 1000 / frameRate.
  // The target in % is as follows:

  float scalePar = 0.5;
  uint32_t targetPct = optimalBuffersize * scalePar * codec_.maxFramerate / 10;
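  // E.g. with the 600 ms rc_buf_optimal_sz set in InitEncode and 30 fps:
  // 600 * 0.5 * 30 / 10 = 900, i.e. a key frame may use up to 9x the average
  // per-frame bandwidth.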

  // Don't go below 3 times the per frame bandwidth.
  const uint32_t minIntraTh = 300;
  return (targetPct < minIntraTh) ? minIntraTh : targetPct;
}

int VP8EncoderImpl::Encode(const VideoFrame& frame,
                           const CodecSpecificInfo* codec_specific_info,
                           const std::vector<FrameType>* frame_types) {
  if (!inited_)
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  if (frame.IsZeroSize())
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  if (encoded_complete_callback_ == NULL)
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;

  if (quality_scaler_enabled_)
    quality_scaler_.OnEncodeFrame(frame);
  const VideoFrame& input_image =
      quality_scaler_enabled_ ? quality_scaler_.GetScaledFrame(frame) : frame;

  if (quality_scaler_enabled_ && (input_image.width() != codec_.width ||
                                  input_image.height() != codec_.height)) {
    int ret = UpdateCodecFrameSize(input_image);
    if (ret < 0)
      return ret;
  }

  // Since we are extracting raw pointers from |input_image| to
  // |raw_images_[0]|, the resolution of these frames must match. Note that
  // |input_image| might be scaled from |frame|. In that case, the resolution
  // of |raw_images_[0]| should have been updated in UpdateCodecFrameSize.
  RTC_DCHECK_EQ(input_image.width(), static_cast<int>(raw_images_[0].d_w));
  RTC_DCHECK_EQ(input_image.height(), static_cast<int>(raw_images_[0].d_h));

  // Image in vpx_image_t format.
  // Input image is const. VP8's raw image is not defined as const.
  raw_images_[0].planes[VPX_PLANE_Y] =
      const_cast<uint8_t*>(input_image.buffer(kYPlane));
  raw_images_[0].planes[VPX_PLANE_U] =
      const_cast<uint8_t*>(input_image.buffer(kUPlane));
  raw_images_[0].planes[VPX_PLANE_V] =
      const_cast<uint8_t*>(input_image.buffer(kVPlane));

  raw_images_[0].stride[VPX_PLANE_Y] = input_image.stride(kYPlane);
  raw_images_[0].stride[VPX_PLANE_U] = input_image.stride(kUPlane);
  raw_images_[0].stride[VPX_PLANE_V] = input_image.stride(kVPlane);

  for (size_t i = 1; i < encoders_.size(); ++i) {
    // Scale the image down by the downsampling factor, using the previous
    // (higher-resolution) stream as the source.
    libyuv::I420Scale(
        raw_images_[i - 1].planes[VPX_PLANE_Y],
        raw_images_[i - 1].stride[VPX_PLANE_Y],
        raw_images_[i - 1].planes[VPX_PLANE_U],
        raw_images_[i - 1].stride[VPX_PLANE_U],
        raw_images_[i - 1].planes[VPX_PLANE_V],
        raw_images_[i - 1].stride[VPX_PLANE_V], raw_images_[i - 1].d_w,
        raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y],
        raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U],
        raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].planes[VPX_PLANE_V],
        raw_images_[i].stride[VPX_PLANE_V], raw_images_[i].d_w,
        raw_images_[i].d_h, libyuv::kFilterBilinear);
  }
  vpx_enc_frame_flags_t flags[kMaxSimulcastStreams];
  for (size_t i = 0; i < encoders_.size(); ++i) {
    int ret = temporal_layers_[i]->EncodeFlags(input_image.timestamp());
    if (ret < 0) {
      // Drop this frame.
      return WEBRTC_VIDEO_CODEC_OK;
    }
    flags[i] = ret;
  }
  bool send_key_frame = false;
  for (size_t i = 0; i < key_frame_request_.size() && i < send_stream_.size();
       ++i) {
    if (key_frame_request_[i] && send_stream_[i]) {
      send_key_frame = true;
      break;
    }
  }
  if (!send_key_frame && frame_types) {
    for (size_t i = 0; i < frame_types->size() && i < send_stream_.size();
         ++i) {
      if ((*frame_types)[i] == kVideoFrameKey && send_stream_[i]) {
        send_key_frame = true;
        break;
      }
    }
  }
  // The flag modification below (due to forced key frame, RPS, etc.) for now
  // will be the same for all encoders/spatial layers.
  // TODO(marpan/holmer): Allow for key frame request to be set per encoder.
  bool only_predict_from_key_frame = false;
  if (send_key_frame) {
    // Adapt the size of the key frame when in screenshare with 1 temporal
    // layer.
    if (encoders_.size() == 1 && codec_.mode == kScreensharing &&
        codec_.codecSpecific.VP8.numberOfTemporalLayers <= 1) {
      const uint32_t forceKeyFrameIntraTh = 100;
      vpx_codec_control(&(encoders_[0]), VP8E_SET_MAX_INTRA_BITRATE_PCT,
                        forceKeyFrameIntraTh);
    }
    // Key frame request from caller.
    // Will update both golden and alt-ref.
    for (size_t i = 0; i < encoders_.size(); ++i) {
      flags[i] = VPX_EFLAG_FORCE_KF;
    }
    std::fill(key_frame_request_.begin(), key_frame_request_.end(), false);
  } else if (codec_specific_info &&
             codec_specific_info->codecType == kVideoCodecVP8) {
    if (feedback_mode_) {
      // Handle RPSI and SLI messages and set up the appropriate encode flags.
      bool sendRefresh = false;
      if (codec_specific_info->codecSpecific.VP8.hasReceivedRPSI) {
        rps_.ReceivedRPSI(codec_specific_info->codecSpecific.VP8.pictureIdRPSI);
      }
      if (codec_specific_info->codecSpecific.VP8.hasReceivedSLI) {
        sendRefresh = rps_.ReceivedSLI(input_image.timestamp());
      }
      for (size_t i = 0; i < encoders_.size(); ++i) {
        flags[i] = rps_.EncodeFlags(picture_id_[i], sendRefresh,
                                    input_image.timestamp());
      }
    } else {
      if (codec_specific_info->codecSpecific.VP8.hasReceivedRPSI) {
        // Is this our last key frame? If not, ignore it.
        // |picture_id_| is defined per spatial stream/layer, so check that
        // |RPSI| matches the last key frame from any of the spatial streams.
        // If so, then all spatial streams for this encoding will predict from
        // its long-term reference (last key frame).
        int RPSI = codec_specific_info->codecSpecific.VP8.pictureIdRPSI;
        for (size_t i = 0; i < encoders_.size(); ++i) {
          if (last_key_frame_picture_id_[i] == RPSI) {
            // Request for a long term reference frame.
            // Note 1: overwrites any temporal settings.
            // Note 2: VP8_EFLAG_NO_UPD_ENTROPY is not needed as that flag is
            // set by error_resilient mode.
            for (size_t j = 0; j < encoders_.size(); ++j) {
              flags[j] = VP8_EFLAG_NO_UPD_ARF;
              flags[j] |= VP8_EFLAG_NO_REF_GF;
              flags[j] |= VP8_EFLAG_NO_REF_LAST;
            }
            only_predict_from_key_frame = true;
            break;
          }
        }
      }
    }
  }
  // Set the encoder frame flags and temporal layer_id for each spatial stream.
  // Note that |temporal_layers_| are defined starting from lowest resolution
  // at position 0 to highest resolution at position |encoders_.size() - 1|,
  // whereas |encoders_| is from highest to lowest resolution.
  size_t stream_idx = encoders_.size() - 1;
  for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) {
    // Allow the layers adapter to temporarily modify the configuration. This
    // change isn't stored in configurations_ so change will be discarded at
    // the next update.
    vpx_codec_enc_cfg_t temp_config;
    memcpy(&temp_config, &configurations_[i], sizeof(vpx_codec_enc_cfg_t));
    if (temporal_layers_[stream_idx]->UpdateConfiguration(&temp_config)) {
      if (vpx_codec_enc_config_set(&encoders_[i], &temp_config))
        return WEBRTC_VIDEO_CODEC_ERROR;
    }

    vpx_codec_control(&encoders_[i], VP8E_SET_FRAME_FLAGS, flags[stream_idx]);
    vpx_codec_control(&encoders_[i], VP8E_SET_TEMPORAL_LAYER_ID,
                      temporal_layers_[stream_idx]->CurrentLayerId());
  }
  // TODO(holmer): Ideally the duration should be the timestamp diff of this
  // frame and the next frame to be encoded, which we don't have. Instead we
  // would like to use the duration of the previous frame. Unfortunately the
  // rate control seems to be off with that setup. Using the average input
  // frame rate to calculate an average duration for now.
  assert(codec_.maxFramerate > 0);
  uint32_t duration = 90000 / codec_.maxFramerate;
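  // E.g. at maxFramerate = 30 this is 3000 ticks of the 90 kHz RTP clock.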

  // Note we must pass 0 for |flags| field in encode call below since they are
  // set above in |vpx_codec_control| function for each encoder/spatial layer.
  int error = vpx_codec_encode(&encoders_[0], &raw_images_[0], timestamp_,
                               duration, 0, VPX_DL_REALTIME);
  // Reset specific intra frame thresholds, following the key frame.
  if (send_key_frame) {
    vpx_codec_control(&(encoders_[0]), VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      rc_max_intra_target_);
  }
  if (error)
    return WEBRTC_VIDEO_CODEC_ERROR;
  timestamp_ += duration;
  return GetEncodedPartitions(input_image, only_predict_from_key_frame);
}

// TODO(pbos): Make sure this works properly for more than one encoder.
int VP8EncoderImpl::UpdateCodecFrameSize(const VideoFrame& input_image) {
  codec_.width = input_image.width();
  codec_.height = input_image.height();
  if (codec_.numberOfSimulcastStreams <= 1) {
    // For now scaling is only used for single-layer streams.
    codec_.simulcastStream[0].width = input_image.width();
    codec_.simulcastStream[0].height = input_image.height();
  }
  // Update the cpu_speed setting for resolution change.
  vpx_codec_control(&(encoders_[0]), VP8E_SET_CPUUSED,
                    SetCpuSpeed(codec_.width, codec_.height));
  raw_images_[0].w = codec_.width;
  raw_images_[0].h = codec_.height;
  raw_images_[0].d_w = codec_.width;
  raw_images_[0].d_h = codec_.height;
  vpx_img_set_rect(&raw_images_[0], 0, 0, codec_.width, codec_.height);

  // Update encoder context for new frame size.
  // Change of frame size will automatically trigger a key frame.
  configurations_[0].g_w = codec_.width;
  configurations_[0].g_h = codec_.height;
  if (vpx_codec_enc_config_set(&encoders_[0], &configurations_[0])) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

void VP8EncoderImpl::PopulateCodecSpecific(
    CodecSpecificInfo* codec_specific,
    const vpx_codec_cx_pkt_t& pkt,
    int stream_idx,
    uint32_t timestamp,
    bool only_predicting_from_key_frame) {
  assert(codec_specific != NULL);
  codec_specific->codecType = kVideoCodecVP8;
  CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8);
  vp8Info->pictureId = picture_id_[stream_idx];
  if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
    last_key_frame_picture_id_[stream_idx] = picture_id_[stream_idx];
  }
  vp8Info->simulcastIdx = stream_idx;
  vp8Info->keyIdx = kNoKeyIdx;  // TODO(hlundin) populate this
  vp8Info->nonReference =
      (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) ? true : false;
  bool base_layer_sync_point = (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ||
                               only_predicting_from_key_frame;
  temporal_layers_[stream_idx]->PopulateCodecSpecific(base_layer_sync_point,
                                                      vp8Info, timestamp);
  // Prepare next.
  picture_id_[stream_idx] = (picture_id_[stream_idx] + 1) & 0x7FFF;
}

int VP8EncoderImpl::GetEncodedPartitions(const VideoFrame& input_image,
                                         bool only_predicting_from_key_frame) {
  int bw_resolutions_disabled =
      (encoders_.size() > 1) ? NumStreamsDisabled(send_stream_) : -1;

  int stream_idx = static_cast<int>(encoders_.size()) - 1;
  int result = WEBRTC_VIDEO_CODEC_OK;
  for (size_t encoder_idx = 0; encoder_idx < encoders_.size();
       ++encoder_idx, --stream_idx) {
    vpx_codec_iter_t iter = NULL;
    int part_idx = 0;
    encoded_images_[encoder_idx]._length = 0;
    encoded_images_[encoder_idx]._frameType = kVideoFrameDelta;
    RTPFragmentationHeader frag_info;
    // |token_partitions_| is the log2 of the number of token partitions, so
    // a frame consists of (1 << token_partitions_) token partitions plus the
    // first (mode/motion vector) partition.
    frag_info.VerifyAndAllocateFragmentationHeader((1 << token_partitions_) +
                                                   1);
    CodecSpecificInfo codec_specific;
    const vpx_codec_cx_pkt_t* pkt = NULL;
    while ((pkt = vpx_codec_get_cx_data(&encoders_[encoder_idx], &iter)) !=
           NULL) {
      switch (pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT: {
          uint32_t length = encoded_images_[encoder_idx]._length;
          memcpy(&encoded_images_[encoder_idx]._buffer[length],
                 pkt->data.frame.buf, pkt->data.frame.sz);
          frag_info.fragmentationOffset[part_idx] = length;
          frag_info.fragmentationLength[part_idx] = pkt->data.frame.sz;
          frag_info.fragmentationPlType[part_idx] = 0;  // Not known here.
          frag_info.fragmentationTimeDiff[part_idx] = 0;
          encoded_images_[encoder_idx]._length += pkt->data.frame.sz;
          assert(length <= encoded_images_[encoder_idx]._size);
          ++part_idx;
          break;
        }
        default:
          break;
      }
      // End of frame.
      if ((pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT) == 0) {
        // Check if the encoded frame is a key frame.
        if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
          encoded_images_[encoder_idx]._frameType = kVideoFrameKey;
          rps_.EncodedKeyFrame(picture_id_[stream_idx]);
        }
        PopulateCodecSpecific(&codec_specific, *pkt, stream_idx,
                              input_image.timestamp(),
                              only_predicting_from_key_frame);
        break;
      }
    }
    encoded_images_[encoder_idx]._timeStamp = input_image.timestamp();
    encoded_images_[encoder_idx].capture_time_ms_ =
        input_image.render_time_ms();

    int qp = -1;
    vpx_codec_control(&encoders_[encoder_idx], VP8E_GET_LAST_QUANTIZER_64, &qp);
    temporal_layers_[stream_idx]->FrameEncoded(
        encoded_images_[encoder_idx]._length,
        encoded_images_[encoder_idx]._timeStamp, qp);
    if (send_stream_[stream_idx]) {
      if (encoded_images_[encoder_idx]._length > 0) {
        TRACE_COUNTER_ID1("webrtc", "EncodedFrameSize", encoder_idx,
                          encoded_images_[encoder_idx]._length);
        encoded_images_[encoder_idx]._encodedHeight =
            codec_.simulcastStream[stream_idx].height;
        encoded_images_[encoder_idx]._encodedWidth =
            codec_.simulcastStream[stream_idx].width;
        encoded_images_[encoder_idx]
            .adapt_reason_.quality_resolution_downscales =
            quality_scaler_enabled_ ? quality_scaler_.downscale_shift() : -1;
        // Report once per frame (lowest stream always sent).
        encoded_images_[encoder_idx].adapt_reason_.bw_resolutions_disabled =
            (stream_idx == 0) ? bw_resolutions_disabled : -1;
        encoded_complete_callback_->Encoded(encoded_images_[encoder_idx],
                                            &codec_specific, &frag_info);
      } else if (codec_.mode == kScreensharing) {
        result = WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT;
      }
    }
  }
  if (encoders_.size() == 1 && send_stream_[0]) {
    if (encoded_images_[0]._length > 0) {
      int qp;
      vpx_codec_control(&encoders_[0], VP8E_GET_LAST_QUANTIZER_64, &qp);
      quality_scaler_.ReportQP(qp);
    } else {
      quality_scaler_.ReportDroppedFrame();
    }
  }
  return result;
}

int VP8EncoderImpl::SetChannelParameters(uint32_t packetLoss, int64_t rtt) {
  rps_.SetRtt(rtt);
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8EncoderImpl::RegisterEncodeCompleteCallback(
    EncodedImageCallback* callback) {
  encoded_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

VP8DecoderImpl::VP8DecoderImpl()
    : decode_complete_callback_(NULL),
      inited_(false),
      feedback_mode_(false),
      decoder_(NULL),
      last_keyframe_(),
      image_format_(VPX_IMG_FMT_NONE),
      ref_frame_(NULL),
      propagation_cnt_(-1),
      last_frame_width_(0),
      last_frame_height_(0),
      key_frame_required_(true) {}

VP8DecoderImpl::~VP8DecoderImpl() {
  inited_ = true;  // in order to do the actual release
  Release();
}

int VP8DecoderImpl::Reset() {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  InitDecode(&codec_, 1);
  propagation_cnt_ = -1;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
  int ret_val = Release();
  if (ret_val < 0) {
    return ret_val;
  }
  if (decoder_ == NULL) {
    decoder_ = new vpx_codec_ctx_t;
  }
  if (inst && inst->codecType == kVideoCodecVP8) {
    feedback_mode_ = inst->codecSpecific.VP8.feedbackModeOn;
  }
  vpx_codec_dec_cfg_t cfg;
  // Setting number of threads to a constant value (1).
  cfg.threads = 1;
  cfg.h = cfg.w = 0;  // Set after decode.

  vpx_codec_flags_t flags = 0;
#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64)
  flags = VPX_CODEC_USE_POSTPROC;
#ifdef INDEPENDENT_PARTITIONS
  flags |= VPX_CODEC_USE_INPUT_PARTITION;
#endif
#endif

  if (vpx_codec_dec_init(decoder_, vpx_codec_vp8_dx(), &cfg, flags)) {
    return WEBRTC_VIDEO_CODEC_MEMORY;
  }

  // Save VideoCodec instance for later; mainly for duplicating the decoder.
  if (&codec_ != inst)
    codec_ = *inst;
  propagation_cnt_ = -1;

  inited_ = true;

  // Always start with a complete key frame.
  key_frame_required_ = true;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8DecoderImpl::Decode(const EncodedImage& input_image,
                           bool missing_frames,
                           const RTPFragmentationHeader* fragmentation,
                           const CodecSpecificInfo* codec_specific_info,
                           int64_t /*render_time_ms*/) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (decode_complete_callback_ == NULL) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (input_image._buffer == NULL && input_image._length > 0) {
    // Reset to avoid requesting key frames too often.
    if (propagation_cnt_ > 0)
      propagation_cnt_ = 0;
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }

#ifdef INDEPENDENT_PARTITIONS
  if (fragmentation == NULL) {
    return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
  }
#endif

#if !defined(WEBRTC_ARCH_ARM) && !defined(WEBRTC_ARCH_ARM64)
  vp8_postproc_cfg_t ppcfg;
  // MFQE enabled to reduce key frame popping.
  ppcfg.post_proc_flag = VP8_MFQE | VP8_DEBLOCK;
  // For resolutions at or below 640x360, also enable the demacroblocker
  // postproc.
  if (last_frame_width_ * last_frame_height_ <= 640 * 360) {
    ppcfg.post_proc_flag |= VP8_DEMACROBLOCK;
  }
  // Strength of deblocking filter. Valid range: [0, 16].
  ppcfg.deblocking_level = 3;
  vpx_codec_control(decoder_, VP8_SET_POSTPROC, &ppcfg);
#endif

  // Always start with a complete key frame.
  if (key_frame_required_) {
    if (input_image._frameType != kVideoFrameKey)
      return WEBRTC_VIDEO_CODEC_ERROR;
    // We have a key frame - is it complete?
    if (input_image._completeFrame) {
      key_frame_required_ = false;
    } else {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }
  // Restrict error propagation using key frame requests. Disabled when
  // the feedback mode is enabled (RPS).
  // Reset on a key frame refresh.
  if (!feedback_mode_) {
    if (input_image._frameType == kVideoFrameKey &&
        input_image._completeFrame) {
      propagation_cnt_ = -1;
      // Start count on first loss.
    } else if ((!input_image._completeFrame || missing_frames) &&
               propagation_cnt_ == -1) {
      propagation_cnt_ = 0;
    }
    if (propagation_cnt_ >= 0) {
      propagation_cnt_++;
    }
  }

  vpx_codec_iter_t iter = NULL;
  vpx_image_t* img;
  int ret;

  // Check for missing frames.
  if (missing_frames) {
    // Call decoder with zero data length to signal missing frames.
    if (vpx_codec_decode(decoder_, NULL, 0, 0, VPX_DL_REALTIME)) {
      // Reset to avoid requesting key frames too often.
      if (propagation_cnt_ > 0)
        propagation_cnt_ = 0;
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    img = vpx_codec_get_frame(decoder_, &iter);
    iter = NULL;
  }

#ifdef INDEPENDENT_PARTITIONS
  if (DecodePartitions(input_image, fragmentation)) {
    // Reset to avoid requesting key frames too often.
    if (propagation_cnt_ > 0) {
      propagation_cnt_ = 0;
    }
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
#else
  uint8_t* buffer = input_image._buffer;
  if (input_image._length == 0) {
    buffer = NULL;  // Triggers full frame concealment.
  }
  if (vpx_codec_decode(decoder_, buffer, input_image._length, 0,
                       VPX_DL_REALTIME)) {
    // Reset to avoid requesting key frames too often.
    if (propagation_cnt_ > 0) {
      propagation_cnt_ = 0;
    }
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
#endif

  // Store encoded frame if key frame. (Used in Copy method.)
  if (input_image._frameType == kVideoFrameKey &&
      input_image._buffer != NULL) {
    const uint32_t bytes_to_copy = input_image._length;
    if (last_keyframe_._size < bytes_to_copy) {
      delete[] last_keyframe_._buffer;
      last_keyframe_._buffer = NULL;
      last_keyframe_._size = 0;
    }
    uint8_t* temp_buffer = last_keyframe_._buffer;  // Save buffer ptr.
    uint32_t temp_size = last_keyframe_._size;      // Save size.
    last_keyframe_ = input_image;                   // Shallow copy.
    last_keyframe_._buffer = temp_buffer;           // Restore buffer ptr.
    last_keyframe_._size = temp_size;               // Restore buffer size.
    if (!last_keyframe_._buffer) {
      // Allocate memory.
      last_keyframe_._size = bytes_to_copy;
      last_keyframe_._buffer = new uint8_t[last_keyframe_._size];
    }
    // Copy encoded frame.
    memcpy(last_keyframe_._buffer, input_image._buffer, bytes_to_copy);
    last_keyframe_._length = bytes_to_copy;
  }

  img = vpx_codec_get_frame(decoder_, &iter);
  ret = ReturnFrame(img, input_image._timeStamp, input_image.ntp_time_ms_);
  if (ret != 0) {
    // Reset to avoid requesting key frames too often.
    if (ret < 0 && propagation_cnt_ > 0)
      propagation_cnt_ = 0;
    return ret;
  }
  if (feedback_mode_) {
    // Whenever we receive an incomplete key frame all reference buffers will
    // be corrupt. If that happens we must request new key frames until we
    // decode a complete key frame.
    if (input_image._frameType == kVideoFrameKey &&
        !input_image._completeFrame)
      return WEBRTC_VIDEO_CODEC_ERROR;
    // Check for reference updates and last reference buffer corruption and
    // signal successful reference propagation or frame corruption to the
    // encoder.
    int reference_updates = 0;
    if (vpx_codec_control(decoder_, VP8D_GET_LAST_REF_UPDATES,
                          &reference_updates)) {
      // Reset to avoid requesting key frames too often.
      if (propagation_cnt_ > 0) {
        propagation_cnt_ = 0;
      }
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    int corrupted = 0;
    if (vpx_codec_control(decoder_, VP8D_GET_FRAME_CORRUPTED, &corrupted)) {
      // Reset to avoid requesting key frames too often.
      if (propagation_cnt_ > 0)
        propagation_cnt_ = 0;
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
    int16_t picture_id = -1;
    if (codec_specific_info) {
      picture_id = codec_specific_info->codecSpecific.VP8.pictureId;
    }
    if (picture_id > -1) {
      if (((reference_updates & VP8_GOLD_FRAME) ||
           (reference_updates & VP8_ALTR_FRAME)) &&
          !corrupted) {
        decode_complete_callback_->ReceivedDecodedReferenceFrame(picture_id);
      }
      decode_complete_callback_->ReceivedDecodedFrame(picture_id);
    }
    if (corrupted) {
      // We can decode, but with artifacts.
      return WEBRTC_VIDEO_CODEC_REQUEST_SLI;
    }
  }
  // Check against the error-propagation threshold.
  if (propagation_cnt_ > kVp8ErrorPropagationTh) {
    // Reset to avoid requesting key frames too often.
    propagation_cnt_ = 0;
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8DecoderImpl::DecodePartitions(
    const EncodedImage& input_image,
    const RTPFragmentationHeader* fragmentation) {
  for (int i = 0; i < fragmentation->fragmentationVectorSize; ++i) {
    const uint8_t* partition =
        input_image._buffer + fragmentation->fragmentationOffset[i];
    const uint32_t partition_length = fragmentation->fragmentationLength[i];
    if (vpx_codec_decode(decoder_, partition, partition_length, 0,
                         VPX_DL_REALTIME)) {
      return WEBRTC_VIDEO_CODEC_ERROR;
    }
  }
  // Signal end of frame data. If there was no frame data this will trigger
  // a full frame concealment.
  if (vpx_codec_decode(decoder_, NULL, 0, 0, VPX_DL_REALTIME))
    return WEBRTC_VIDEO_CODEC_ERROR;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8DecoderImpl::ReturnFrame(const vpx_image_t* img,
                                uint32_t timestamp,
                                int64_t ntp_time_ms) {
  if (img == NULL) {
    // Decoder OK and NULL image => No show frame.
    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }
  last_frame_width_ = img->d_w;
  last_frame_height_ = img->d_h;
  // Allocate memory for decoded image.
  VideoFrame decoded_image(buffer_pool_.CreateBuffer(img->d_w, img->d_h),
                           timestamp, 0, kVideoRotation_0);
  libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
                   img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
                   img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
                   decoded_image.buffer(kYPlane), decoded_image.stride(kYPlane),
                   decoded_image.buffer(kUPlane), decoded_image.stride(kUPlane),
                   decoded_image.buffer(kVPlane), decoded_image.stride(kVPlane),
                   img->d_w, img->d_h);
  decoded_image.set_ntp_time_ms(ntp_time_ms);
  int ret = decode_complete_callback_->Decoded(decoded_image);
  if (ret != 0)
    return ret;

  // Remember image format for later.
  image_format_ = img->fmt;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8DecoderImpl::RegisterDecodeCompleteCallback(
    DecodedImageCallback* callback) {
  decode_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}

int VP8DecoderImpl::Release() {
  if (last_keyframe_._buffer != NULL) {
    delete[] last_keyframe_._buffer;
    last_keyframe_._buffer = NULL;
  }
  if (decoder_ != NULL) {
    if (vpx_codec_destroy(decoder_)) {
      return WEBRTC_VIDEO_CODEC_MEMORY;
    }
    delete decoder_;
    decoder_ = NULL;
  }
  if (ref_frame_ != NULL) {
    vpx_img_free(&ref_frame_->img);
    delete ref_frame_;
    ref_frame_ = NULL;
  }
  buffer_pool_.Release();
  inited_ = false;
  return WEBRTC_VIDEO_CODEC_OK;
}

const char* VP8DecoderImpl::ImplementationName() const {
  return "libvpx";
}

int VP8DecoderImpl::CopyReference(VP8DecoderImpl* copy) {
  // The type of frame to copy should be set in ref_frame_->frame_type
  // before the call to this function.
  if (vpx_codec_control(decoder_, VP8_COPY_REFERENCE, ref_frame_) !=
      VPX_CODEC_OK) {
    return -1;
  }
  if (vpx_codec_control(copy->decoder_, VP8_SET_REFERENCE, ref_frame_) !=
      VPX_CODEC_OK) {
    return -1;
  }
  return 0;
}

}  // namespace webrtc