/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/acm2/acm_receiver.h"

#include <stdlib.h>  // malloc

#include <algorithm>  // sort
#include <vector>

#include "webrtc/base/checks.h"
#include "webrtc/base/format_macros.h"
#include "webrtc/base/logging.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
#include "webrtc/modules/audio_coding/acm2/call_statistics.h"
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include "webrtc/system_wrappers/include/clock.h"
#include "webrtc/system_wrappers/include/critical_section_wrapper.h"
#include "webrtc/system_wrappers/include/tick_util.h"
#include "webrtc/system_wrappers/include/trace.h"

namespace webrtc {

namespace acm2 {

namespace {

// |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_|
// before the call to this function.
void SetAudioFrameActivityAndType(bool vad_enabled,
                                  NetEqOutputType type,
                                  AudioFrame* audio_frame) {
  if (vad_enabled) {
    switch (type) {
      case kOutputNormal: {
        audio_frame->vad_activity_ = AudioFrame::kVadActive;
        audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
        break;
      }
      case kOutputVADPassive: {
        audio_frame->vad_activity_ = AudioFrame::kVadPassive;
        audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
        break;
      }
      case kOutputCNG: {
        audio_frame->vad_activity_ = AudioFrame::kVadPassive;
        audio_frame->speech_type_ = AudioFrame::kCNG;
        break;
      }
      case kOutputPLC: {
        // Don't change |audio_frame->vad_activity_|, it should be the same as
        // |previous_audio_activity_|.
        audio_frame->speech_type_ = AudioFrame::kPLC;
        break;
      }
      case kOutputPLCtoCNG: {
        audio_frame->vad_activity_ = AudioFrame::kVadPassive;
        audio_frame->speech_type_ = AudioFrame::kPLCCNG;
        break;
      }
      default:
        assert(false);
    }
  } else {
    // Always set kVadUnknown when the receive-side (post-decoding) VAD is
    // inactive.
    audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
    switch (type) {
      case kOutputNormal: {
        audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
        break;
      }
      case kOutputCNG: {
        audio_frame->speech_type_ = AudioFrame::kCNG;
        break;
      }
      case kOutputPLC: {
        audio_frame->speech_type_ = AudioFrame::kPLC;
        break;
      }
      case kOutputPLCtoCNG: {
        audio_frame->speech_type_ = AudioFrame::kPLCCNG;
        break;
      }
      case kOutputVADPassive: {
        // Normally, we should not get any VAD decision if post-decoding VAD is
        // not active. However, if post-decoding VAD has been active and then
        // disabled, we might end up here for a couple of frames.
        audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
        LOG(WARNING) << "Post-decoding VAD is disabled but output is "
                     << "labeled VAD-passive";
        break;
      }
      default:
        assert(false);
    }
  }
}

// Is the given codec a CNG codec?
// TODO(kwiberg): Move to RentACodec.
bool IsCng(int codec_id) {
  auto i = RentACodec::CodecIdFromIndex(codec_id);
  return (i && (*i == RentACodec::CodecId::kCNNB ||
                *i == RentACodec::CodecId::kCNWB ||
                *i == RentACodec::CodecId::kCNSWB ||
                *i == RentACodec::CodecId::kCNFB));
}

}  // namespace

AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
    : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
      id_(config.id),
      last_audio_decoder_(nullptr),
      previous_audio_activity_(AudioFrame::kVadPassive),
      audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
      last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
      neteq_(NetEq::Create(config.neteq_config)),
      vad_enabled_(config.neteq_config.enable_post_decode_vad),
      clock_(config.clock),
      resampled_last_output_frame_(true) {
  assert(clock_);
  // Zero the full buffers; they hold int16_t samples, so the size in bytes is
  // kMaxDataSizeSamples * sizeof(int16_t).
  memset(audio_buffer_.get(), 0,
         sizeof(int16_t) * AudioFrame::kMaxDataSizeSamples);
  memset(last_audio_buffer_.get(), 0,
         sizeof(int16_t) * AudioFrame::kMaxDataSizeSamples);
}

AcmReceiver::~AcmReceiver() {
  delete neteq_;
}

int AcmReceiver::SetMinimumDelay(int delay_ms) {
  if (neteq_->SetMinimumDelay(delay_ms))
    return 0;
  LOG(LERROR) << "AcmReceiver::SetMinimumDelay " << delay_ms;
  return -1;
}

int AcmReceiver::SetMaximumDelay(int delay_ms) {
  if (neteq_->SetMaximumDelay(delay_ms))
    return 0;
  LOG(LERROR) << "AcmReceiver::SetMaximumDelay " << delay_ms;
  return -1;
}

int AcmReceiver::LeastRequiredDelayMs() const {
  return neteq_->LeastRequiredDelayMs();
}

rtc::Optional<int> AcmReceiver::last_packet_sample_rate_hz() const {
  CriticalSectionScoped lock(crit_sect_.get());
  return last_packet_sample_rate_hz_;
}

int AcmReceiver::last_output_sample_rate_hz() const {
  return neteq_->last_output_sample_rate_hz();
}

int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
                              rtc::ArrayView<const uint8_t> incoming_payload) {
  uint32_t receive_timestamp = 0;
  const RTPHeader* header = &rtp_header.header;  // Just a shorthand.

  {
    CriticalSectionScoped lock(crit_sect_.get());

    const Decoder* decoder = RtpHeaderToDecoder(*header, incoming_payload[0]);
    if (!decoder) {
      LOG_F(LS_ERROR) << "Payload-type "
                      << static_cast<int>(header->payloadType)
                      << " is not registered.";
      return -1;
    }
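    // Look up the codec's nominal sample rate; the lambda yields -1 when the
    // ACM codec index cannot be resolved (e.g. for an externally registered
    // decoder).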
    const int sample_rate_hz = [&decoder] {
      const auto ci = RentACodec::CodecIdFromIndex(decoder->acm_codec_id);
      return ci ? RentACodec::CodecInstById(*ci)->plfreq : -1;
    }();
    receive_timestamp = NowInTimestamp(sample_rate_hz);

    // If this is a CNG packet while the current audio codec is not mono, skip
    // inserting the packet into NetEq.
    if (IsCng(decoder->acm_codec_id) && last_audio_decoder_ &&
        last_audio_decoder_->channels > 1)
      return 0;
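    // Remember the most recent decoder that is neither CNG nor AVT
    // (telephone-event), so that its channel count and sample rate can serve
    // as the reference for later CNG packets and for
    // last_packet_sample_rate_hz().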
    if (!IsCng(decoder->acm_codec_id) &&
        decoder->acm_codec_id !=
            *RentACodec::CodecIndexFromId(RentACodec::CodecId::kAVT)) {
      last_audio_decoder_ = decoder;
      last_packet_sample_rate_hz_ = rtc::Optional<int>(decoder->sample_rate_hz);
    }

  }  // |crit_sect_| is released.

  if (neteq_->InsertPacket(rtp_header, incoming_payload, receive_timestamp) <
      0) {
    LOG(LERROR) << "AcmReceiver::InsertPacket "
                << static_cast<int>(header->payloadType)
                << " Failed to insert packet";
    return -1;
  }
  return 0;
}

int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
  enum NetEqOutputType type;
  size_t samples_per_channel;
  size_t num_channels;

  // Accessing members; take the lock.
  CriticalSectionScoped lock(crit_sect_.get());

  // Always write the output to |audio_buffer_| first.
  if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
                       audio_buffer_.get(),
                       &samples_per_channel,
                       &num_channels,
                       &type) != NetEq::kOK) {
    LOG(LERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
    return -1;
  }

  const int current_sample_rate_hz = neteq_->last_output_sample_rate_hz();

  // Determine whether resampling is required.
  const bool need_resampling =
      (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz);

  if (need_resampling && !resampled_last_output_frame_) {
    // Prime the resampler with the last frame.
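    // Running the previous output frame through the resampler initializes its
    // internal filter state for the new rate conversion, so the first
    // resampled frame does not start from an empty history.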
    int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
    int samples_per_channel_int = resampler_.Resample10Msec(
        last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
        num_channels, AudioFrame::kMaxDataSizeSamples, temp_output);
    if (samples_per_channel_int < 0) {
      LOG(LERROR) << "AcmReceiver::GetAudio - "
                     "Resampling last_audio_buffer_ failed.";
      return -1;
    }
    samples_per_channel = static_cast<size_t>(samples_per_channel_int);
  }

  // The audio in |audio_buffer_| is transferred to |audio_frame| below, either
  // through resampling, or through straight memcpy.
  // TODO(henrik.lundin) Glitches in the output may appear if the output rate
  // from NetEq changes. See WebRTC issue 3923.
  if (need_resampling) {
    int samples_per_channel_int = resampler_.Resample10Msec(
        audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
        num_channels, AudioFrame::kMaxDataSizeSamples, audio_frame->data_);
    if (samples_per_channel_int < 0) {
      LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
      return -1;
    }
    samples_per_channel = static_cast<size_t>(samples_per_channel_int);
    resampled_last_output_frame_ = true;
  } else {
    resampled_last_output_frame_ = false;
    // We might end up here ONLY if the codec has changed.
    memcpy(audio_frame->data_,
           audio_buffer_.get(),
           samples_per_channel * num_channels * sizeof(int16_t));
  }

  // Swap buffers, so that the current audio is stored in |last_audio_buffer_|
  // for next time.
  audio_buffer_.swap(last_audio_buffer_);

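  // NetEq delivers audio in 10 ms blocks, so the output sample rate equals
  // 100 times the number of samples per channel.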
  audio_frame->num_channels_ = num_channels;
  audio_frame->samples_per_channel_ = samples_per_channel;
  audio_frame->sample_rate_hz_ = static_cast<int>(samples_per_channel * 100);

  // Set |vad_activity_| before calling SetAudioFrameActivityAndType().
  audio_frame->vad_activity_ = previous_audio_activity_;
  SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
  previous_audio_activity_ = audio_frame->vad_activity_;
  call_stats_.DecodedByNetEq(audio_frame->speech_type_);

  // Compute the RTP timestamp of the first sample in |audio_frame| from
  // |GetPlayoutTimestamp|, which is the timestamp of the last sample of
  // |audio_frame|.
  uint32_t playout_timestamp = 0;
  if (GetPlayoutTimestamp(&playout_timestamp)) {
    audio_frame->timestamp_ = playout_timestamp -
        static_cast<uint32_t>(audio_frame->samples_per_channel_);
  } else {
    // Remains 0 until we have a valid |playout_timestamp|.
    audio_frame->timestamp_ = 0;
  }

  return 0;
}

int32_t AcmReceiver::AddCodec(int acm_codec_id,
                              uint8_t payload_type,
                              size_t channels,
                              int sample_rate_hz,
                              AudioDecoder* audio_decoder,
                              const std::string& name) {
  const auto neteq_decoder = [acm_codec_id, channels]() -> NetEqDecoder {
    if (acm_codec_id == -1)
      return NetEqDecoder::kDecoderArbitrary;  // External decoder.
    const rtc::Optional<RentACodec::CodecId> cid =
        RentACodec::CodecIdFromIndex(acm_codec_id);
    RTC_DCHECK(cid) << "Invalid codec index: " << acm_codec_id;
    const rtc::Optional<NetEqDecoder> ned =
        RentACodec::NetEqDecoderFromCodecId(*cid, channels);
    RTC_DCHECK(ned) << "Invalid codec ID: " << static_cast<int>(*cid);
    return *ned;
  }();

  CriticalSectionScoped lock(crit_sect_.get());

  // Check whether this payload type already has a registered decoder.
  auto it = decoders_.find(payload_type);
  if (it != decoders_.end()) {
    const Decoder& decoder = it->second;
    if (acm_codec_id != -1 && decoder.acm_codec_id == acm_codec_id &&
        decoder.channels == channels &&
        decoder.sample_rate_hz == sample_rate_hz) {
      // Re-registering the same codec. Do nothing and return.
      return 0;
    }

    // Changing codec. First unregister the old codec, then register the new
    // one.
    if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
      LOG(LERROR) << "Cannot remove payload " << static_cast<int>(payload_type);
      return -1;
    }

    decoders_.erase(it);
  }

  int ret_val;
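  // If no decoder object was supplied, let NetEq create its own decoder for
  // this payload type; otherwise register the externally provided decoder
  // together with its sample rate.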
  if (!audio_decoder) {
    ret_val = neteq_->RegisterPayloadType(neteq_decoder, name, payload_type);
  } else {
    ret_val = neteq_->RegisterExternalDecoder(
        audio_decoder, neteq_decoder, name, payload_type, sample_rate_hz);
  }
  if (ret_val != NetEq::kOK) {
    LOG(LERROR) << "AcmReceiver::AddCodec " << acm_codec_id
                << " payload type: " << static_cast<int>(payload_type)
                << " channels: " << channels;
    return -1;
  }

  Decoder decoder;
  decoder.acm_codec_id = acm_codec_id;
  decoder.payload_type = payload_type;
  decoder.channels = channels;
  decoder.sample_rate_hz = sample_rate_hz;
  decoders_[payload_type] = decoder;
  return 0;
}

void AcmReceiver::EnableVad() {
  neteq_->EnableVad();
  CriticalSectionScoped lock(crit_sect_.get());
  vad_enabled_ = true;
}

void AcmReceiver::DisableVad() {
  neteq_->DisableVad();
  CriticalSectionScoped lock(crit_sect_.get());
  vad_enabled_ = false;
}

void AcmReceiver::FlushBuffers() {
  neteq_->FlushBuffers();
}

// If removing one of the codecs fails, this method continues to remove as
// many as it can.
int AcmReceiver::RemoveAllCodecs() {
  int ret_val = 0;
  CriticalSectionScoped lock(crit_sect_.get());
  for (auto it = decoders_.begin(); it != decoders_.end(); ) {
    auto cur = it;
    ++it;  // |it| stays valid even if we erase |cur|.
    if (neteq_->RemovePayloadType(cur->second.payload_type) == 0) {
      decoders_.erase(cur);
    } else {
      LOG_F(LS_ERROR) << "Cannot remove payload "
                      << static_cast<int>(cur->second.payload_type);
      ret_val = -1;
    }
  }

  // No codec is registered any longer; invalidate the last audio decoder.
  last_audio_decoder_ = nullptr;
  last_packet_sample_rate_hz_ = rtc::Optional<int>();
  return ret_val;
}

int AcmReceiver::RemoveCodec(uint8_t payload_type) {
  CriticalSectionScoped lock(crit_sect_.get());
  auto it = decoders_.find(payload_type);
  if (it == decoders_.end()) {  // Such a payload type is not registered.
    return 0;
  }
  if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
    LOG(LERROR) << "AcmReceiver::RemoveCodec "
                << static_cast<int>(payload_type);
    return -1;
  }
  if (last_audio_decoder_ == &it->second) {
    last_audio_decoder_ = nullptr;
    last_packet_sample_rate_hz_ = rtc::Optional<int>();
  }
  decoders_.erase(it);
  return 0;
}

void AcmReceiver::set_id(int id) {
  CriticalSectionScoped lock(crit_sect_.get());
  id_ = id;
}

bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) {
  return neteq_->GetPlayoutTimestamp(timestamp);
}

int AcmReceiver::LastAudioCodec(CodecInst* codec) const {
  CriticalSectionScoped lock(crit_sect_.get());
  if (!last_audio_decoder_) {
    return -1;
  }
  *codec = *RentACodec::CodecInstById(
      *RentACodec::CodecIdFromIndex(last_audio_decoder_->acm_codec_id));
  codec->pltype = last_audio_decoder_->payload_type;
  codec->channels = last_audio_decoder_->channels;
  codec->plfreq = last_audio_decoder_->sample_rate_hz;
  return 0;
}

void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) {
  NetEqNetworkStatistics neteq_stat;
  // The NetEq function always returns zero, so we don't check the return
  // value.
  neteq_->NetworkStatistics(&neteq_stat);

  acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms;
  acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
  acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found ? true : false;
  acm_stat->currentPacketLossRate = neteq_stat.packet_loss_rate;
  acm_stat->currentDiscardRate = neteq_stat.packet_discard_rate;
  acm_stat->currentExpandRate = neteq_stat.expand_rate;
  acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate;
  acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate;
  acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate;
  acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
  acm_stat->clockDriftPPM = neteq_stat.clockdrift_ppm;
  acm_stat->addedSamples = neteq_stat.added_zero_samples;
  acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
  acm_stat->medianWaitingTimeMs = neteq_stat.median_waiting_time_ms;
  acm_stat->minWaitingTimeMs = neteq_stat.min_waiting_time_ms;
  acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
}

int AcmReceiver::DecoderByPayloadType(uint8_t payload_type,
                                      CodecInst* codec) const {
  CriticalSectionScoped lock(crit_sect_.get());
  auto it = decoders_.find(payload_type);
  if (it == decoders_.end()) {
    LOG(LERROR) << "AcmReceiver::DecoderByPayloadType "
                << static_cast<int>(payload_type);
    return -1;
  }
  const Decoder& decoder = it->second;
  *codec = *RentACodec::CodecInstById(
      *RentACodec::CodecIdFromIndex(decoder.acm_codec_id));
  codec->pltype = decoder.payload_type;
  codec->channels = decoder.channels;
  codec->plfreq = decoder.sample_rate_hz;
  return 0;
}

int AcmReceiver::EnableNack(size_t max_nack_list_size) {
  neteq_->EnableNack(max_nack_list_size);
  return 0;
}

void AcmReceiver::DisableNack() {
  neteq_->DisableNack();
}

std::vector<uint16_t> AcmReceiver::GetNackList(
    int64_t round_trip_time_ms) const {
  return neteq_->GetNackList(round_trip_time_ms);
}

void AcmReceiver::ResetInitialDelay() {
  neteq_->SetMinimumDelay(0);
  // TODO(turajs): Should NetEq Buffer be flushed?
}

const AcmReceiver::Decoder* AcmReceiver::RtpHeaderToDecoder(
    const RTPHeader& rtp_header,
    uint8_t payload_type) const {
  auto it = decoders_.find(rtp_header.payloadType);
  const auto red_index =
      RentACodec::CodecIndexFromId(RentACodec::CodecId::kRED);
  if (red_index &&  // This ensures that RED is defined in WebRTC.
      it != decoders_.end() && it->second.acm_codec_id == *red_index) {
    // This is a RED packet. Get the payload type of the audio codec.
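    // In a RED payload the first byte is a block header: the most significant
    // bit is the follow bit and the lower seven bits carry the block's payload
    // type (RFC 2198), hence the 0x7F mask on the first payload byte.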
    it = decoders_.find(payload_type & 0x7F);
  }

  // Check if the payload is registered.
  return it != decoders_.end() ? &it->second : nullptr;
}

uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const {
  // Down-cast the time to (32-6)-bit since we only care about
  // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms.
  // We mask out the 6 most significant bits of the 32-bit value so that the
  // conversion from milliseconds to timestamp units cannot overflow.
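  // For example, at 48000 Hz the factor below is 48 timestamp units per
  // millisecond, and 48 * 2^26 is still below 2^32, so the multiplication
  // stays within uint32_t range.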
  const uint32_t now_in_ms = static_cast<uint32_t>(
      clock_->TimeInMilliseconds() & 0x03ffffff);
  return static_cast<uint32_t>(
      (decoder_sampling_rate / 1000) * now_in_ms);
}

void AcmReceiver::GetDecodingCallStatistics(
    AudioDecodingCallStats* stats) const {
  CriticalSectionScoped lock(crit_sect_.get());
  *stats = call_stats_.GetDecodingStatistics();
}

}  // namespace acm2

}  // namespace webrtc