1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "audio/audio_transport_impl.h"
12
13 #include <algorithm>
14 #include <memory>
15 #include <utility>
16
17 #include "audio/remix_resample.h"
18 #include "audio/utility/audio_frame_operations.h"
19 #include "call/audio_sender.h"
20 #include "modules/audio_processing/include/audio_frame_proxies.h"
21 #include "rtc_base/checks.h"
22
23 namespace webrtc {
24
25 namespace {
26
27 // We want to process at the lowest sample rate and channel count possible
28 // without losing information. Choose the lowest native rate at least equal to
29 // the minimum of input and codec rates, choose lowest channel count, and
30 // configure the audio frame.
InitializeCaptureFrame(int input_sample_rate,int send_sample_rate_hz,size_t input_num_channels,size_t send_num_channels,AudioFrame * audio_frame)31 void InitializeCaptureFrame(int input_sample_rate,
32 int send_sample_rate_hz,
33 size_t input_num_channels,
34 size_t send_num_channels,
35 AudioFrame* audio_frame) {
36 RTC_DCHECK(audio_frame);
37 int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz);
38 for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) {
39 audio_frame->sample_rate_hz_ = native_rate_hz;
40 if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) {
41 break;
42 }
43 }
44 audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels);
45 }
46
ProcessCaptureFrame(uint32_t delay_ms,bool key_pressed,bool swap_stereo_channels,AudioProcessing * audio_processing,AudioFrame * audio_frame)47 void ProcessCaptureFrame(uint32_t delay_ms,
48 bool key_pressed,
49 bool swap_stereo_channels,
50 AudioProcessing* audio_processing,
51 AudioFrame* audio_frame) {
52 RTC_DCHECK(audio_frame);
53 if (audio_processing) {
54 audio_processing->set_stream_delay_ms(delay_ms);
55 audio_processing->set_stream_key_pressed(key_pressed);
56 int error = ProcessAudioFrame(audio_processing, audio_frame);
57
58 RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
59 }
60
61 if (swap_stereo_channels) {
62 AudioFrameOperations::SwapStereoChannels(audio_frame);
63 }
64 }
65
66 // Resample audio in |frame| to given sample rate preserving the
67 // channel count and place the result in |destination|.
Resample(const AudioFrame & frame,const int destination_sample_rate,PushResampler<int16_t> * resampler,int16_t * destination)68 int Resample(const AudioFrame& frame,
69 const int destination_sample_rate,
70 PushResampler<int16_t>* resampler,
71 int16_t* destination) {
72 const int number_of_channels = static_cast<int>(frame.num_channels_);
73 const int target_number_of_samples_per_channel =
74 destination_sample_rate / 100;
75 resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
76 number_of_channels);
77
78 // TODO(yujo): make resampler take an AudioFrame, and add special case
79 // handling of muted frames.
80 return resampler->Resample(
81 frame.data(), frame.samples_per_channel_ * number_of_channels,
82 destination, number_of_channels * target_number_of_samples_per_channel);
83 }
84 } // namespace
85
AudioTransportImpl(AudioMixer * mixer,AudioProcessing * audio_processing)86 AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer,
87 AudioProcessing* audio_processing)
88 : audio_processing_(audio_processing), mixer_(mixer) {
89 RTC_DCHECK(mixer);
90 }
91
~AudioTransportImpl()92 AudioTransportImpl::~AudioTransportImpl() {}
93
94 // Not used in Chromium. Process captured audio and distribute to all sending
95 // streams, and try to do this at the lowest possible sample rate.
RecordedDataIsAvailable(const void * audio_data,const size_t number_of_frames,const size_t bytes_per_sample,const size_t number_of_channels,const uint32_t sample_rate,const uint32_t audio_delay_milliseconds,const int32_t,const uint32_t,const bool key_pressed,uint32_t &)96 int32_t AudioTransportImpl::RecordedDataIsAvailable(
97 const void* audio_data,
98 const size_t number_of_frames,
99 const size_t bytes_per_sample,
100 const size_t number_of_channels,
101 const uint32_t sample_rate,
102 const uint32_t audio_delay_milliseconds,
103 const int32_t /*clock_drift*/,
104 const uint32_t /*volume*/,
105 const bool key_pressed,
106 uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs
107 RTC_DCHECK(audio_data);
108 RTC_DCHECK_GE(number_of_channels, 1);
109 RTC_DCHECK_LE(number_of_channels, 2);
110 RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
111 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
112 // 100 = 1 second / data duration (10 ms).
113 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
114 RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
115 AudioFrame::kMaxDataSizeBytes);
116
117 int send_sample_rate_hz = 0;
118 size_t send_num_channels = 0;
119 bool swap_stereo_channels = false;
120 {
121 MutexLock lock(&capture_lock_);
122 send_sample_rate_hz = send_sample_rate_hz_;
123 send_num_channels = send_num_channels_;
124 swap_stereo_channels = swap_stereo_channels_;
125 }
126
127 std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
128 InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
129 send_num_channels, audio_frame.get());
130 voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
131 number_of_frames, number_of_channels, sample_rate,
132 &capture_resampler_, audio_frame.get());
133 ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
134 swap_stereo_channels, audio_processing_,
135 audio_frame.get());
136
137 // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
138 // if we're using this feature or not.
139 // TODO(solenberg): GetConfig() takes a lock. Work around that.
140 bool typing_detected = false;
141 if (audio_processing_ &&
142 audio_processing_->GetConfig().voice_detection.enabled) {
143 if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
144 bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
145 typing_detected = typing_detection_.Process(key_pressed, vad_active);
146 }
147 }
148
149 // Copy frame and push to each sending stream. The copy is required since an
150 // encoding task will be posted internally to each stream.
151 {
152 MutexLock lock(&capture_lock_);
153 typing_noise_detected_ = typing_detected;
154
155 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
156 if (!audio_senders_.empty()) {
157 auto it = audio_senders_.begin();
158 while (++it != audio_senders_.end()) {
159 std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
160 audio_frame_copy->CopyFrom(*audio_frame);
161 (*it)->SendAudioData(std::move(audio_frame_copy));
162 }
163 // Send the original frame to the first stream w/o copying.
164 (*audio_senders_.begin())->SendAudioData(std::move(audio_frame));
165 }
166 }
167
168 return 0;
169 }
170
171 // Mix all received streams, feed the result to the AudioProcessing module, then
172 // resample the result to the requested output rate.
NeedMorePlayData(const size_t nSamples,const size_t nBytesPerSample,const size_t nChannels,const uint32_t samplesPerSec,void * audioSamples,size_t & nSamplesOut,int64_t * elapsed_time_ms,int64_t * ntp_time_ms)173 int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
174 const size_t nBytesPerSample,
175 const size_t nChannels,
176 const uint32_t samplesPerSec,
177 void* audioSamples,
178 size_t& nSamplesOut,
179 int64_t* elapsed_time_ms,
180 int64_t* ntp_time_ms) {
181 RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
182 RTC_DCHECK_GE(nChannels, 1);
183 RTC_DCHECK_LE(nChannels, 2);
184 RTC_DCHECK_GE(
185 samplesPerSec,
186 static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));
187
188 // 100 = 1 second / data duration (10 ms).
189 RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
190 RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
191 AudioFrame::kMaxDataSizeBytes);
192
193 mixer_->Mix(nChannels, &mixed_frame_);
194 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
195 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
196
197 if (audio_processing_) {
198 const auto error =
199 ProcessReverseAudioFrame(audio_processing_, &mixed_frame_);
200 RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
201 }
202
203 nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
204 static_cast<int16_t*>(audioSamples));
205 RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
206 return 0;
207 }
208
209 // Used by Chromium - same as NeedMorePlayData() but because Chrome has its
210 // own APM instance, does not call audio_processing_->ProcessReverseStream().
PullRenderData(int bits_per_sample,int sample_rate,size_t number_of_channels,size_t number_of_frames,void * audio_data,int64_t * elapsed_time_ms,int64_t * ntp_time_ms)211 void AudioTransportImpl::PullRenderData(int bits_per_sample,
212 int sample_rate,
213 size_t number_of_channels,
214 size_t number_of_frames,
215 void* audio_data,
216 int64_t* elapsed_time_ms,
217 int64_t* ntp_time_ms) {
218 RTC_DCHECK_EQ(bits_per_sample, 16);
219 RTC_DCHECK_GE(number_of_channels, 1);
220 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
221
222 // 100 = 1 second / data duration (10 ms).
223 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
224
225 // 8 = bits per byte.
226 RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
227 AudioFrame::kMaxDataSizeBytes);
228 mixer_->Mix(number_of_channels, &mixed_frame_);
229 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
230 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
231
232 auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_,
233 static_cast<int16_t*>(audio_data));
234 RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames);
235 }
236
UpdateAudioSenders(std::vector<AudioSender * > senders,int send_sample_rate_hz,size_t send_num_channels)237 void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders,
238 int send_sample_rate_hz,
239 size_t send_num_channels) {
240 MutexLock lock(&capture_lock_);
241 audio_senders_ = std::move(senders);
242 send_sample_rate_hz_ = send_sample_rate_hz;
243 send_num_channels_ = send_num_channels;
244 }
245
SetStereoChannelSwapping(bool enable)246 void AudioTransportImpl::SetStereoChannelSwapping(bool enable) {
247 MutexLock lock(&capture_lock_);
248 swap_stereo_channels_ = enable;
249 }
250
typing_noise_detected() const251 bool AudioTransportImpl::typing_noise_detected() const {
252 MutexLock lock(&capture_lock_);
253 return typing_noise_detected_;
254 }
255 } // namespace webrtc
256