1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_mixer/frame_combiner.h"
12
13 #include <cstdint>
14 #include <initializer_list>
15 #include <numeric>
16 #include <string>
17 #include <type_traits>
18 #include <vector>
19
20 #include "absl/types/optional.h"
21 #include "api/array_view.h"
22 #include "api/rtp_packet_info.h"
23 #include "api/rtp_packet_infos.h"
24 #include "api/units/timestamp.h"
25 #include "audio/utility/audio_frame_operations.h"
26 #include "modules/audio_mixer/gain_change_calculator.h"
27 #include "modules/audio_mixer/sine_wave_generator.h"
28 #include "rtc_base/checks.h"
29 #include "rtc_base/strings/string_builder.h"
30 #include "test/gmock.h"
31 #include "test/gtest.h"
32
33 namespace webrtc {
34
35 namespace {
36
37 using ::testing::ElementsAreArray;
38 using ::testing::IsEmpty;
39 using ::testing::UnorderedElementsAreArray;
40
41 using LimiterType = FrameCombiner::LimiterType;
42
// Parameters describing one mixing scenario: how the FrameCombiner is
// constructed, the format of the frames fed to it, and the test tone used as
// input. Kept as a plain aggregate so it can be brace-initialized.
struct FrameCombinerConfig {
  // Passed to the FrameCombiner constructor; also reported as
  // "limiter active" in the debug text.
  bool use_limiter;
  // Sample rate of the frames being combined, in Hz.
  int sample_rate_hz;
  // Channel count used for the input frames and the mixed output.
  int number_of_channels;
  // Frequency handed to SineWaveGenerator (presumably in Hz).
  float wave_frequency;
};
49
// Builds the human-readable label attached via SCOPED_TRACE to identify which
// sample rate / channel count / source count combination is being exercised.
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  std::string text;
  text.append("Sample rate: ").append(std::to_string(sample_rate_hz));
  text.append(" ,");
  text.append("number of channels: ");
  text.append(std::to_string(number_of_channels));
  text.append(" ,");
  text.append("number of sources: ");
  text.append(std::to_string(number_of_sources));
  return text;
}
59
ProduceDebugText(const FrameCombinerConfig & config)60 std::string ProduceDebugText(const FrameCombinerConfig& config) {
61 rtc::StringBuilder ss;
62 ss << "Sample rate: " << config.sample_rate_hz << " ,";
63 ss << "number of channels: " << config.number_of_channels << " ,";
64 ss << "limiter active: " << (config.use_limiter ? "on" : "off") << " ,";
65 ss << "wave frequency: " << config.wave_frequency << " ,";
66 return ss.Release();
67 }
68
69 AudioFrame frame1;
70 AudioFrame frame2;
71
SetUpFrames(int sample_rate_hz,int number_of_channels)72 void SetUpFrames(int sample_rate_hz, int number_of_channels) {
73 RtpPacketInfo packet_info1(/*ssrc=*/1001, /*csrcs=*/{},
74 /*rtp_timestamp=*/1000,
75 /*receive_time=*/Timestamp::Millis(1));
76 RtpPacketInfo packet_info2(/*ssrc=*/4004, /*csrcs=*/{},
77 /*rtp_timestamp=*/1234,
78 /*receive_time=*/Timestamp::Millis(2));
79 RtpPacketInfo packet_info3(/*ssrc=*/7007, /*csrcs=*/{},
80 /*rtp_timestamp=*/1333,
81 /*receive_time=*/Timestamp::Millis(2));
82
83 frame1.packet_infos_ = RtpPacketInfos({packet_info1});
84 frame2.packet_infos_ = RtpPacketInfos({packet_info2, packet_info3});
85
86 for (auto* frame : {&frame1, &frame2}) {
87 frame->UpdateFrame(0, nullptr, rtc::CheckedDivExact(sample_rate_hz, 100),
88 sample_rate_hz, AudioFrame::kNormalSpeech,
89 AudioFrame::kVadActive, number_of_channels);
90 }
91 }
92 } // namespace
93
94 // The limiter requires sample rate divisible by 2000.
TEST(FrameCombiner,BasicApiCallsLimiter)95 TEST(FrameCombiner, BasicApiCallsLimiter) {
96 FrameCombiner combiner(true);
97 for (const int rate : {8000, 18000, 34000, 48000}) {
98 for (const int number_of_channels : {1, 2, 4, 8}) {
99 const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
100 SetUpFrames(rate, number_of_channels);
101
102 for (const int number_of_frames : {0, 1, 2}) {
103 SCOPED_TRACE(
104 ProduceDebugText(rate, number_of_channels, number_of_frames));
105 const std::vector<AudioFrame*> frames_to_combine(
106 all_frames.begin(), all_frames.begin() + number_of_frames);
107 AudioFrame audio_frame_for_mixing;
108 combiner.Combine(frames_to_combine, number_of_channels, rate,
109 frames_to_combine.size(), &audio_frame_for_mixing);
110 }
111 }
112 }
113 }
114
115 // The RtpPacketInfos field of the mixed packet should contain the union of the
116 // RtpPacketInfos from the frames that were actually mixed.
TEST(FrameCombiner,ContainsAllRtpPacketInfos)117 TEST(FrameCombiner, ContainsAllRtpPacketInfos) {
118 static constexpr int kSampleRateHz = 48000;
119 static constexpr int kNumChannels = 1;
120 FrameCombiner combiner(true);
121 const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
122 SetUpFrames(kSampleRateHz, kNumChannels);
123
124 for (const int number_of_frames : {0, 1, 2}) {
125 SCOPED_TRACE(
126 ProduceDebugText(kSampleRateHz, kNumChannels, number_of_frames));
127 const std::vector<AudioFrame*> frames_to_combine(
128 all_frames.begin(), all_frames.begin() + number_of_frames);
129
130 std::vector<RtpPacketInfo> packet_infos;
131 for (const auto& frame : frames_to_combine) {
132 packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
133 frame->packet_infos_.end());
134 }
135
136 AudioFrame audio_frame_for_mixing;
137 combiner.Combine(frames_to_combine, kNumChannels, kSampleRateHz,
138 frames_to_combine.size(), &audio_frame_for_mixing);
139 EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
140 UnorderedElementsAreArray(packet_infos));
141 }
142 }
143
144 // There are DCHECKs in place to check for invalid parameters.
TEST(FrameCombinerDeathTest,DebugBuildCrashesWithManyChannels)145 TEST(FrameCombinerDeathTest, DebugBuildCrashesWithManyChannels) {
146 FrameCombiner combiner(true);
147 for (const int rate : {8000, 18000, 34000, 48000}) {
148 for (const int number_of_channels : {10, 20, 21}) {
149 if (static_cast<size_t>(rate / 100 * number_of_channels) >
150 AudioFrame::kMaxDataSizeSamples) {
151 continue;
152 }
153 const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
154 SetUpFrames(rate, number_of_channels);
155
156 const int number_of_frames = 2;
157 SCOPED_TRACE(
158 ProduceDebugText(rate, number_of_channels, number_of_frames));
159 const std::vector<AudioFrame*> frames_to_combine(
160 all_frames.begin(), all_frames.begin() + number_of_frames);
161 AudioFrame audio_frame_for_mixing;
162 #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
163 EXPECT_DEATH(
164 combiner.Combine(frames_to_combine, number_of_channels, rate,
165 frames_to_combine.size(), &audio_frame_for_mixing),
166 "");
167 #elif !RTC_DCHECK_IS_ON
168 combiner.Combine(frames_to_combine, number_of_channels, rate,
169 frames_to_combine.size(), &audio_frame_for_mixing);
170 #endif
171 }
172 }
173 }
174
// With the limiter enabled, Combine() is expected to die for the sample rates
// listed below (50000 Hz and above) when DCHECKs are on and death tests are
// available (not on Android). When DCHECKs are off, Combine() is simply
// called and must return. In the remaining configuration nothing is
// exercised.
TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
  constexpr int kNumberOfFrames = 2;
  FrameCombiner combiner(true);

  for (const int rate : {50000, 96000, 128000, 196000}) {
    for (const int number_of_channels : {1, 2, 3}) {
      // Skip combinations whose 10 ms frame would exceed
      // AudioFrame::kMaxDataSizeSamples.
      const size_t samples_per_frame =
          static_cast<size_t>(rate / 100 * number_of_channels);
      if (samples_per_frame > AudioFrame::kMaxDataSizeSamples) {
        continue;
      }

      SetUpFrames(rate, number_of_channels);
      SCOPED_TRACE(
          ProduceDebugText(rate, number_of_channels, kNumberOfFrames));

      const std::vector<AudioFrame*> frames_to_combine = {&frame1, &frame2};
      AudioFrame audio_frame_for_mixing;
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           frames_to_combine.size(), &audio_frame_for_mixing),
          "");
#elif !RTC_DCHECK_IS_ON
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);
#endif
    }
  }
}
204
205 // With no limiter, the rate has to be divisible by 100 since we use
206 // 10 ms frames.
TEST(FrameCombiner,BasicApiCallsNoLimiter)207 TEST(FrameCombiner, BasicApiCallsNoLimiter) {
208 FrameCombiner combiner(false);
209 for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
210 for (const int number_of_channels : {1, 2, 4, 8}) {
211 const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
212 SetUpFrames(rate, number_of_channels);
213
214 for (const int number_of_frames : {0, 1, 2}) {
215 SCOPED_TRACE(
216 ProduceDebugText(rate, number_of_channels, number_of_frames));
217 const std::vector<AudioFrame*> frames_to_combine(
218 all_frames.begin(), all_frames.begin() + number_of_frames);
219 AudioFrame audio_frame_for_mixing;
220 combiner.Combine(frames_to_combine, number_of_channels, rate,
221 frames_to_combine.size(), &audio_frame_for_mixing);
222 }
223 }
224 }
225 }
226
// Combining an empty list of frames (limiter disabled) must yield a 10 ms
// output frame whose samples are all zero and which carries no
// RtpPacketInfos.
TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
  FrameCombiner combiner(false);
  const std::vector<AudioFrame*> frames_to_combine;

  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0));

      // Interleaved sample count of one 10 ms frame.
      const size_t sample_count =
          static_cast<size_t>(number_of_channels * rate / 100);

      AudioFrame audio_frame_for_mixing;
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

      const int16_t* const mixed_begin = audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(mixed_begin,
                                            mixed_begin + sample_count);
      const std::vector<int16_t> expected(sample_count, int16_t{0});

      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(audio_frame_for_mixing.packet_infos_, IsEmpty());
    }
  }
}
250
// Combining a single frame (limiter disabled) must pass its samples and its
// RtpPacketInfos through unchanged. The input is filled with the ramp
// 0, 1, 2, ... so any modification would be visible.
TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
  FrameCombiner combiner(false);

  for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
    for (const int number_of_channels : {1, 2, 4, 8, 10}) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));

      // Interleaved sample count of one 10 ms frame.
      const size_t sample_count =
          static_cast<size_t>(number_of_channels * rate / 100);

      // Fill the single input frame with an increasing ramp.
      SetUpFrames(rate, number_of_channels);
      int16_t* const input_begin = frame1.mutable_data();
      std::iota(input_begin, input_begin + sample_count, 0);

      AudioFrame audio_frame_for_mixing;
      const std::vector<AudioFrame*> frames_to_combine = {&frame1};
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

      const int16_t* const mixed_begin = audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(mixed_begin,
                                            mixed_begin + sample_count);

      // The output must be the very same ramp.
      std::vector<int16_t> expected(sample_count);
      std::iota(expected.begin(), expected.end(), 0);

      EXPECT_EQ(mixed_data, expected);
      EXPECT_THAT(audio_frame_for_mixing.packet_infos_,
                  ElementsAreArray(frame1.packet_infos_));
    }
  }
}
280
281 // Send a sine wave through the FrameCombiner, and check that the
282 // difference between input and output varies smoothly. Also check
283 // that it is inside reasonable bounds. This is to catch issues like
284 // chromium:695993 and chromium:816875.
TEST(FrameCombiner,GainCurveIsSmoothForAlternatingNumberOfStreams)285 TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
286 // Rates are divisible by 2000 when limiter is active.
287 std::vector<FrameCombinerConfig> configs = {
288 {false, 30100, 2, 50.f}, {false, 16500, 1, 3200.f},
289 {true, 8000, 1, 3200.f}, {true, 16000, 1, 50.f},
290 {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
291 };
292
293 for (const auto& config : configs) {
294 SCOPED_TRACE(ProduceDebugText(config));
295
296 FrameCombiner combiner(config.use_limiter);
297
298 constexpr int16_t wave_amplitude = 30000;
299 SineWaveGenerator wave_generator(config.wave_frequency, wave_amplitude);
300
301 GainChangeCalculator change_calculator;
302 float cumulative_change = 0.f;
303
304 constexpr size_t iterations = 100;
305
306 for (size_t i = 0; i < iterations; ++i) {
307 SetUpFrames(config.sample_rate_hz, config.number_of_channels);
308 wave_generator.GenerateNextFrame(&frame1);
309 AudioFrameOperations::Mute(&frame2);
310
311 std::vector<AudioFrame*> frames_to_combine = {&frame1};
312 if (i % 2 == 0) {
313 frames_to_combine.push_back(&frame2);
314 }
315 const size_t number_of_samples =
316 frame1.samples_per_channel_ * config.number_of_channels;
317
318 // Ensures limiter is on if 'use_limiter'.
319 constexpr size_t number_of_streams = 2;
320 AudioFrame audio_frame_for_mixing;
321 combiner.Combine(frames_to_combine, config.number_of_channels,
322 config.sample_rate_hz, number_of_streams,
323 &audio_frame_for_mixing);
324 cumulative_change += change_calculator.CalculateGainChange(
325 rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
326 rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
327 number_of_samples));
328 }
329
330 // Check that the gain doesn't vary too much.
331 EXPECT_LT(cumulative_change, 10);
332
333 // Check that the latest gain is within reasonable bounds. It
334 // should be slightly less that 1.
335 EXPECT_LT(0.9f, change_calculator.LatestGain());
336 EXPECT_LT(change_calculator.LatestGain(), 1.01f);
337 }
338 }
339 } // namespace webrtc
340