1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <stdio.h>
13 #include <algorithm>
14 #include <limits>
15 #include <queue>
16
17 #include "webrtc/common_audio/include/audio_util.h"
18 #include "webrtc/common_audio/resampler/include/push_resampler.h"
19 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
20 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
21 #include "webrtc/modules/audio_processing/include/audio_processing.h"
22 #include "webrtc/modules/audio_processing/test/test_utils.h"
23 #include "webrtc/modules/interface/module_common_types.h"
24 #include "webrtc/system_wrappers/interface/event_wrapper.h"
25 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
26 #include "webrtc/system_wrappers/interface/trace.h"
27 #include "webrtc/test/testsupport/fileutils.h"
28 #include "webrtc/test/testsupport/gtest_disable.h"
29 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
30 #include "gtest/gtest.h"
31 #include "external/webrtc/webrtc/modules/audio_processing/test/unittest.pb.h"
32 #else
33 #include "testing/gtest/include/gtest/gtest.h"
34 #include "webrtc/audio_processing/unittest.pb.h"
35 #endif
36
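// Bit-exact comparisons against the stored reference data are only enabled for
// the fixed-point profile, or for 64-bit Linux debug builds.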
37 #if (defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)) || \
38 (defined(WEBRTC_LINUX) && defined(WEBRTC_ARCH_X86_64) && !defined(NDEBUG))
39 # define WEBRTC_AUDIOPROC_BIT_EXACT
40 #endif
41
42 namespace webrtc {
43 namespace {
44
45 // TODO(bjornv): This is not feasible until the functionality has been
46 // re-implemented; see comment at the bottom of this file.
47 // When false, this will compare the output data with the results stored to
48 // file. This is the typical case. When the file should be updated, it can
49 // be set to true with the command-line switch --write_ref_data.
50 #ifdef WEBRTC_AUDIOPROC_BIT_EXACT
51 bool write_ref_data = false;
52 const int kChannels[] = {1, 2};
53 const size_t kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
54 #endif
55
56 const int kSampleRates[] = {8000, 16000, 32000};
57 const size_t kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
58
59 #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
60 // AECM doesn't support super-wb.
61 const int kProcessSampleRates[] = {8000, 16000};
62 #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
63 const int kProcessSampleRates[] = {8000, 16000, 32000};
64 #endif
65 const size_t kProcessSampleRatesSize = sizeof(kProcessSampleRates) /
66 sizeof(*kProcessSampleRates);
67
68 void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
69 ChannelBuffer<int16_t> cb_int(cb->samples_per_channel(),
70 cb->num_channels());
71 Deinterleave(int_data,
72 cb->samples_per_channel(),
73 cb->num_channels(),
74 cb_int.channels());
75 ScaleToFloat(cb_int.data(),
76 cb->samples_per_channel() * cb->num_channels(),
77 cb->data());
78 }
79
80 void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
81 ConvertToFloat(frame.data_, cb);
82 }
83
84 // Number of channels including the keyboard channel.
85 int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
86 switch (layout) {
87 case AudioProcessing::kMono:
88 return 1;
89 case AudioProcessing::kMonoAndKeyboard:
90 case AudioProcessing::kStereo:
91 return 2;
92 case AudioProcessing::kStereoAndKeyboard:
93 return 3;
94 }
95 assert(false);
96 return -1;
97 }
98
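// Delays are handled in whole 10 ms frames, so test delays are rounded down to
// a multiple of 10 ms before use.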
99 int TruncateToMultipleOf10(int value) {
100 return (value / 10) * 10;
101 }
102
103 void MixStereoToMono(const float* stereo, float* mono,
104 int samples_per_channel) {
105 for (int i = 0; i < samples_per_channel; ++i) {
106 mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2;
107 }
108 }
109
110 void MixStereoToMono(const int16_t* stereo, int16_t* mono,
111 int samples_per_channel) {
112 for (int i = 0; i < samples_per_channel; i++)
113 mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1;
114 }
115
116 void CopyLeftToRightChannel(int16_t* stereo, int samples_per_channel) {
117 for (int i = 0; i < samples_per_channel; i++) {
118 stereo[i * 2 + 1] = stereo[i * 2];
119 }
120 }
121
122 void VerifyChannelsAreEqual(int16_t* stereo, int samples_per_channel) {
123 for (int i = 0; i < samples_per_channel; i++) {
124 EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]);
125 }
126 }
127
128 void SetFrameTo(AudioFrame* frame, int16_t value) {
129 for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) {
130 frame->data_[i] = value;
131 }
132 }
133
134 void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) {
135 ASSERT_EQ(2, frame->num_channels_);
136 for (int i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
137 frame->data_[i] = left;
138 frame->data_[i + 1] = right;
139 }
140 }
141
142 void ScaleFrame(AudioFrame* frame, float scale) {
143 for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) {
144 frame->data_[i] = RoundToInt16(frame->data_[i] * scale);
145 }
146 }
147
148 bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
149 if (frame1.samples_per_channel_ != frame2.samples_per_channel_) {
150 return false;
151 }
152 if (frame1.num_channels_ != frame2.num_channels_) {
153 return false;
154 }
155 if (memcmp(frame1.data_, frame2.data_,
156 frame1.samples_per_channel_ * frame1.num_channels_ *
157 sizeof(int16_t))) {
158 return false;
159 }
160 return true;
161 }
162
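// Enables every processing component. Note that the fixed-point profile uses
// AECM with adaptive-digital AGC, while the float profile uses the full AEC
// with adaptive-analog AGC.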
163 void EnableAllAPComponents(AudioProcessing* ap) {
164 #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
165 EXPECT_NOERR(ap->echo_control_mobile()->Enable(true));
166
167 EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveDigital));
168 EXPECT_NOERR(ap->gain_control()->Enable(true));
169 #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
170 EXPECT_NOERR(ap->echo_cancellation()->enable_drift_compensation(true));
171 EXPECT_NOERR(ap->echo_cancellation()->enable_metrics(true));
172 EXPECT_NOERR(ap->echo_cancellation()->enable_delay_logging(true));
173 EXPECT_NOERR(ap->echo_cancellation()->Enable(true));
174
175 EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
176 EXPECT_NOERR(ap->gain_control()->set_analog_level_limits(0, 255));
177 EXPECT_NOERR(ap->gain_control()->Enable(true));
178 #endif
179
180 EXPECT_NOERR(ap->high_pass_filter()->Enable(true));
181 EXPECT_NOERR(ap->level_estimator()->Enable(true));
182 EXPECT_NOERR(ap->noise_suppression()->Enable(true));
183
184 EXPECT_NOERR(ap->voice_detection()->Enable(true));
185 }
186
187 #ifdef WEBRTC_AUDIOPROC_BIT_EXACT
188 // These functions are only used by the bit-exact test.
189 template <class T>
190 T AbsValue(T a) {
191 return a > 0 ? a: -a;
192 }
193
194 int16_t MaxAudioFrame(const AudioFrame& frame) {
195 const int length = frame.samples_per_channel_ * frame.num_channels_;
196 int16_t max_data = AbsValue(frame.data_[0]);
197 for (int i = 1; i < length; i++) {
198 max_data = std::max(max_data, AbsValue(frame.data_[i]));
199 }
200
201 return max_data;
202 }
203
204 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
205 void TestStats(const AudioProcessing::Statistic& test,
206 const audioproc::Test::Statistic& reference) {
207 EXPECT_EQ(reference.instant(), test.instant);
208 EXPECT_EQ(reference.average(), test.average);
209 EXPECT_EQ(reference.maximum(), test.maximum);
210 EXPECT_EQ(reference.minimum(), test.minimum);
211 }
212
213 void WriteStatsMessage(const AudioProcessing::Statistic& output,
214 audioproc::Test::Statistic* msg) {
215 msg->set_instant(output.instant);
216 msg->set_average(output.average);
217 msg->set_maximum(output.maximum);
218 msg->set_minimum(output.minimum);
219 }
220 #endif
221
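// Writes |msg| to |filename| as a 32-bit byte count followed by the serialized
// payload.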
222 void OpenFileAndWriteMessage(const std::string filename,
223 const ::google::protobuf::MessageLite& msg) {
224 FILE* file = fopen(filename.c_str(), "wb");
225 ASSERT_TRUE(file != NULL);
226
227 int32_t size = msg.ByteSize();
228 ASSERT_GT(size, 0);
229 scoped_ptr<uint8_t[]> array(new uint8_t[size]);
230 ASSERT_TRUE(msg.SerializeToArray(array.get(), size));
231
232 ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
233 ASSERT_EQ(static_cast<size_t>(size),
234 fwrite(array.get(), sizeof(array[0]), size, file));
235 fclose(file);
236 }
237 #endif // WEBRTC_AUDIOPROC_BIT_EXACT
238
239 std::string ResourceFilePath(std::string name, int sample_rate_hz) {
240 std::ostringstream ss;
241 // Resource files are all stereo.
242 ss << name << sample_rate_hz / 1000 << "_stereo";
243 return test::ResourcePath(ss.str(), "pcm");
244 }
245
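// Builds an output file name that encodes the stream configuration; e.g.
// OutputFilePath("out", 16000, 16000, 16000, 2, 2, 2) yields
// "out_i2_16_r2_16_stereo16.pcm" in the test output directory.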
246 std::string OutputFilePath(std::string name,
247 int input_rate,
248 int output_rate,
249 int reverse_rate,
250 int num_input_channels,
251 int num_output_channels,
252 int num_reverse_channels) {
253 std::ostringstream ss;
254 ss << name << "_i" << num_input_channels << "_" << input_rate / 1000
255 << "_r" << num_reverse_channels << "_" << reverse_rate / 1000 << "_";
256 if (num_output_channels == 1) {
257 ss << "mono";
258 } else if (num_output_channels == 2) {
259 ss << "stereo";
260 } else {
261 assert(false);
262 }
263 ss << output_rate / 1000 << ".pcm";
264
265 return test::OutputPath() + ss.str();
266 }
267
268 void OpenFileAndReadMessage(const std::string filename,
269 ::google::protobuf::MessageLite* msg) {
270 FILE* file = fopen(filename.c_str(), "rb");
271 ASSERT_TRUE(file != NULL);
272 ReadMessageFromFile(file, msg);
273 fclose(file);
274 }
275
276 class ApmTest : public ::testing::Test {
277 protected:
278 ApmTest();
279 virtual void SetUp();
280 virtual void TearDown();
281
282 static void SetUpTestCase() {
283 Trace::CreateTrace();
284 std::string trace_filename = test::OutputPath() + "audioproc_trace.txt";
285 ASSERT_EQ(0, Trace::SetTraceFile(trace_filename.c_str()));
286 }
287
288 static void TearDownTestCase() {
289 Trace::ReturnTrace();
290 }
291
292 // Used to select between int and float interface tests.
293 enum Format {
294 kIntFormat,
295 kFloatFormat
296 };
297
298 void Init(int sample_rate_hz,
299 int output_sample_rate_hz,
300 int reverse_sample_rate_hz,
301 int num_reverse_channels,
302 int num_input_channels,
303 int num_output_channels,
304 bool open_output_file);
305 void Init(AudioProcessing* ap);
306 void EnableAllComponents();
307 bool ReadFrame(FILE* file, AudioFrame* frame);
308 bool ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb);
309 void ReadFrameWithRewind(FILE* file, AudioFrame* frame);
310 void ReadFrameWithRewind(FILE* file, AudioFrame* frame,
311 ChannelBuffer<float>* cb);
312 void ProcessWithDefaultStreamParameters(AudioFrame* frame);
313 void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
314 int delay_min, int delay_max);
315 void TestChangingChannels(int num_channels,
316 AudioProcessing::Error expected_return);
317 void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate);
318 void RunManualVolumeChangeIsPossibleTest(int sample_rate);
319 void StreamParametersTest(Format format);
320 int ProcessStreamChooser(Format format);
321 int AnalyzeReverseStreamChooser(Format format);
322 void ProcessDebugDump(const std::string& in_filename,
323 const std::string& out_filename,
324 Format format);
325 void VerifyDebugDumpTest(Format format);
326
327 const std::string output_path_;
328 const std::string ref_path_;
329 const std::string ref_filename_;
330 scoped_ptr<AudioProcessing> apm_;
331 AudioFrame* frame_;
332 AudioFrame* revframe_;
333 scoped_ptr<ChannelBuffer<float> > float_cb_;
334 scoped_ptr<ChannelBuffer<float> > revfloat_cb_;
335 int output_sample_rate_hz_;
336 int num_output_channels_;
337 FILE* far_file_;
338 FILE* near_file_;
339 FILE* out_file_;
340 };
341
342 ApmTest::ApmTest()
343 : output_path_(test::OutputPath()),
344 ref_path_(test::ProjectRootPath() + "data/audio_processing/"),
345 #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
346 ref_filename_(ref_path_ + "output_data_fixed.pb"),
347 #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
348 ref_filename_(ref_path_ + "output_data_float.pb"),
349 #endif
350 frame_(NULL),
351 revframe_(NULL),
352 output_sample_rate_hz_(0),
353 num_output_channels_(0),
354 far_file_(NULL),
355 near_file_(NULL),
356 out_file_(NULL) {
357 Config config;
358 config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
359 apm_.reset(AudioProcessing::Create(config));
360 }
361
362 void ApmTest::SetUp() {
363 ASSERT_TRUE(apm_.get() != NULL);
364
365 frame_ = new AudioFrame();
366 revframe_ = new AudioFrame();
367
368 #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
369 Init(16000, 16000, 16000, 2, 2, 2, false);
370 #else
371 Init(32000, 32000, 32000, 2, 2, 2, false);
372 #endif
373 }
374
375 void ApmTest::TearDown() {
376 if (frame_) {
377 delete frame_;
378 }
379 frame_ = NULL;
380
381 if (revframe_) {
382 delete revframe_;
383 }
384 revframe_ = NULL;
385
386 if (far_file_) {
387 ASSERT_EQ(0, fclose(far_file_));
388 }
389 far_file_ = NULL;
390
391 if (near_file_) {
392 ASSERT_EQ(0, fclose(near_file_));
393 }
394 near_file_ = NULL;
395
396 if (out_file_) {
397 ASSERT_EQ(0, fclose(out_file_));
398 }
399 out_file_ = NULL;
400 }
401
402 void ApmTest::Init(AudioProcessing* ap) {
403 ASSERT_EQ(kNoErr,
404 ap->Initialize(frame_->sample_rate_hz_,
405 output_sample_rate_hz_,
406 revframe_->sample_rate_hz_,
407 LayoutFromChannels(frame_->num_channels_),
408 LayoutFromChannels(num_output_channels_),
409 LayoutFromChannels(revframe_->num_channels_)));
410 }
411
412 void ApmTest::Init(int sample_rate_hz,
413 int output_sample_rate_hz,
414 int reverse_sample_rate_hz,
415 int num_input_channels,
416 int num_output_channels,
417 int num_reverse_channels,
418 bool open_output_file) {
419 SetContainerFormat(sample_rate_hz, num_input_channels, frame_, &float_cb_);
420 output_sample_rate_hz_ = output_sample_rate_hz;
421 num_output_channels_ = num_output_channels;
422
423 SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, revframe_,
424 &revfloat_cb_);
425 Init(apm_.get());
426
427 if (far_file_) {
428 ASSERT_EQ(0, fclose(far_file_));
429 }
430 std::string filename = ResourceFilePath("far", sample_rate_hz);
431 far_file_ = fopen(filename.c_str(), "rb");
432 ASSERT_TRUE(far_file_ != NULL) << "Could not open file " <<
433 filename << "\n";
434
435 if (near_file_) {
436 ASSERT_EQ(0, fclose(near_file_));
437 }
438 filename = ResourceFilePath("near", sample_rate_hz);
439 near_file_ = fopen(filename.c_str(), "rb");
440 ASSERT_TRUE(near_file_ != NULL) << "Could not open file " <<
441 filename << "\n";
442
443 if (open_output_file) {
444 if (out_file_) {
445 ASSERT_EQ(0, fclose(out_file_));
446 }
447 filename = OutputFilePath("out",
448 sample_rate_hz,
449 output_sample_rate_hz,
450 reverse_sample_rate_hz,
451 num_input_channels,
452 num_output_channels,
453 num_reverse_channels);
454 out_file_ = fopen(filename.c_str(), "wb");
455 ASSERT_TRUE(out_file_ != NULL) << "Could not open file " <<
456 filename << "\n";
457 }
458 }
459
460 void ApmTest::EnableAllComponents() {
461 EnableAllAPComponents(apm_.get());
462 }
463
464 bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame,
465 ChannelBuffer<float>* cb) {
466 // The files always contain stereo audio.
467 size_t frame_size = frame->samples_per_channel_ * 2;
468 size_t read_count = fread(frame->data_,
469 sizeof(int16_t),
470 frame_size,
471 file);
472 if (read_count != frame_size) {
473 // Check that the file really ended.
474 EXPECT_NE(0, feof(file));
475 return false; // This is expected.
476 }
477
478 if (frame->num_channels_ == 1) {
479 MixStereoToMono(frame->data_, frame->data_,
480 frame->samples_per_channel_);
481 }
482
483 if (cb) {
484 ConvertToFloat(*frame, cb);
485 }
486 return true;
487 }
488
489 bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame) {
490 return ReadFrame(file, frame, NULL);
491 }
492
493 // If the end of the file has been reached, rewind it and attempt to read the
494 // frame again.
495 void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame,
496 ChannelBuffer<float>* cb) {
497 if (!ReadFrame(file, frame, cb)) {
498 rewind(file);
499 ASSERT_TRUE(ReadFrame(file, frame, cb));
500 }
501 }
502
503 void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame) {
504 ReadFrameWithRewind(file, frame, NULL);
505 }
506
507 void ApmTest::ProcessWithDefaultStreamParameters(AudioFrame* frame) {
508 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
509 apm_->echo_cancellation()->set_stream_drift_samples(0);
510 EXPECT_EQ(apm_->kNoError,
511 apm_->gain_control()->set_stream_analog_level(127));
512 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame));
513 }
514
515 int ApmTest::ProcessStreamChooser(Format format) {
516 if (format == kIntFormat) {
517 return apm_->ProcessStream(frame_);
518 }
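// The float interface requires the sample rates and channel layouts to be
// passed explicitly with every call.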
519 return apm_->ProcessStream(float_cb_->channels(),
520 frame_->samples_per_channel_,
521 frame_->sample_rate_hz_,
522 LayoutFromChannels(frame_->num_channels_),
523 output_sample_rate_hz_,
524 LayoutFromChannels(num_output_channels_),
525 float_cb_->channels());
526 }
527
528 int ApmTest::AnalyzeReverseStreamChooser(Format format) {
529 if (format == kIntFormat) {
530 return apm_->AnalyzeReverseStream(revframe_);
531 }
532 return apm_->AnalyzeReverseStream(
533 revfloat_cb_->channels(),
534 revframe_->samples_per_channel_,
535 revframe_->sample_rate_hz_,
536 LayoutFromChannels(revframe_->num_channels_));
537 }
538
539 void ApmTest::ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
540 int delay_min, int delay_max) {
541 // The |revframe_| and |frame_| should include the proper frame information,
542 // hence they can be used for extracting information.
543 AudioFrame tmp_frame;
544 std::queue<AudioFrame*> frame_queue;
545 bool causal = true;
546
547 tmp_frame.CopyFrom(*revframe_);
548 SetFrameTo(&tmp_frame, 0);
549
550 EXPECT_EQ(apm_->kNoError, apm_->Initialize());
551 // Initialize the |frame_queue| with empty frames.
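// Pre-filling the queue with |delay_ms| / 10 silent frames offsets the capture
// stream relative to the reverse stream; a negative delay makes the setup
// non-causal.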
552 int frame_delay = delay_ms / 10;
553 while (frame_delay < 0) {
554 AudioFrame* frame = new AudioFrame();
555 frame->CopyFrom(tmp_frame);
556 frame_queue.push(frame);
557 frame_delay++;
558 causal = false;
559 }
560 while (frame_delay > 0) {
561 AudioFrame* frame = new AudioFrame();
562 frame->CopyFrom(tmp_frame);
563 frame_queue.push(frame);
564 frame_delay--;
565 }
566 // Run for 4.5 seconds, skipping statistics from the first 2.5 seconds. We
567 // need enough frames with audio to have reliable estimates, but as few as
568 // possible to keep processing time down. 4.5 seconds seemed to be a good
569 // compromise for this recording.
570 for (int frame_count = 0; frame_count < 450; ++frame_count) {
571 AudioFrame* frame = new AudioFrame();
572 frame->CopyFrom(tmp_frame);
573 // Use the near end recording, since that has more speech in it.
574 ASSERT_TRUE(ReadFrame(near_file_, frame));
575 frame_queue.push(frame);
576 AudioFrame* reverse_frame = frame;
577 AudioFrame* process_frame = frame_queue.front();
578 if (!causal) {
579 reverse_frame = frame_queue.front();
580 // When we call ProcessStream() the frame is modified, so we can't use the
581 // pointer directly when things are non-causal. Use an intermediate frame
582 // and copy the data.
583 process_frame = &tmp_frame;
584 process_frame->CopyFrom(*frame);
585 }
586 EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(reverse_frame));
587 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(system_delay_ms));
588 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(process_frame));
589 frame = frame_queue.front();
590 frame_queue.pop();
591 delete frame;
592
593 if (frame_count == 250) {
594 int median;
595 int std;
596 // Discard the first delay metrics to avoid convergence effects.
597 EXPECT_EQ(apm_->kNoError,
598 apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
599 }
600 }
601
602 rewind(near_file_);
603 while (!frame_queue.empty()) {
604 AudioFrame* frame = frame_queue.front();
605 frame_queue.pop();
606 delete frame;
607 }
608 // Calculate expected delay estimate and acceptable regions. Further,
609 // limit them w.r.t. AEC delay estimation support.
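// The 96-sample margin below corresponds to the +-1.5 block tolerance used in
// these tests (the AEC operates on 64-sample blocks).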
610 const int samples_per_ms = std::min(16, frame_->samples_per_channel_ / 10);
611 int expected_median = std::min(std::max(delay_ms - system_delay_ms,
612 delay_min), delay_max);
613 int expected_median_high = std::min(std::max(
614 expected_median + 96 / samples_per_ms, delay_min), delay_max);
615 int expected_median_low = std::min(std::max(
616 expected_median - 96 / samples_per_ms, delay_min), delay_max);
617 // Verify delay metrics.
618 int median;
619 int std;
620 EXPECT_EQ(apm_->kNoError,
621 apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
622 EXPECT_GE(expected_median_high, median);
623 EXPECT_LE(expected_median_low, median);
624 }
625
626 void ApmTest::StreamParametersTest(Format format) {
627 // No errors when the components are disabled.
628 EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
629
630 // -- Missing AGC level --
631 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
632 EXPECT_EQ(apm_->kStreamParameterNotSetError,
633 ProcessStreamChooser(format));
634
635 // Resets after successful ProcessStream().
636 EXPECT_EQ(apm_->kNoError,
637 apm_->gain_control()->set_stream_analog_level(127));
638 EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
639 EXPECT_EQ(apm_->kStreamParameterNotSetError,
640 ProcessStreamChooser(format));
641
642 // Other stream parameters set correctly.
643 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
644 EXPECT_EQ(apm_->kNoError,
645 apm_->echo_cancellation()->enable_drift_compensation(true));
646 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
647 apm_->echo_cancellation()->set_stream_drift_samples(0);
648 EXPECT_EQ(apm_->kStreamParameterNotSetError,
649 ProcessStreamChooser(format));
650 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
651 EXPECT_EQ(apm_->kNoError,
652 apm_->echo_cancellation()->enable_drift_compensation(false));
653
654 // -- Missing delay --
655 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
656 EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
657 EXPECT_EQ(apm_->kStreamParameterNotSetError,
658 ProcessStreamChooser(format));
659
660 // Resets after successful ProcessStream().
661 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
662 EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
663 EXPECT_EQ(apm_->kStreamParameterNotSetError,
664 ProcessStreamChooser(format));
665
666 // Other stream parameters set correctly.
667 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
668 EXPECT_EQ(apm_->kNoError,
669 apm_->echo_cancellation()->enable_drift_compensation(true));
670 apm_->echo_cancellation()->set_stream_drift_samples(0);
671 EXPECT_EQ(apm_->kNoError,
672 apm_->gain_control()->set_stream_analog_level(127));
673 EXPECT_EQ(apm_->kStreamParameterNotSetError,
674 ProcessStreamChooser(format));
675 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
676
677 // -- Missing drift --
678 EXPECT_EQ(apm_->kStreamParameterNotSetError,
679 ProcessStreamChooser(format));
680
681 // Resets after successful ProcessStream().
682 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
683 apm_->echo_cancellation()->set_stream_drift_samples(0);
684 EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
685 EXPECT_EQ(apm_->kStreamParameterNotSetError,
686 ProcessStreamChooser(format));
687
688 // Other stream parameters set correctly.
689 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
690 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
691 EXPECT_EQ(apm_->kNoError,
692 apm_->gain_control()->set_stream_analog_level(127));
693 EXPECT_EQ(apm_->kStreamParameterNotSetError,
694 ProcessStreamChooser(format));
695
696 // -- No stream parameters --
697 EXPECT_EQ(apm_->kNoError,
698 AnalyzeReverseStreamChooser(format));
699 EXPECT_EQ(apm_->kStreamParameterNotSetError,
700 ProcessStreamChooser(format));
701
702 // -- All there --
703 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
704 apm_->echo_cancellation()->set_stream_drift_samples(0);
705 EXPECT_EQ(apm_->kNoError,
706 apm_->gain_control()->set_stream_analog_level(127));
707 EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format));
708 }
709
710 TEST_F(ApmTest, StreamParametersInt) {
711 StreamParametersTest(kIntFormat);
712 }
713
714 TEST_F(ApmTest, StreamParametersFloat) {
715 StreamParametersTest(kFloatFormat);
716 }
717
718 TEST_F(ApmTest, DefaultDelayOffsetIsZero) {
719 EXPECT_EQ(0, apm_->delay_offset_ms());
720 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(50));
721 EXPECT_EQ(50, apm_->stream_delay_ms());
722 }
723
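// stream_delay_ms() should report the set delay plus the configured offset,
// clamped to the supported [0, 500] ms range.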
724 TEST_F(ApmTest, DelayOffsetWithLimitsIsSetProperly) {
725 // High limit of 500 ms.
726 apm_->set_delay_offset_ms(100);
727 EXPECT_EQ(100, apm_->delay_offset_ms());
728 EXPECT_EQ(apm_->kBadStreamParameterWarning, apm_->set_stream_delay_ms(450));
729 EXPECT_EQ(500, apm_->stream_delay_ms());
730 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
731 EXPECT_EQ(200, apm_->stream_delay_ms());
732
733 // Low limit of 0 ms.
734 apm_->set_delay_offset_ms(-50);
735 EXPECT_EQ(-50, apm_->delay_offset_ms());
736 EXPECT_EQ(apm_->kBadStreamParameterWarning, apm_->set_stream_delay_ms(20));
737 EXPECT_EQ(0, apm_->stream_delay_ms());
738 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
739 EXPECT_EQ(50, apm_->stream_delay_ms());
740 }
741
742 void ApmTest::TestChangingChannels(int num_channels,
743 AudioProcessing::Error expected_return) {
744 frame_->num_channels_ = num_channels;
745 EXPECT_EQ(expected_return, apm_->ProcessStream(frame_));
746 EXPECT_EQ(expected_return, apm_->AnalyzeReverseStream(frame_));
747 }
748
749 TEST_F(ApmTest, Channels) {
750 // Testing number of invalid channels.
751 TestChangingChannels(0, apm_->kBadNumberChannelsError);
752 TestChangingChannels(3, apm_->kBadNumberChannelsError);
753 // Testing number of valid channels.
754 for (int i = 1; i < 3; i++) {
755 TestChangingChannels(i, kNoErr);
756 EXPECT_EQ(i, apm_->num_input_channels());
757 EXPECT_EQ(i, apm_->num_reverse_channels());
758 }
759 }
760
761 TEST_F(ApmTest, SampleRatesInt) {
762 // Testing invalid sample rates
763 SetContainerFormat(10000, 2, frame_, &float_cb_);
764 EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(kIntFormat));
765 // Testing valid sample rates
766 int fs[] = {8000, 16000, 32000};
767 for (size_t i = 0; i < sizeof(fs) / sizeof(*fs); i++) {
768 SetContainerFormat(fs[i], 2, frame_, &float_cb_);
769 EXPECT_NOERR(ProcessStreamChooser(kIntFormat));
770 EXPECT_EQ(fs[i], apm_->input_sample_rate_hz());
771 }
772 }
773
774 TEST_F(ApmTest, EchoCancellation) {
775 EXPECT_EQ(apm_->kNoError,
776 apm_->echo_cancellation()->enable_drift_compensation(true));
777 EXPECT_TRUE(apm_->echo_cancellation()->is_drift_compensation_enabled());
778 EXPECT_EQ(apm_->kNoError,
779 apm_->echo_cancellation()->enable_drift_compensation(false));
780 EXPECT_FALSE(apm_->echo_cancellation()->is_drift_compensation_enabled());
781
782 EchoCancellation::SuppressionLevel level[] = {
783 EchoCancellation::kLowSuppression,
784 EchoCancellation::kModerateSuppression,
785 EchoCancellation::kHighSuppression,
786 };
787 for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) {
788 EXPECT_EQ(apm_->kNoError,
789 apm_->echo_cancellation()->set_suppression_level(level[i]));
790 EXPECT_EQ(level[i],
791 apm_->echo_cancellation()->suppression_level());
792 }
793
794 EchoCancellation::Metrics metrics;
795 EXPECT_EQ(apm_->kNotEnabledError,
796 apm_->echo_cancellation()->GetMetrics(&metrics));
797
798 EXPECT_EQ(apm_->kNoError,
799 apm_->echo_cancellation()->enable_metrics(true));
800 EXPECT_TRUE(apm_->echo_cancellation()->are_metrics_enabled());
801 EXPECT_EQ(apm_->kNoError,
802 apm_->echo_cancellation()->enable_metrics(false));
803 EXPECT_FALSE(apm_->echo_cancellation()->are_metrics_enabled());
804
805 int median = 0;
806 int std = 0;
807 EXPECT_EQ(apm_->kNotEnabledError,
808 apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
809
810 EXPECT_EQ(apm_->kNoError,
811 apm_->echo_cancellation()->enable_delay_logging(true));
812 EXPECT_TRUE(apm_->echo_cancellation()->is_delay_logging_enabled());
813 EXPECT_EQ(apm_->kNoError,
814 apm_->echo_cancellation()->enable_delay_logging(false));
815 EXPECT_FALSE(apm_->echo_cancellation()->is_delay_logging_enabled());
816
817 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
818 EXPECT_TRUE(apm_->echo_cancellation()->is_enabled());
819 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false));
820 EXPECT_FALSE(apm_->echo_cancellation()->is_enabled());
821
822 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
823 EXPECT_TRUE(apm_->echo_cancellation()->is_enabled());
824 EXPECT_TRUE(apm_->echo_cancellation()->aec_core() != NULL);
825 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false));
826 EXPECT_FALSE(apm_->echo_cancellation()->is_enabled());
827 EXPECT_FALSE(apm_->echo_cancellation()->aec_core() != NULL);
828 }
829
830 TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) {
831 // Enable AEC only.
832 EXPECT_EQ(apm_->kNoError,
833 apm_->echo_cancellation()->enable_drift_compensation(false));
834 EXPECT_EQ(apm_->kNoError,
835 apm_->echo_cancellation()->enable_metrics(false));
836 EXPECT_EQ(apm_->kNoError,
837 apm_->echo_cancellation()->enable_delay_logging(true));
838 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
839 Config config;
840 config.Set<ReportedDelay>(new ReportedDelay(true));
841 apm_->SetExtraOptions(config);
842
843 // Internally in the AEC the amount of lookahead the delay estimation can
844 // handle is 15 blocks and the maximum delay is set to 60 blocks.
845 const int kLookaheadBlocks = 15;
846 const int kMaxDelayBlocks = 60;
847 // The AEC has a startup time before it actually starts to process. This
848 // procedure can flush the internal far-end buffer, which of course affects
849 // the delay estimation. Therefore, we set a system_delay high enough to
850 // avoid that. The smallest system_delay you can report without flushing the
851 // buffer is 66 ms in 8 kHz.
852 //
853 // It is known that for 16 kHz (and 32 kHz) sampling frequency there is an
854 // additional stuffing of 8 ms on the fly, but it seems to have no impact on
855 // delay estimation. This should be noted though. In case of test failure,
856 // this could be the cause.
857 const int kSystemDelayMs = 66;
858 // Test a couple of corner cases and verify that the estimated delay is
859 // within a valid region (set to +-1.5 blocks). Note that these cases are
860 // sampling frequency dependent.
861 for (size_t i = 0; i < kProcessSampleRatesSize; i++) {
862 Init(kProcessSampleRates[i],
863 kProcessSampleRates[i],
864 kProcessSampleRates[i],
865 2,
866 2,
867 2,
868 false);
869 // Sampling frequency dependent variables.
870 const int num_ms_per_block = std::max(4,
871 640 / frame_->samples_per_channel_);
872 const int delay_min_ms = -kLookaheadBlocks * num_ms_per_block;
873 const int delay_max_ms = (kMaxDelayBlocks - 1) * num_ms_per_block;
874
875 // 1) Verify correct delay estimate at lookahead boundary.
876 int delay_ms = TruncateToMultipleOf10(kSystemDelayMs + delay_min_ms);
877 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
878 delay_max_ms);
879 // 2) A delay less than the maximum lookahead should give a delay estimate at
880 // the boundary (= -kLookaheadBlocks * num_ms_per_block).
881 delay_ms -= 20;
882 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
883 delay_max_ms);
884 // 3) Three values around zero delay. Note that we need to compensate for
885 // the fake system_delay.
886 delay_ms = TruncateToMultipleOf10(kSystemDelayMs - 10);
887 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
888 delay_max_ms);
889 delay_ms = TruncateToMultipleOf10(kSystemDelayMs);
890 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
891 delay_max_ms);
892 delay_ms = TruncateToMultipleOf10(kSystemDelayMs + 10);
893 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
894 delay_max_ms);
895 // 4) Verify correct delay estimate at maximum delay boundary.
896 delay_ms = TruncateToMultipleOf10(kSystemDelayMs + delay_max_ms);
897 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
898 delay_max_ms);
899 // 5) A delay above the maximum delay should give an estimate at the
900 // boundary (= (kMaxDelayBlocks - 1) * num_ms_per_block).
901 delay_ms += 20;
902 ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
903 delay_max_ms);
904 }
905 }
906
907 TEST_F(ApmTest, EchoControlMobile) {
908 // AECM won't use super-wideband.
909 SetFrameSampleRate(frame_, 32000);
910 EXPECT_NOERR(apm_->ProcessStream(frame_));
911 EXPECT_EQ(apm_->kBadSampleRateError,
912 apm_->echo_control_mobile()->Enable(true));
913 SetFrameSampleRate(frame_, 16000);
914 EXPECT_NOERR(apm_->ProcessStream(frame_));
915 EXPECT_EQ(apm_->kNoError,
916 apm_->echo_control_mobile()->Enable(true));
917 SetFrameSampleRate(frame_, 32000);
918 EXPECT_EQ(apm_->kUnsupportedComponentError, apm_->ProcessStream(frame_));
919
920 // Turn AECM on (and AEC off)
921 Init(16000, 16000, 16000, 2, 2, 2, false);
922 EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
923 EXPECT_TRUE(apm_->echo_control_mobile()->is_enabled());
924
925 // Toggle routing modes
926 EchoControlMobile::RoutingMode mode[] = {
927 EchoControlMobile::kQuietEarpieceOrHeadset,
928 EchoControlMobile::kEarpiece,
929 EchoControlMobile::kLoudEarpiece,
930 EchoControlMobile::kSpeakerphone,
931 EchoControlMobile::kLoudSpeakerphone,
932 };
933 for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) {
934 EXPECT_EQ(apm_->kNoError,
935 apm_->echo_control_mobile()->set_routing_mode(mode[i]));
936 EXPECT_EQ(mode[i],
937 apm_->echo_control_mobile()->routing_mode());
938 }
939 // Turn comfort noise off/on
940 EXPECT_EQ(apm_->kNoError,
941 apm_->echo_control_mobile()->enable_comfort_noise(false));
942 EXPECT_FALSE(apm_->echo_control_mobile()->is_comfort_noise_enabled());
943 EXPECT_EQ(apm_->kNoError,
944 apm_->echo_control_mobile()->enable_comfort_noise(true));
945 EXPECT_TRUE(apm_->echo_control_mobile()->is_comfort_noise_enabled());
946 // Set and get echo path
947 const size_t echo_path_size =
948 apm_->echo_control_mobile()->echo_path_size_bytes();
949 scoped_ptr<char[]> echo_path_in(new char[echo_path_size]);
950 scoped_ptr<char[]> echo_path_out(new char[echo_path_size]);
951 EXPECT_EQ(apm_->kNullPointerError,
952 apm_->echo_control_mobile()->SetEchoPath(NULL, echo_path_size));
953 EXPECT_EQ(apm_->kNullPointerError,
954 apm_->echo_control_mobile()->GetEchoPath(NULL, echo_path_size));
955 EXPECT_EQ(apm_->kBadParameterError,
956 apm_->echo_control_mobile()->GetEchoPath(echo_path_out.get(), 1));
957 EXPECT_EQ(apm_->kNoError,
958 apm_->echo_control_mobile()->GetEchoPath(echo_path_out.get(),
959 echo_path_size));
960 for (size_t i = 0; i < echo_path_size; i++) {
961 echo_path_in[i] = echo_path_out[i] + 1;
962 }
963 EXPECT_EQ(apm_->kBadParameterError,
964 apm_->echo_control_mobile()->SetEchoPath(echo_path_in.get(), 1));
965 EXPECT_EQ(apm_->kNoError,
966 apm_->echo_control_mobile()->SetEchoPath(echo_path_in.get(),
967 echo_path_size));
968 EXPECT_EQ(apm_->kNoError,
969 apm_->echo_control_mobile()->GetEchoPath(echo_path_out.get(),
970 echo_path_size));
971 for (size_t i = 0; i < echo_path_size; i++) {
972 EXPECT_EQ(echo_path_in[i], echo_path_out[i]);
973 }
974
975 // Process a few frames with NS in the default disabled state. This exercises
976 // a different codepath than with it enabled.
977 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
978 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
979 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
980 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
981
982 // Turn AECM off
983 EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false));
984 EXPECT_FALSE(apm_->echo_control_mobile()->is_enabled());
985 }
986
987 TEST_F(ApmTest, GainControl) {
988 // Testing gain modes
989 EXPECT_EQ(apm_->kNoError,
990 apm_->gain_control()->set_mode(
991 apm_->gain_control()->mode()));
992
993 GainControl::Mode mode[] = {
994 GainControl::kAdaptiveAnalog,
995 GainControl::kAdaptiveDigital,
996 GainControl::kFixedDigital
997 };
998 for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) {
999 EXPECT_EQ(apm_->kNoError,
1000 apm_->gain_control()->set_mode(mode[i]));
1001 EXPECT_EQ(mode[i], apm_->gain_control()->mode());
1002 }
1003 // Testing invalid target levels
1004 EXPECT_EQ(apm_->kBadParameterError,
1005 apm_->gain_control()->set_target_level_dbfs(-3));
1006 EXPECT_EQ(apm_->kBadParameterError,
1007 apm_->gain_control()->set_target_level_dbfs(-40));
1008 // Testing valid target levels
1009 EXPECT_EQ(apm_->kNoError,
1010 apm_->gain_control()->set_target_level_dbfs(
1011 apm_->gain_control()->target_level_dbfs()));
1012
1013 int level_dbfs[] = {0, 6, 31};
1014 for (size_t i = 0; i < sizeof(level_dbfs)/sizeof(*level_dbfs); i++) {
1015 EXPECT_EQ(apm_->kNoError,
1016 apm_->gain_control()->set_target_level_dbfs(level_dbfs[i]));
1017 EXPECT_EQ(level_dbfs[i], apm_->gain_control()->target_level_dbfs());
1018 }
1019
1020 // Testing invalid compression gains
1021 EXPECT_EQ(apm_->kBadParameterError,
1022 apm_->gain_control()->set_compression_gain_db(-1));
1023 EXPECT_EQ(apm_->kBadParameterError,
1024 apm_->gain_control()->set_compression_gain_db(100));
1025
1026 // Testing valid compression gains
1027 EXPECT_EQ(apm_->kNoError,
1028 apm_->gain_control()->set_compression_gain_db(
1029 apm_->gain_control()->compression_gain_db()));
1030
1031 int gain_db[] = {0, 10, 90};
1032 for (size_t i = 0; i < sizeof(gain_db)/sizeof(*gain_db); i++) {
1033 EXPECT_EQ(apm_->kNoError,
1034 apm_->gain_control()->set_compression_gain_db(gain_db[i]));
1035 EXPECT_EQ(gain_db[i], apm_->gain_control()->compression_gain_db());
1036 }
1037
1038 // Testing limiter off/on
1039 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(false));
1040 EXPECT_FALSE(apm_->gain_control()->is_limiter_enabled());
1041 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(true));
1042 EXPECT_TRUE(apm_->gain_control()->is_limiter_enabled());
1043
1044 // Testing invalid level limits
1045 EXPECT_EQ(apm_->kBadParameterError,
1046 apm_->gain_control()->set_analog_level_limits(-1, 512));
1047 EXPECT_EQ(apm_->kBadParameterError,
1048 apm_->gain_control()->set_analog_level_limits(100000, 512));
1049 EXPECT_EQ(apm_->kBadParameterError,
1050 apm_->gain_control()->set_analog_level_limits(512, -1));
1051 EXPECT_EQ(apm_->kBadParameterError,
1052 apm_->gain_control()->set_analog_level_limits(512, 100000));
1053 EXPECT_EQ(apm_->kBadParameterError,
1054 apm_->gain_control()->set_analog_level_limits(512, 255));
1055
1056 // Testing valid level limits
1057 EXPECT_EQ(apm_->kNoError,
1058 apm_->gain_control()->set_analog_level_limits(
1059 apm_->gain_control()->analog_level_minimum(),
1060 apm_->gain_control()->analog_level_maximum()));
1061
1062 int min_level[] = {0, 255, 1024};
1063 for (size_t i = 0; i < sizeof(min_level)/sizeof(*min_level); i++) {
1064 EXPECT_EQ(apm_->kNoError,
1065 apm_->gain_control()->set_analog_level_limits(min_level[i], 1024));
1066 EXPECT_EQ(min_level[i], apm_->gain_control()->analog_level_minimum());
1067 }
1068
1069 int max_level[] = {0, 1024, 65535};
1070 for (size_t i = 0; i < sizeof(max_level)/sizeof(*max_level); i++) {
1071 EXPECT_EQ(apm_->kNoError,
1072 apm_->gain_control()->set_analog_level_limits(0, max_level[i]));
1073 EXPECT_EQ(max_level[i], apm_->gain_control()->analog_level_maximum());
1074 }
1075
1076 // TODO(ajm): stream_is_saturated() and stream_analog_level()
1077
1078 // Turn AGC off
1079 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
1080 EXPECT_FALSE(apm_->gain_control()->is_enabled());
1081 }
1082
1083 void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) {
1084 Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false);
1085 EXPECT_EQ(apm_->kNoError,
1086 apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
1087 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
1088
1089 int out_analog_level = 0;
1090 for (int i = 0; i < 2000; ++i) {
1091 ReadFrameWithRewind(near_file_, frame_);
1092 // Ensure the audio is at a low level, so the AGC will try to increase it.
1093 ScaleFrame(frame_, 0.25);
1094
1095 // Always pass in the same volume.
1096 EXPECT_EQ(apm_->kNoError,
1097 apm_->gain_control()->set_stream_analog_level(100));
1098 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1099 out_analog_level = apm_->gain_control()->stream_analog_level();
1100 }
1101
1102 // Ensure the AGC is still able to reach the maximum.
1103 EXPECT_EQ(255, out_analog_level);
1104 }
1105
1106 // Verifies that despite volume slider quantization, the AGC can continue to
1107 // increase its volume.
1108 TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) {
1109 for (size_t i = 0; i < kSampleRatesSize; ++i) {
1110 RunQuantizedVolumeDoesNotGetStuckTest(kSampleRates[i]);
1111 }
1112 }
1113
1114 void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) {
1115 Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false);
1116 EXPECT_EQ(apm_->kNoError,
1117 apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
1118 EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
1119
1120 int out_analog_level = 100;
1121 for (int i = 0; i < 1000; ++i) {
1122 ReadFrameWithRewind(near_file_, frame_);
1123 // Ensure the audio is at a low level, so the AGC will try to increase it.
1124 ScaleFrame(frame_, 0.25);
1125
1126 EXPECT_EQ(apm_->kNoError,
1127 apm_->gain_control()->set_stream_analog_level(out_analog_level));
1128 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1129 out_analog_level = apm_->gain_control()->stream_analog_level();
1130 }
1131
1132 // Ensure the volume was raised.
1133 EXPECT_GT(out_analog_level, 100);
1134 int highest_level_reached = out_analog_level;
1135 // Simulate a user manual volume change.
1136 out_analog_level = 100;
1137
1138 for (int i = 0; i < 300; ++i) {
1139 ReadFrameWithRewind(near_file_, frame_);
1140 ScaleFrame(frame_, 0.25);
1141
1142 EXPECT_EQ(apm_->kNoError,
1143 apm_->gain_control()->set_stream_analog_level(out_analog_level));
1144 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1145 out_analog_level = apm_->gain_control()->stream_analog_level();
1146 // Check that AGC respected the manually adjusted volume.
1147 EXPECT_LT(out_analog_level, highest_level_reached);
1148 }
1149 // Check that the volume was still raised.
1150 EXPECT_GT(out_analog_level, 100);
1151 }
1152
1153 TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
1154 for (size_t i = 0; i < kSampleRatesSize; ++i) {
1155 RunManualVolumeChangeIsPossibleTest(kSampleRates[i]);
1156 }
1157 }
1158
1159 TEST_F(ApmTest, NoiseSuppression) {
1160 // Test valid suppression levels.
1161 NoiseSuppression::Level level[] = {
1162 NoiseSuppression::kLow,
1163 NoiseSuppression::kModerate,
1164 NoiseSuppression::kHigh,
1165 NoiseSuppression::kVeryHigh
1166 };
1167 for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) {
1168 EXPECT_EQ(apm_->kNoError,
1169 apm_->noise_suppression()->set_level(level[i]));
1170 EXPECT_EQ(level[i], apm_->noise_suppression()->level());
1171 }
1172
1173 // Turn NS on/off
1174 EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true));
1175 EXPECT_TRUE(apm_->noise_suppression()->is_enabled());
1176 EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(false));
1177 EXPECT_FALSE(apm_->noise_suppression()->is_enabled());
1178 }
1179
1180 TEST_F(ApmTest, HighPassFilter) {
1181 // Turn HP filter on/off
1182 EXPECT_EQ(apm_->kNoError, apm_->high_pass_filter()->Enable(true));
1183 EXPECT_TRUE(apm_->high_pass_filter()->is_enabled());
1184 EXPECT_EQ(apm_->kNoError, apm_->high_pass_filter()->Enable(false));
1185 EXPECT_FALSE(apm_->high_pass_filter()->is_enabled());
1186 }
1187
1188 TEST_F(ApmTest, LevelEstimator) {
1189 // Turn level estimator on/off
1190 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
1191 EXPECT_FALSE(apm_->level_estimator()->is_enabled());
1192
1193 EXPECT_EQ(apm_->kNotEnabledError, apm_->level_estimator()->RMS());
1194
1195 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
1196 EXPECT_TRUE(apm_->level_estimator()->is_enabled());
1197
1198 // Run this test in wideband; in super-wb, the splitting filter distorts the
1199 // audio enough to cause deviation from the expectation for small values.
1200 frame_->samples_per_channel_ = 160;
1201 frame_->num_channels_ = 2;
1202 frame_->sample_rate_hz_ = 16000;
1203
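// RMS() reports the frame level on a 0-127 scale (roughly -dBFS): 0 for a
// full-scale signal and 127 for silence.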
1204 // Min value if no frames have been processed.
1205 EXPECT_EQ(127, apm_->level_estimator()->RMS());
1206
1207 // Min value on zero frames.
1208 SetFrameTo(frame_, 0);
1209 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1210 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1211 EXPECT_EQ(127, apm_->level_estimator()->RMS());
1212
1213 // Try a few RMS values.
1214 // (These also test that the value resets after retrieving it.)
1215 SetFrameTo(frame_, 32767);
1216 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1217 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1218 EXPECT_EQ(0, apm_->level_estimator()->RMS());
1219
1220 SetFrameTo(frame_, 30000);
1221 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1222 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1223 EXPECT_EQ(1, apm_->level_estimator()->RMS());
1224
1225 SetFrameTo(frame_, 10000);
1226 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1227 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1228 EXPECT_EQ(10, apm_->level_estimator()->RMS());
1229
1230 SetFrameTo(frame_, 10);
1231 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1232 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1233 EXPECT_EQ(70, apm_->level_estimator()->RMS());
1234
1235 // Verify reset after enable/disable.
1236 SetFrameTo(frame_, 32767);
1237 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1238 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
1239 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
1240 SetFrameTo(frame_, 1);
1241 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1242 EXPECT_EQ(90, apm_->level_estimator()->RMS());
1243
1244 // Verify reset after initialize.
1245 SetFrameTo(frame_, 32767);
1246 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1247 EXPECT_EQ(apm_->kNoError, apm_->Initialize());
1248 SetFrameTo(frame_, 1);
1249 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1250 EXPECT_EQ(90, apm_->level_estimator()->RMS());
1251 }
1252
1253 TEST_F(ApmTest, VoiceDetection) {
1254 // Test external VAD
1255 EXPECT_EQ(apm_->kNoError,
1256 apm_->voice_detection()->set_stream_has_voice(true));
1257 EXPECT_TRUE(apm_->voice_detection()->stream_has_voice());
1258 EXPECT_EQ(apm_->kNoError,
1259 apm_->voice_detection()->set_stream_has_voice(false));
1260 EXPECT_FALSE(apm_->voice_detection()->stream_has_voice());
1261
1262 // Test valid likelihoods
1263 VoiceDetection::Likelihood likelihood[] = {
1264 VoiceDetection::kVeryLowLikelihood,
1265 VoiceDetection::kLowLikelihood,
1266 VoiceDetection::kModerateLikelihood,
1267 VoiceDetection::kHighLikelihood
1268 };
1269 for (size_t i = 0; i < sizeof(likelihood)/sizeof(*likelihood); i++) {
1270 EXPECT_EQ(apm_->kNoError,
1271 apm_->voice_detection()->set_likelihood(likelihood[i]));
1272 EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood());
1273 }
1274
1275 /* TODO(bjornv): Enable once VAD supports other frame lengths than 10 ms
1276 // Test invalid frame sizes
1277 EXPECT_EQ(apm_->kBadParameterError,
1278 apm_->voice_detection()->set_frame_size_ms(12));
1279
1280 // Test valid frame sizes
1281 for (int i = 10; i <= 30; i += 10) {
1282 EXPECT_EQ(apm_->kNoError,
1283 apm_->voice_detection()->set_frame_size_ms(i));
1284 EXPECT_EQ(i, apm_->voice_detection()->frame_size_ms());
1285 }
1286 */
1287
1288 // Turn VAD on/off
1289 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
1290 EXPECT_TRUE(apm_->voice_detection()->is_enabled());
1291 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
1292 EXPECT_FALSE(apm_->voice_detection()->is_enabled());
1293
1294 // Test that AudioFrame activity is maintained when VAD is disabled.
1295 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
1296 AudioFrame::VADActivity activity[] = {
1297 AudioFrame::kVadActive,
1298 AudioFrame::kVadPassive,
1299 AudioFrame::kVadUnknown
1300 };
1301 for (size_t i = 0; i < sizeof(activity)/sizeof(*activity); i++) {
1302 frame_->vad_activity_ = activity[i];
1303 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1304 EXPECT_EQ(activity[i], frame_->vad_activity_);
1305 }
1306
1307 // Test that AudioFrame activity is set when VAD is enabled.
1308 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
1309 frame_->vad_activity_ = AudioFrame::kVadUnknown;
1310 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1311 EXPECT_NE(AudioFrame::kVadUnknown, frame_->vad_activity_);
1312
1313 // TODO(bjornv): Add tests for streamed voice; stream_has_voice()
1314 }
1315
1316 TEST_F(ApmTest, AllProcessingDisabledByDefault) {
1317 EXPECT_FALSE(apm_->echo_cancellation()->is_enabled());
1318 EXPECT_FALSE(apm_->echo_control_mobile()->is_enabled());
1319 EXPECT_FALSE(apm_->gain_control()->is_enabled());
1320 EXPECT_FALSE(apm_->high_pass_filter()->is_enabled());
1321 EXPECT_FALSE(apm_->level_estimator()->is_enabled());
1322 EXPECT_FALSE(apm_->noise_suppression()->is_enabled());
1323 EXPECT_FALSE(apm_->voice_detection()->is_enabled());
1324 }
1325
1326 TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
1327 for (size_t i = 0; i < kSampleRatesSize; i++) {
1328 Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false);
1329 SetFrameTo(frame_, 1000, 2000);
1330 AudioFrame frame_copy;
1331 frame_copy.CopyFrom(*frame_);
1332 for (int j = 0; j < 1000; j++) {
1333 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1334 EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
1335 }
1336 }
1337 }
1338
1339 TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
1340 EnableAllComponents();
1341
1342 for (size_t i = 0; i < kProcessSampleRatesSize; i++) {
1343 Init(kProcessSampleRates[i],
1344 kProcessSampleRates[i],
1345 kProcessSampleRates[i],
1346 2,
1347 2,
1348 2,
1349 false);
1350 int analog_level = 127;
1351 ASSERT_EQ(0, feof(far_file_));
1352 ASSERT_EQ(0, feof(near_file_));
1353 while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
1354 CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_);
1355
1356 ASSERT_EQ(kNoErr, apm_->AnalyzeReverseStream(revframe_));
1357
1358 CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_);
1359 frame_->vad_activity_ = AudioFrame::kVadUnknown;
1360
1361 ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0));
1362 apm_->echo_cancellation()->set_stream_drift_samples(0);
1363 ASSERT_EQ(kNoErr,
1364 apm_->gain_control()->set_stream_analog_level(analog_level));
1365 ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_));
1366 analog_level = apm_->gain_control()->stream_analog_level();
1367
1368 VerifyChannelsAreEqual(frame_->data_, frame_->samples_per_channel_);
1369 }
1370 rewind(far_file_);
1371 rewind(near_file_);
1372 }
1373 }
1374
1375 TEST_F(ApmTest, SplittingFilter) {
1376 // Verify the filter is not active through undistorted audio when:
1377 // 1. No components are enabled...
1378 SetFrameTo(frame_, 1000);
1379 AudioFrame frame_copy;
1380 frame_copy.CopyFrom(*frame_);
1381 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1382 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1383 EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
1384
1385 // 2. Only the level estimator is enabled...
1386 SetFrameTo(frame_, 1000);
1387 frame_copy.CopyFrom(*frame_);
1388 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
1389 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1390 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1391 EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
1392 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
1393
1394 // 3. Only VAD is enabled...
1395 SetFrameTo(frame_, 1000);
1396 frame_copy.CopyFrom(*frame_);
1397 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
1398 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1399 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1400 EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
1401 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
1402
1403 // 4. Both VAD and the level estimator are enabled...
1404 SetFrameTo(frame_, 1000);
1405 frame_copy.CopyFrom(*frame_);
1406 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
1407 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
1408 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1409 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1410 EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
1411 EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
1412 EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
1413
1414 // 5. Not using super-wb.
1415 frame_->samples_per_channel_ = 160;
1416 frame_->num_channels_ = 2;
1417 frame_->sample_rate_hz_ = 16000;
1418 // Enable AEC, which would require the filter in super-wb. We rely on the
1419 // first few frames of data being unaffected by the AEC.
1420 // TODO(andrew): This test, and the one below, rely rather tenuously on the
1421 // behavior of the AEC. Think of something more robust.
1422 EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
1423 // Enable the extended (delay-corrected) AEC filter to ensure nothing is
1424 // touched until a far-end frame has been received.
1425 Config config;
1426 config.Set<DelayCorrection>(new DelayCorrection(true));
1427 apm_->SetExtraOptions(config);
1428 SetFrameTo(frame_, 1000);
1429 frame_copy.CopyFrom(*frame_);
1430 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
1431 apm_->echo_cancellation()->set_stream_drift_samples(0);
1432 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1433 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
1434 apm_->echo_cancellation()->set_stream_drift_samples(0);
1435 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1436 EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
1437
1438 // Check that the test is valid: with AEC enabled we should see distortion
1439 // from the splitting filter (the AEC itself leaves these frames unaffected).
1440 frame_->samples_per_channel_ = 320;
1441 frame_->num_channels_ = 2;
1442 frame_->sample_rate_hz_ = 32000;
1443 SetFrameTo(frame_, 1000);
1444 frame_copy.CopyFrom(*frame_);
1445 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
1446 apm_->echo_cancellation()->set_stream_drift_samples(0);
1447 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1448 EXPECT_FALSE(FrameDataAreEqual(*frame_, frame_copy));
1449 }
1450
1451 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
1452 void ApmTest::ProcessDebugDump(const std::string& in_filename,
1453 const std::string& out_filename,
1454 Format format) {
1455 FILE* in_file = fopen(in_filename.c_str(), "rb");
1456 ASSERT_TRUE(in_file != NULL);
1457 audioproc::Event event_msg;
1458 bool first_init = true;
1459
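// Replay the dump event by event: re-initialize the APM on INIT events and
// feed REVERSE_STREAM/STREAM events through the interface selected by
// |format| (int16 AudioFrame or deinterleaved float).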
1460 while (ReadMessageFromFile(in_file, &event_msg)) {
1461 if (event_msg.type() == audioproc::Event::INIT) {
1462 const audioproc::Init msg = event_msg.init();
1463 int reverse_sample_rate = msg.sample_rate();
1464 if (msg.has_reverse_sample_rate()) {
1465 reverse_sample_rate = msg.reverse_sample_rate();
1466 }
1467 int output_sample_rate = msg.sample_rate();
1468 if (msg.has_output_sample_rate()) {
1469 output_sample_rate = msg.output_sample_rate();
1470 }
1471
1472 Init(msg.sample_rate(),
1473 output_sample_rate,
1474 reverse_sample_rate,
1475 msg.num_input_channels(),
1476 msg.num_output_channels(),
1477 msg.num_reverse_channels(),
1478 false);
1479 if (first_init) {
1480 // StartDebugRecording() writes an additional init message. Don't start
1481 // recording until after the first init to avoid the extra message.
1482 EXPECT_NOERR(apm_->StartDebugRecording(out_filename.c_str()));
1483 first_init = false;
1484 }
1485
1486 } else if (event_msg.type() == audioproc::Event::REVERSE_STREAM) {
1487 const audioproc::ReverseStream msg = event_msg.reverse_stream();
1488
1489 if (msg.channel_size() > 0) {
1490 ASSERT_EQ(revframe_->num_channels_, msg.channel_size());
1491 for (int i = 0; i < msg.channel_size(); ++i) {
1492 memcpy(revfloat_cb_->channel(i), msg.channel(i).data(),
1493 msg.channel(i).size());
1494 }
1495 } else {
1496 memcpy(revframe_->data_, msg.data().data(), msg.data().size());
1497 if (format == kFloatFormat) {
1498 // We're using an int16 input file; convert to float.
1499 ConvertToFloat(*revframe_, revfloat_cb_.get());
1500 }
1501 }
1502 AnalyzeReverseStreamChooser(format);
1503
1504 } else if (event_msg.type() == audioproc::Event::STREAM) {
1505 const audioproc::Stream msg = event_msg.stream();
1506 // ProcessStream could have changed this for the output frame.
1507 frame_->num_channels_ = apm_->num_input_channels();
1508
1509 EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(msg.level()));
1510 EXPECT_NOERR(apm_->set_stream_delay_ms(msg.delay()));
1511 apm_->echo_cancellation()->set_stream_drift_samples(msg.drift());
1512 if (msg.has_keypress()) {
1513 apm_->set_stream_key_pressed(msg.keypress());
1514 } else {
1515 apm_->set_stream_key_pressed(true);
1516 }
1517
1518 if (msg.input_channel_size() > 0) {
1519 ASSERT_EQ(frame_->num_channels_, msg.input_channel_size());
1520 for (int i = 0; i < msg.input_channel_size(); ++i) {
1521 memcpy(float_cb_->channel(i), msg.input_channel(i).data(),
1522 msg.input_channel(i).size());
1523 }
1524 } else {
1525 memcpy(frame_->data_, msg.input_data().data(), msg.input_data().size());
1526 if (format == kFloatFormat) {
1527 // We're using an int16 input file; convert to float.
1528 ConvertToFloat(*frame_, float_cb_.get());
1529 }
1530 }
1531 ProcessStreamChooser(format);
1532 }
1533 }
1534 EXPECT_NOERR(apm_->StopDebugRecording());
1535 fclose(in_file);
1536 }
1537
1538 void ApmTest::VerifyDebugDumpTest(Format format) {
1539 const std::string in_filename = test::ResourcePath("ref03", "aecdump");
1540 std::string format_string;
1541 switch (format) {
1542 case kIntFormat:
1543 format_string = "_int";
1544 break;
1545 case kFloatFormat:
1546 format_string = "_float";
1547 break;
1548 }
1549 const std::string ref_filename =
1550 test::OutputPath() + "ref" + format_string + ".aecdump";
1551 const std::string out_filename =
1552 test::OutputPath() + "out" + format_string + ".aecdump";
1553 EnableAllComponents();
1554 ProcessDebugDump(in_filename, ref_filename, format);
1555 ProcessDebugDump(ref_filename, out_filename, format);
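// Processing the original dump produced |ref_filename|; processing that
// dump again produced |out_filename|. The two recordings should now be
// identical, message by message.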
1556
1557 FILE* ref_file = fopen(ref_filename.c_str(), "rb");
1558 FILE* out_file = fopen(out_filename.c_str(), "rb");
1559 ASSERT_TRUE(ref_file != NULL);
1560 ASSERT_TRUE(out_file != NULL);
1561 scoped_ptr<uint8_t[]> ref_bytes;
1562 scoped_ptr<uint8_t[]> out_bytes;
1563
1564 size_t ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes);
1565 size_t out_size = ReadMessageBytesFromFile(out_file, &out_bytes);
1566 size_t bytes_read = 0;
1567 while (ref_size > 0 && out_size > 0) {
1568 bytes_read += ref_size;
1569 EXPECT_EQ(ref_size, out_size);
1570 EXPECT_EQ(0, memcmp(ref_bytes.get(), out_bytes.get(), ref_size));
1571 ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes);
1572 out_size = ReadMessageBytesFromFile(out_file, &out_bytes);
1573 }
1574 EXPECT_GT(bytes_read, 0u);
1575 EXPECT_NE(0, feof(ref_file));
1576 EXPECT_NE(0, feof(out_file));
1577 ASSERT_EQ(0, fclose(ref_file));
1578 ASSERT_EQ(0, fclose(out_file));
1579 }
1580
1581 TEST_F(ApmTest, VerifyDebugDumpInt) {
1582 VerifyDebugDumpTest(kIntFormat);
1583 }
1584
1585 TEST_F(ApmTest, VerifyDebugDumpFloat) {
1586 VerifyDebugDumpTest(kFloatFormat);
1587 }
1588 #endif
1589
1590 // TODO(andrew): expand test to verify output.
1591 TEST_F(ApmTest, DebugDump) {
1592 const std::string filename = test::OutputPath() + "debug.aec";
1593 EXPECT_EQ(apm_->kNullPointerError,
1594 apm_->StartDebugRecording(static_cast<const char*>(NULL)));
1595
1596 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
1597 // Stopping without having started should be OK.
1598 EXPECT_EQ(apm_->kNoError, apm_->StopDebugRecording());
1599
1600 EXPECT_EQ(apm_->kNoError, apm_->StartDebugRecording(filename.c_str()));
1601 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1602 EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
1603 EXPECT_EQ(apm_->kNoError, apm_->StopDebugRecording());
1604
1605 // Verify the file has been written.
1606 FILE* fid = fopen(filename.c_str(), "r");
1607 ASSERT_TRUE(fid != NULL);
1608
1609 // Clean it up.
1610 ASSERT_EQ(0, fclose(fid));
1611 ASSERT_EQ(0, remove(filename.c_str()));
1612 #else
1613 EXPECT_EQ(apm_->kUnsupportedFunctionError,
1614 apm_->StartDebugRecording(filename.c_str()));
1615 EXPECT_EQ(apm_->kUnsupportedFunctionError, apm_->StopDebugRecording());
1616
1617 // Verify the file has NOT been written.
1618 ASSERT_TRUE(fopen(filename.c_str(), "r") == NULL);
1619 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1620 }
1621
1622 // TODO(andrew): expand test to verify output.
1623 TEST_F(ApmTest, DebugDumpFromFileHandle) {
1624 FILE* fid = NULL;
1625 EXPECT_EQ(apm_->kNullPointerError, apm_->StartDebugRecording(fid));
1626 const std::string filename = test::OutputPath() + "debug.aec";
1627 fid = fopen(filename.c_str(), "w");
1628 ASSERT_TRUE(fid);
1629
1630 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
1631 // Stopping without having started should be OK.
1632 EXPECT_EQ(apm_->kNoError, apm_->StopDebugRecording());
1633
1634 EXPECT_EQ(apm_->kNoError, apm_->StartDebugRecording(fid));
1635 EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
1636 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1637 EXPECT_EQ(apm_->kNoError, apm_->StopDebugRecording());
1638
1639 // Verify the file has been written.
1640 fid = fopen(filename.c_str(), "r");
1641 ASSERT_TRUE(fid != NULL);
1642
1643 // Clean it up.
1644 ASSERT_EQ(0, fclose(fid));
1645 ASSERT_EQ(0, remove(filename.c_str()));
1646 #else
1647 EXPECT_EQ(apm_->kUnsupportedFunctionError,
1648 apm_->StartDebugRecording(fid));
1649 EXPECT_EQ(apm_->kUnsupportedFunctionError, apm_->StopDebugRecording());
1650
1651 ASSERT_EQ(0, fclose(fid));
1652 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1653 }
1654
1655 TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
1656 audioproc::OutputData ref_data;
1657 OpenFileAndReadMessage(ref_filename_, &ref_data);
1658
1659 Config config;
1660 config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
1661 scoped_ptr<AudioProcessing> fapm(AudioProcessing::Create(config));
1662 EnableAllComponents();
1663 EnableAllAPComponents(fapm.get());
1664 for (int i = 0; i < ref_data.test_size(); i++) {
1665 printf("Running test %d of %d...\n", i + 1, ref_data.test_size());
1666
1667 audioproc::Test* test = ref_data.mutable_test(i);
1668 // TODO(ajm): Restore downmixing test cases.
1669 if (test->num_input_channels() != test->num_output_channels())
1670 continue;
1671
1672 const int num_render_channels = test->num_reverse_channels();
1673 const int num_input_channels = test->num_input_channels();
1674 const int num_output_channels = test->num_output_channels();
1675 const int samples_per_channel = test->sample_rate() *
1676 AudioProcessing::kChunkSizeMs / 1000;
1677 const int output_length = samples_per_channel * num_output_channels;
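// With 10 ms chunks (kChunkSizeMs), a 16 kHz stereo test gives 160 samples
// per channel and an interleaved output length of 320.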
1678
1679 Init(test->sample_rate(), test->sample_rate(), test->sample_rate(),
1680 num_input_channels, num_output_channels, num_render_channels, true);
1681 Init(fapm.get());
1682
1683 ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
1684 scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]);
1685
1686 int analog_level = 127;
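// Run every frame through both the int16 AudioFrame interface (apm_) and
// the deinterleaved float interface (fapm), then compare the results
// bit-exactly after converting the float output back to interleaved int16.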
1687 while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
1688 ReadFrame(near_file_, frame_, float_cb_.get())) {
1689 frame_->vad_activity_ = AudioFrame::kVadUnknown;
1690
1691 EXPECT_NOERR(apm_->AnalyzeReverseStream(revframe_));
1692 EXPECT_NOERR(fapm->AnalyzeReverseStream(
1693 revfloat_cb_->channels(),
1694 samples_per_channel,
1695 test->sample_rate(),
1696 LayoutFromChannels(num_render_channels)));
1697
1698 EXPECT_NOERR(apm_->set_stream_delay_ms(0));
1699 EXPECT_NOERR(fapm->set_stream_delay_ms(0));
1700 apm_->echo_cancellation()->set_stream_drift_samples(0);
1701 fapm->echo_cancellation()->set_stream_drift_samples(0);
1702 EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(analog_level));
1703 EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
1704
1705 EXPECT_NOERR(apm_->ProcessStream(frame_));
1706 // TODO(ajm): Update to support different output rates.
1707 EXPECT_NOERR(fapm->ProcessStream(
1708 float_cb_->channels(),
1709 samples_per_channel,
1710 test->sample_rate(),
1711 LayoutFromChannels(num_input_channels),
1712 test->sample_rate(),
1713 LayoutFromChannels(num_output_channels),
1714 float_cb_->channels()));
1715
1716 // Convert to interleaved int16.
1717 ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data());
1718 Interleave(output_cb.channels(),
1719 samples_per_channel,
1720 num_output_channels,
1721 output_int16.get());
1722 // Verify float and int16 paths produce identical output.
1723 EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length));
1724
1725 analog_level = fapm->gain_control()->stream_analog_level();
1726 EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
1727 fapm->gain_control()->stream_analog_level());
1728 EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
1729 fapm->echo_cancellation()->stream_has_echo());
1730 EXPECT_EQ(apm_->voice_detection()->stream_has_voice(),
1731 fapm->voice_detection()->stream_has_voice());
1732 EXPECT_EQ(apm_->noise_suppression()->speech_probability(),
1733 fapm->noise_suppression()->speech_probability());
1734
1735 // Reset in case of downmixing.
1736 frame_->num_channels_ = test->num_input_channels();
1737 }
1738 rewind(far_file_);
1739 rewind(near_file_);
1740 }
1741 }
1742
1743 // TODO(andrew): Add a test to process a few frames with different combinations
1744 // of enabled components.
1745
1746 // TODO(andrew): Make this test more robust such that it can be run on multiple
1747 // platforms. It currently requires bit-exactness.
1748 #ifdef WEBRTC_AUDIOPROC_BIT_EXACT
1749 TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
1750 GOOGLE_PROTOBUF_VERIFY_VERSION;
1751 audioproc::OutputData ref_data;
1752
1753 if (!write_ref_data) {
1754 OpenFileAndReadMessage(ref_filename_, &ref_data);
1755 } else {
1756 // Write the desired tests to the protobuf reference file.
1757 for (size_t i = 0; i < kChannelsSize; i++) {
1758 for (size_t j = 0; j < kChannelsSize; j++) {
1759 for (size_t l = 0; l < kProcessSampleRatesSize; l++) {
1760 audioproc::Test* test = ref_data.add_test();
1761 test->set_num_reverse_channels(kChannels[i]);
1762 test->set_num_input_channels(kChannels[j]);
1763 test->set_num_output_channels(kChannels[j]);
1764 test->set_sample_rate(kProcessSampleRates[l]);
1765 }
1766 }
1767 }
1768 }
1769
1770 EnableAllComponents();
1771
1772 for (int i = 0; i < ref_data.test_size(); i++) {
1773 printf("Running test %d of %d...\n", i + 1, ref_data.test_size());
1774
1775 audioproc::Test* test = ref_data.mutable_test(i);
1776 // TODO(ajm): We no longer allow different input and output channels. Skip
1777 // these tests for now, but they should be removed from the set.
1778 if (test->num_input_channels() != test->num_output_channels())
1779 continue;
1780
1781 Init(test->sample_rate(),
1782 test->sample_rate(),
1783 test->sample_rate(),
1784 test->num_input_channels(),
1785 test->num_output_channels(),
1786 test->num_reverse_channels(),
1787 true);
1788
1789 int frame_count = 0;
1790 int has_echo_count = 0;
1791 int has_voice_count = 0;
1792 int is_saturated_count = 0;
1793 int analog_level = 127;
1794 int analog_level_average = 0;
1795 int max_output_average = 0;
1796 float ns_speech_prob_average = 0.0f;
1797
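// Process the file pair frame by frame, accumulating the statistics that
// are compared against (or written to) the protobuf reference data below.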
1798 while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
1799 EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
1800
1801 frame_->vad_activity_ = AudioFrame::kVadUnknown;
1802
1803 EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
1804 apm_->echo_cancellation()->set_stream_drift_samples(0);
1805 EXPECT_EQ(apm_->kNoError,
1806 apm_->gain_control()->set_stream_analog_level(analog_level));
1807
1808 EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
1809
1810 // Ensure the frame was downmixed properly.
1811 EXPECT_EQ(test->num_output_channels(), frame_->num_channels_);
1812
1813 max_output_average += MaxAudioFrame(*frame_);
1814
1815 if (apm_->echo_cancellation()->stream_has_echo()) {
1816 has_echo_count++;
1817 }
1818
1819 analog_level = apm_->gain_control()->stream_analog_level();
1820 analog_level_average += analog_level;
1821 if (apm_->gain_control()->stream_is_saturated()) {
1822 is_saturated_count++;
1823 }
1824 if (apm_->voice_detection()->stream_has_voice()) {
1825 has_voice_count++;
1826 EXPECT_EQ(AudioFrame::kVadActive, frame_->vad_activity_);
1827 } else {
1828 EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_);
1829 }
1830
1831 ns_speech_prob_average += apm_->noise_suppression()->speech_probability();
1832
1833 size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
1834 size_t write_count = fwrite(frame_->data_,
1835 sizeof(int16_t),
1836 frame_size,
1837 out_file_);
1838 ASSERT_EQ(frame_size, write_count);
1839
1840 // Reset in case of downmixing.
1841 frame_->num_channels_ = test->num_input_channels();
1842 frame_count++;
1843 }
1844 max_output_average /= frame_count;
1845 analog_level_average /= frame_count;
1846 ns_speech_prob_average /= frame_count;
1847
1848 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
1849 EchoCancellation::Metrics echo_metrics;
1850 EXPECT_EQ(apm_->kNoError,
1851 apm_->echo_cancellation()->GetMetrics(&echo_metrics));
1852 int median = 0;
1853 int std = 0;
1854 EXPECT_EQ(apm_->kNoError,
1855 apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
1856
1857 int rms_level = apm_->level_estimator()->RMS();
1858 EXPECT_LE(0, rms_level);
1859 EXPECT_GE(127, rms_level);
1860 #endif
1861
1862 if (!write_ref_data) {
1863 EXPECT_EQ(test->has_echo_count(), has_echo_count);
1864 EXPECT_EQ(test->has_voice_count(), has_voice_count);
1865 EXPECT_EQ(test->is_saturated_count(), is_saturated_count);
1866
1867 EXPECT_EQ(test->analog_level_average(), analog_level_average);
1868 EXPECT_EQ(test->max_output_average(), max_output_average);
1869
1870 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
1871 audioproc::Test::EchoMetrics reference = test->echo_metrics();
1872 TestStats(echo_metrics.residual_echo_return_loss,
1873 reference.residual_echo_return_loss());
1874 TestStats(echo_metrics.echo_return_loss,
1875 reference.echo_return_loss());
1876 TestStats(echo_metrics.echo_return_loss_enhancement,
1877 reference.echo_return_loss_enhancement());
1878 TestStats(echo_metrics.a_nlp,
1879 reference.a_nlp());
1880
1881 audioproc::Test::DelayMetrics reference_delay = test->delay_metrics();
1882 EXPECT_EQ(reference_delay.median(), median);
1883 EXPECT_EQ(reference_delay.std(), std);
1884
1885 EXPECT_EQ(test->rms_level(), rms_level);
1886
1887 EXPECT_FLOAT_EQ(test->ns_speech_probability_average(),
1888 ns_speech_prob_average);
1889 #endif
1890 } else {
1891 test->set_has_echo_count(has_echo_count);
1892 test->set_has_voice_count(has_voice_count);
1893 test->set_is_saturated_count(is_saturated_count);
1894
1895 test->set_analog_level_average(analog_level_average);
1896 test->set_max_output_average(max_output_average);
1897
1898 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
1899 audioproc::Test::EchoMetrics* message = test->mutable_echo_metrics();
1900 WriteStatsMessage(echo_metrics.residual_echo_return_loss,
1901 message->mutable_residual_echo_return_loss());
1902 WriteStatsMessage(echo_metrics.echo_return_loss,
1903 message->mutable_echo_return_loss());
1904 WriteStatsMessage(echo_metrics.echo_return_loss_enhancement,
1905 message->mutable_echo_return_loss_enhancement());
1906 WriteStatsMessage(echo_metrics.a_nlp,
1907 message->mutable_a_nlp());
1908
1909 audioproc::Test::DelayMetrics* message_delay =
1910 test->mutable_delay_metrics();
1911 message_delay->set_median(median);
1912 message_delay->set_std(std);
1913
1914 test->set_rms_level(rms_level);
1915
1916 EXPECT_LE(0.0f, ns_speech_prob_average);
1917 EXPECT_GE(1.0f, ns_speech_prob_average);
1918 test->set_ns_speech_probability_average(ns_speech_prob_average);
1919 #endif
1920 }
1921
1922 rewind(far_file_);
1923 rewind(near_file_);
1924 }
1925
1926 if (write_ref_data) {
1927 OpenFileAndWriteMessage(ref_filename_, ref_data);
1928 }
1929 }
1930
1931 #endif // WEBRTC_AUDIOPROC_BIT_EXACT
1932
1933 TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
1934 struct ChannelFormat {
1935 AudioProcessing::ChannelLayout in_layout;
1936 AudioProcessing::ChannelLayout out_layout;
1937 };
1938 ChannelFormat cf[] = {
1939 {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono},
1940 {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono},
1941 {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo},
1942 };
1943 size_t channel_format_size = sizeof(cf) / sizeof(*cf);
1944
1945 scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
1946 // Enable one component just to ensure some processing takes place.
1947 ap->noise_suppression()->Enable(true);
1948 for (size_t i = 0; i < channel_format_size; ++i) {
1949 const int in_rate = 44100;
1950 const int out_rate = 48000;
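// Mismatched, non-native rates (44.1 kHz in, 48 kHz out) mean resampling is
// exercised along with the keyboard-channel layouts.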
1951 ChannelBuffer<float> in_cb(SamplesFromRate(in_rate),
1952 TotalChannelsFromLayout(cf[i].in_layout));
1953 ChannelBuffer<float> out_cb(SamplesFromRate(out_rate),
1954 ChannelsFromLayout(cf[i].out_layout));
1955
1956 // Run over a few chunks.
1957 for (int j = 0; j < 10; ++j) {
1958 EXPECT_NOERR(ap->ProcessStream(
1959 in_cb.channels(),
1960 in_cb.samples_per_channel(),
1961 in_rate,
1962 cf[i].in_layout,
1963 out_rate,
1964 cf[i].out_layout,
1965 out_cb.channels()));
1966 }
1967 }
1968 }
1969
1970 // Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
1971 // stereo) file, converts to deinterleaved float (optionally downmixing) and
1972 // returns the result in |cb|. Returns false if the file ended (or on error) and
1973 // true otherwise.
1974 //
1975 // |int_data| and |float_data| are just temporary space that must be
1976 // sufficiently large to hold the 10 ms chunk.
1977 bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
1978 ChannelBuffer<float>* cb) {
1979 // The files always contain stereo audio.
1980 size_t frame_size = cb->samples_per_channel() * 2;
1981 size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
1982 if (read_count != frame_size) {
1983 // Check that the file really ended.
1984 assert(feof(file));
1985 return false; // This is expected.
1986 }
1987
1988 ScaleToFloat(int_data, frame_size, float_data);
1989 if (cb->num_channels() == 1) {
1990 MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
1991 } else {
1992 Deinterleave(float_data, cb->samples_per_channel(), 2,
1993 cb->channels());
1994 }
1995
1996 return true;
1997 }
1998
1999 // Compares the reference and test arrays over a region around the expected
2000 // delay. Finds the highest SNR in that region and adds the variance and squared
2001 // error results to the supplied accumulators.
2002 void UpdateBestSNR(const float* ref,
2003 const float* test,
2004 int length,
2005 int expected_delay,
2006 double* variance_acc,
2007 double* sq_error_acc) {
2008 double best_snr = std::numeric_limits<double>::min();
2009 double best_variance = 0;
2010 double best_sq_error = 0;
2011 // Search over a region of +/- 4 samples around the expected delay.
2012 for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4;
2013 ++delay) {
2014 double sq_error = 0;
2015 double variance = 0;
2016 for (int i = 0; i < length - delay; ++i) {
2017 double error = test[i + delay] - ref[i];
2018 sq_error += error * error;
2019 variance += ref[i] * ref[i];
2020 }
2021
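// A zero squared error means the delay-compensated signals match exactly;
// treat this as infinite SNR and stop searching.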
2022 if (sq_error == 0) {
2023 *variance_acc += variance;
2024 return;
2025 }
2026 double snr = variance / sq_error;
2027 if (snr > best_snr) {
2028 best_snr = snr;
2029 best_variance = variance;
2030 best_sq_error = sq_error;
2031 }
2032 }
2033
2034 *variance_acc += best_variance;
2035 *sq_error_acc += best_sq_error;
2036 }
2037
2038 // Used to test a multitude of sample rate and channel combinations. It works
2039 // by first producing a set of reference files (in SetUpTestCase) that are
2040 // assumed to be correct, as the used parameters are verified by other tests
2041 // in this collection. Importantly, the reference files are all produced at
2042 // "native" rates, which do not involve any resampling.
2043
2044 // Each test pass produces an output file with a particular format. The output
2045 // is matched against the reference file closest to its internal processing
2046 // format, resampling the output back to that rate if necessary. Because of
2047 // the resampling distortion we don't expect identical results, but enforce
2048 // SNR thresholds that vary with the format. An expected SNR of 0 is a special
2049 // case meaning bit-exact output (infinite SNR, i.e. zero error).
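// For example, the tuple (16000, 44100, 32000, 15) in the instantiations
// below runs a 16 kHz input, 44.1 kHz output and 32 kHz reverse stream and
// requires at least 15 dB SNR, while (16000, 16000, 16000, 0) requires
// bit-exact output.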
2050 typedef std::tr1::tuple<int, int, int, double> AudioProcessingTestData;
2051 class AudioProcessingTest
2052 : public testing::TestWithParam<AudioProcessingTestData> {
2053 public:
2054 AudioProcessingTest()
2055 : input_rate_(std::tr1::get<0>(GetParam())),
2056 output_rate_(std::tr1::get<1>(GetParam())),
2057 reverse_rate_(std::tr1::get<2>(GetParam())),
2058 expected_snr_(std::tr1::get<3>(GetParam())) {}
2059
2060 virtual ~AudioProcessingTest() {}
2061
2062 static void SetUpTestCase() {
2063 // Create all needed output reference files.
2064 const int kNativeRates[] = {8000, 16000, 32000};
2065 const size_t kNativeRatesSize =
2066 sizeof(kNativeRates) / sizeof(*kNativeRates);
2067 const int kNumChannels[] = {1, 2};
2068 const size_t kNumChannelsSize =
2069 sizeof(kNumChannels) / sizeof(*kNumChannels);
2070 for (size_t i = 0; i < kNativeRatesSize; ++i) {
2071 for (size_t j = 0; j < kNumChannelsSize; ++j) {
2072 for (size_t k = 0; k < kNumChannelsSize; ++k) {
2073 // The reference files always have matching input and output channels.
2074 ProcessFormat(kNativeRates[i],
2075 kNativeRates[i],
2076 kNativeRates[i],
2077 kNumChannels[j],
2078 kNumChannels[j],
2079 kNumChannels[k],
2080 "ref");
2081 }
2082 }
2083 }
2084 }
2085
2086 // Runs a process pass on files with the given parameters and dumps the output
2087 // to a file specified with |output_file_prefix|.
2088 static void ProcessFormat(int input_rate,
2089 int output_rate,
2090 int reverse_rate,
2091 int num_input_channels,
2092 int num_output_channels,
2093 int num_reverse_channels,
2094 std::string output_file_prefix) {
2095 scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
2096 EnableAllAPComponents(ap.get());
2097 ap->Initialize(input_rate,
2098 output_rate,
2099 reverse_rate,
2100 LayoutFromChannels(num_input_channels),
2101 LayoutFromChannels(num_output_channels),
2102 LayoutFromChannels(num_reverse_channels));
2103
2104 FILE* far_file = fopen(ResourceFilePath("far", reverse_rate).c_str(), "rb");
2105 FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb");
2106 FILE* out_file = fopen(OutputFilePath(output_file_prefix,
2107 input_rate,
2108 output_rate,
2109 reverse_rate,
2110 num_input_channels,
2111 num_output_channels,
2112 num_reverse_channels).c_str(), "wb");
2113 ASSERT_TRUE(far_file != NULL);
2114 ASSERT_TRUE(near_file != NULL);
2115 ASSERT_TRUE(out_file != NULL);
2116
2117 ChannelBuffer<float> fwd_cb(SamplesFromRate(input_rate),
2118 num_input_channels);
2119 ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_rate),
2120 num_reverse_channels);
2121 ChannelBuffer<float> out_cb(SamplesFromRate(output_rate),
2122 num_output_channels);
2123
2124 // Temporary buffers.
2125 const int max_length =
2126 2 * std::max(out_cb.samples_per_channel(),
2127 std::max(fwd_cb.samples_per_channel(),
2128 rev_cb.samples_per_channel()));
2129 scoped_ptr<float[]> float_data(new float[max_length]);
2130 scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
2131
2132 int analog_level = 127;
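// Read 10 ms chunks from the far- and near-end files, run reverse analysis
// and ProcessStream() through the float interface, and dump the interleaved
// float output.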
2133 while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) &&
2134 ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) {
2135 EXPECT_NOERR(ap->AnalyzeReverseStream(
2136 rev_cb.channels(),
2137 rev_cb.samples_per_channel(),
2138 reverse_rate,
2139 LayoutFromChannels(num_reverse_channels)));
2140
2141 EXPECT_NOERR(ap->set_stream_delay_ms(0));
2142 ap->echo_cancellation()->set_stream_drift_samples(0);
2143 EXPECT_NOERR(ap->gain_control()->set_stream_analog_level(analog_level));
2144
2145 EXPECT_NOERR(ap->ProcessStream(
2146 fwd_cb.channels(),
2147 fwd_cb.samples_per_channel(),
2148 input_rate,
2149 LayoutFromChannels(num_input_channels),
2150 output_rate,
2151 LayoutFromChannels(num_output_channels),
2152 out_cb.channels()));
2153
2154 Interleave(out_cb.channels(),
2155 out_cb.samples_per_channel(),
2156 out_cb.num_channels(),
2157 float_data.get());
2158 // Dump output to file.
2159 ASSERT_EQ(static_cast<size_t>(out_cb.length()),
2160 fwrite(float_data.get(), sizeof(float_data[0]),
2161 out_cb.length(), out_file));
2162
2163 analog_level = ap->gain_control()->stream_analog_level();
2164 }
2165 fclose(far_file);
2166 fclose(near_file);
2167 fclose(out_file);
2168 }
2169
2170 protected:
2171 int input_rate_;
2172 int output_rate_;
2173 int reverse_rate_;
2174 double expected_snr_;
2175 };
2176
2177 TEST_P(AudioProcessingTest, Formats) {
2178 struct ChannelFormat {
2179 int num_input;
2180 int num_output;
2181 int num_reverse;
2182 };
2183 ChannelFormat cf[] = {
2184 {1, 1, 1},
2185 {1, 1, 2},
2186 {2, 1, 1},
2187 {2, 1, 2},
2188 {2, 2, 1},
2189 {2, 2, 2},
2190 };
2191 size_t channel_format_size = sizeof(cf) / sizeof(*cf);
2192
2193 for (size_t i = 0; i < channel_format_size; ++i) {
2194 ProcessFormat(input_rate_,
2195 output_rate_,
2196 reverse_rate_,
2197 cf[i].num_input,
2198 cf[i].num_output,
2199 cf[i].num_reverse,
2200 "out");
2201 int min_ref_rate = std::min(input_rate_, output_rate_);
2202 int ref_rate;
2203 if (min_ref_rate > 16000) {
2204 ref_rate = 32000;
2205 } else if (min_ref_rate > 8000) {
2206 ref_rate = 16000;
2207 } else {
2208 ref_rate = 8000;
2209 }
2210 #ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE
2211 ref_rate = std::min(ref_rate, 16000);
2212 #endif
2213
2214 FILE* out_file = fopen(OutputFilePath("out",
2215 input_rate_,
2216 output_rate_,
2217 reverse_rate_,
2218 cf[i].num_input,
2219 cf[i].num_output,
2220 cf[i].num_reverse).c_str(), "rb");
2221 // The reference files always have matching input and output channels.
2222 FILE* ref_file = fopen(OutputFilePath("ref",
2223 ref_rate,
2224 ref_rate,
2225 ref_rate,
2226 cf[i].num_output,
2227 cf[i].num_output,
2228 cf[i].num_reverse).c_str(), "rb");
2229 ASSERT_TRUE(out_file != NULL);
2230 ASSERT_TRUE(ref_file != NULL);
2231
2232 const int ref_length = SamplesFromRate(ref_rate) * cf[i].num_output;
2233 const int out_length = SamplesFromRate(output_rate_) * cf[i].num_output;
2234 // Data from the reference file.
2235 scoped_ptr<float[]> ref_data(new float[ref_length]);
2236 // Data from the output file.
2237 scoped_ptr<float[]> out_data(new float[out_length]);
2238 // Data from the resampled output, in case the reference and output rates
2239 // don't match.
2240 scoped_ptr<float[]> cmp_data(new float[ref_length]);
2241
2242 PushResampler<float> resampler;
2243 resampler.InitializeIfNeeded(output_rate_, ref_rate, cf[i].num_output);
2244
2245 // Compute the resampling delay of the output relative to the reference,
2246 // to find the region over which we should search for the best SNR.
2247 float expected_delay_sec = 0;
2248 if (input_rate_ != ref_rate) {
2249 // Input resampling delay.
2250 expected_delay_sec +=
2251 PushSincResampler::AlgorithmicDelaySeconds(input_rate_);
2252 }
2253 if (output_rate_ != ref_rate) {
2254 // Output resampling delay.
2255 expected_delay_sec +=
2256 PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
2257 // Delay from resampling the output back to the reference rate in this test.
2258 expected_delay_sec +=
2259 PushSincResampler::AlgorithmicDelaySeconds(output_rate_);
2260 }
2261 int expected_delay = floor(expected_delay_sec * ref_rate + 0.5f) *
2262 cf[i].num_output;
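// |expected_delay| is expressed in interleaved samples (hence the
// multiplication by the number of output channels), matching the layout of
// the data read from the files below.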
2263
2264 double variance = 0;
2265 double sq_error = 0;
2266 while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
2267 fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
2268 float* out_ptr = out_data.get();
2269 if (output_rate_ != ref_rate) {
2270 // Resample the output back to its internal processing rate if necessary.
2271 ASSERT_EQ(ref_length, resampler.Resample(out_ptr,
2272 out_length,
2273 cmp_data.get(),
2274 ref_length));
2275 out_ptr = cmp_data.get();
2276 }
2277
2278 // Update the |sq_error| and |variance| accumulators with the highest SNR
2279 // of reference vs output.
2280 UpdateBestSNR(ref_data.get(),
2281 out_ptr,
2282 ref_length,
2283 expected_delay,
2284 &variance,
2285 &sq_error);
2286 }
2287
2288 std::cout << "(" << input_rate_ << ", "
2289 << output_rate_ << ", "
2290 << reverse_rate_ << ", "
2291 << cf[i].num_input << ", "
2292 << cf[i].num_output << ", "
2293 << cf[i].num_reverse << "): ";
2294 if (sq_error > 0) {
2295 double snr = 10 * log10(variance / sq_error);
2296 EXPECT_GE(snr, expected_snr_);
2297 EXPECT_NE(0, expected_snr_);
2298 std::cout << "SNR=" << snr << " dB" << std::endl;
2299 } else {
2300 EXPECT_EQ(expected_snr_, 0);
2301 std::cout << "SNR=" << "inf dB" << std::endl;
2302 }
2303
2304 fclose(out_file);
2305 fclose(ref_file);
2306 }
2307 }
2308
2309 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
2310 INSTANTIATE_TEST_CASE_P(
2311 CommonFormats, AudioProcessingTest, testing::Values(
2312 std::tr1::make_tuple(48000, 48000, 48000, 25),
2313 std::tr1::make_tuple(48000, 48000, 32000, 25),
2314 std::tr1::make_tuple(48000, 48000, 16000, 25),
2315 std::tr1::make_tuple(48000, 44100, 48000, 20),
2316 std::tr1::make_tuple(48000, 44100, 32000, 20),
2317 std::tr1::make_tuple(48000, 44100, 16000, 20),
2318 std::tr1::make_tuple(48000, 32000, 48000, 25),
2319 std::tr1::make_tuple(48000, 32000, 32000, 25),
2320 std::tr1::make_tuple(48000, 32000, 16000, 25),
2321 std::tr1::make_tuple(48000, 16000, 48000, 25),
2322 std::tr1::make_tuple(48000, 16000, 32000, 25),
2323 std::tr1::make_tuple(48000, 16000, 16000, 25),
2324
2325 std::tr1::make_tuple(44100, 48000, 48000, 20),
2326 std::tr1::make_tuple(44100, 48000, 32000, 20),
2327 std::tr1::make_tuple(44100, 48000, 16000, 20),
2328 std::tr1::make_tuple(44100, 44100, 48000, 20),
2329 std::tr1::make_tuple(44100, 44100, 32000, 20),
2330 std::tr1::make_tuple(44100, 44100, 16000, 20),
2331 std::tr1::make_tuple(44100, 32000, 48000, 20),
2332 std::tr1::make_tuple(44100, 32000, 32000, 20),
2333 std::tr1::make_tuple(44100, 32000, 16000, 20),
2334 std::tr1::make_tuple(44100, 16000, 48000, 20),
2335 std::tr1::make_tuple(44100, 16000, 32000, 20),
2336 std::tr1::make_tuple(44100, 16000, 16000, 20),
2337
2338 std::tr1::make_tuple(32000, 48000, 48000, 25),
2339 std::tr1::make_tuple(32000, 48000, 32000, 25),
2340 std::tr1::make_tuple(32000, 48000, 16000, 25),
2341 std::tr1::make_tuple(32000, 44100, 48000, 20),
2342 std::tr1::make_tuple(32000, 44100, 32000, 20),
2343 std::tr1::make_tuple(32000, 44100, 16000, 20),
2344 std::tr1::make_tuple(32000, 32000, 48000, 30),
2345 std::tr1::make_tuple(32000, 32000, 32000, 0),
2346 std::tr1::make_tuple(32000, 32000, 16000, 30),
2347 std::tr1::make_tuple(32000, 16000, 48000, 25),
2348 std::tr1::make_tuple(32000, 16000, 32000, 25),
2349 std::tr1::make_tuple(32000, 16000, 16000, 25),
2350
2351 std::tr1::make_tuple(16000, 48000, 48000, 25),
2352 std::tr1::make_tuple(16000, 48000, 32000, 25),
2353 std::tr1::make_tuple(16000, 48000, 16000, 25),
2354 std::tr1::make_tuple(16000, 44100, 48000, 15),
2355 std::tr1::make_tuple(16000, 44100, 32000, 15),
2356 std::tr1::make_tuple(16000, 44100, 16000, 15),
2357 std::tr1::make_tuple(16000, 32000, 48000, 25),
2358 std::tr1::make_tuple(16000, 32000, 32000, 25),
2359 std::tr1::make_tuple(16000, 32000, 16000, 25),
2360 std::tr1::make_tuple(16000, 16000, 48000, 30),
2361 std::tr1::make_tuple(16000, 16000, 32000, 30),
2362 std::tr1::make_tuple(16000, 16000, 16000, 0)));
2363
2364 #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
2365 INSTANTIATE_TEST_CASE_P(
2366 CommonFormats, AudioProcessingTest, testing::Values(
2367 std::tr1::make_tuple(48000, 48000, 48000, 20),
2368 std::tr1::make_tuple(48000, 48000, 32000, 20),
2369 std::tr1::make_tuple(48000, 48000, 16000, 20),
2370 std::tr1::make_tuple(48000, 44100, 48000, 15),
2371 std::tr1::make_tuple(48000, 44100, 32000, 15),
2372 std::tr1::make_tuple(48000, 44100, 16000, 15),
2373 std::tr1::make_tuple(48000, 32000, 48000, 20),
2374 std::tr1::make_tuple(48000, 32000, 32000, 20),
2375 std::tr1::make_tuple(48000, 32000, 16000, 20),
2376 std::tr1::make_tuple(48000, 16000, 48000, 20),
2377 std::tr1::make_tuple(48000, 16000, 32000, 20),
2378 std::tr1::make_tuple(48000, 16000, 16000, 20),
2379
2380 std::tr1::make_tuple(44100, 48000, 48000, 19),
2381 std::tr1::make_tuple(44100, 48000, 32000, 19),
2382 std::tr1::make_tuple(44100, 48000, 16000, 19),
2383 std::tr1::make_tuple(44100, 44100, 48000, 15),
2384 std::tr1::make_tuple(44100, 44100, 32000, 15),
2385 std::tr1::make_tuple(44100, 44100, 16000, 15),
2386 std::tr1::make_tuple(44100, 32000, 48000, 19),
2387 std::tr1::make_tuple(44100, 32000, 32000, 19),
2388 std::tr1::make_tuple(44100, 32000, 16000, 19),
2389 std::tr1::make_tuple(44100, 16000, 48000, 19),
2390 std::tr1::make_tuple(44100, 16000, 32000, 19),
2391 std::tr1::make_tuple(44100, 16000, 16000, 19),
2392
2393 std::tr1::make_tuple(32000, 48000, 48000, 19),
2394 std::tr1::make_tuple(32000, 48000, 32000, 19),
2395 std::tr1::make_tuple(32000, 48000, 16000, 19),
2396 std::tr1::make_tuple(32000, 44100, 48000, 15),
2397 std::tr1::make_tuple(32000, 44100, 32000, 15),
2398 std::tr1::make_tuple(32000, 44100, 16000, 15),
2399 std::tr1::make_tuple(32000, 32000, 48000, 19),
2400 std::tr1::make_tuple(32000, 32000, 32000, 19),
2401 std::tr1::make_tuple(32000, 32000, 16000, 19),
2402 std::tr1::make_tuple(32000, 16000, 48000, 19),
2403 std::tr1::make_tuple(32000, 16000, 32000, 19),
2404 std::tr1::make_tuple(32000, 16000, 16000, 19),
2405
2406 std::tr1::make_tuple(16000, 48000, 48000, 25),
2407 std::tr1::make_tuple(16000, 48000, 32000, 25),
2408 std::tr1::make_tuple(16000, 48000, 16000, 25),
2409 std::tr1::make_tuple(16000, 44100, 48000, 15),
2410 std::tr1::make_tuple(16000, 44100, 32000, 15),
2411 std::tr1::make_tuple(16000, 44100, 16000, 15),
2412 std::tr1::make_tuple(16000, 32000, 48000, 25),
2413 std::tr1::make_tuple(16000, 32000, 32000, 25),
2414 std::tr1::make_tuple(16000, 32000, 16000, 25),
2415 std::tr1::make_tuple(16000, 16000, 48000, 30),
2416 std::tr1::make_tuple(16000, 16000, 32000, 30),
2417 std::tr1::make_tuple(16000, 16000, 16000, 0)));
2418 #endif
2419
2420 // TODO(henrike): re-implement functionality lost when removing the old main
2421 // function. See
2422 // https://code.google.com/p/webrtc/issues/detail?id=1981
2423
2424 } // namespace
2425 } // namespace webrtc
2426