• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/vad/voice_activity_detector.h"
12 
13 #include <algorithm>
14 
15 #include "rtc_base/checks.h"
16 
17 namespace webrtc {
18 namespace {
19 
// Channel count handed to |resampler_| when it is (re)configured.
constexpr size_t kNumChannels = 1;

// Value |last_voice_probability_| is constructed with.
constexpr double kDefaultVoiceValue = 1.0;
// Seed written for every frame before the buffer is handed to the VADs.
constexpr double kNeutralProbability = 0.5;
// Arbitrary low probability reported for frames flagged as silence.
constexpr double kLowProbability = 0.01;
25 
26 }  // namespace
27 
VoiceActivityDetector()28 VoiceActivityDetector::VoiceActivityDetector()
29     : last_voice_probability_(kDefaultVoiceValue),
30       standalone_vad_(StandaloneVad::Create()) {}
31 
32 VoiceActivityDetector::~VoiceActivityDetector() = default;
33 
34 // Because ISAC has a different chunk length, it updates
35 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
36 // Otherwise it clears them.
ProcessChunk(const int16_t * audio,size_t length,int sample_rate_hz)37 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
38                                          size_t length,
39                                          int sample_rate_hz) {
40   RTC_DCHECK_EQ(length, sample_rate_hz / 100);
41   // Resample to the required rate.
42   const int16_t* resampled_ptr = audio;
43   if (sample_rate_hz != kSampleRateHz) {
44     RTC_CHECK_EQ(
45         resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
46         0);
47     resampler_.Push(audio, length, resampled_, kLength10Ms, length);
48     resampled_ptr = resampled_;
49   }
50   RTC_DCHECK_EQ(length, kLength10Ms);
51 
52   // Each chunk needs to be passed into |standalone_vad_|, because internally it
53   // buffers the audio and processes it all at once when GetActivity() is
54   // called.
55   RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
56 
57   audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
58 
59   chunkwise_voice_probabilities_.resize(features_.num_frames);
60   chunkwise_rms_.resize(features_.num_frames);
61   std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
62             chunkwise_rms_.begin());
63   if (features_.num_frames > 0) {
64     if (features_.silence) {
65       // The other features are invalid, so set the voice probabilities to an
66       // arbitrary low value.
67       std::fill(chunkwise_voice_probabilities_.begin(),
68                 chunkwise_voice_probabilities_.end(), kLowProbability);
69     } else {
70       std::fill(chunkwise_voice_probabilities_.begin(),
71                 chunkwise_voice_probabilities_.end(), kNeutralProbability);
72       RTC_CHECK_GE(
73           standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
74                                        chunkwise_voice_probabilities_.size()),
75           0);
76       RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
77                        features_, &chunkwise_voice_probabilities_[0]),
78                    0);
79     }
80     last_voice_probability_ = chunkwise_voice_probabilities_.back();
81   }
82 }
83 
84 }  // namespace webrtc
85