• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
12 
13 #include <algorithm>
14 
15 #include "webrtc/base/checks.h"
16 
17 namespace webrtc {
namespace {

// Input-format limits.
// Upper bound on the |length| accepted by ProcessChunk(). Since a chunk must
// hold |sample_rate_hz| / 100 samples, this corresponds to 10 ms at 32 kHz.
const size_t kMaxLength = 320;
// Channel count handed to the resampler when it is (re)configured.
const size_t kNumChannels = 1;

// Voice-probability constants.
// Initial value of |last_voice_probability_|, used until a chunk yields frames.
const double kDefaultVoiceValue = 1.0;
// Starting value written to every frame before the VADs refine it.
const double kNeutralProbability = 0.5;
// Value assigned to every frame when the extracted features are flagged as
// silence.
const double kLowProbability = 0.01;

}  // namespace
28 
VoiceActivityDetector()29 VoiceActivityDetector::VoiceActivityDetector()
30     : last_voice_probability_(kDefaultVoiceValue),
31       standalone_vad_(StandaloneVad::Create()) {
32 }
33 
34 // Because ISAC has a different chunk length, it updates
35 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
36 // Otherwise it clears them.
ProcessChunk(const int16_t * audio,size_t length,int sample_rate_hz)37 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
38                                          size_t length,
39                                          int sample_rate_hz) {
40   RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100);
41   RTC_DCHECK_LE(length, kMaxLength);
42   // Resample to the required rate.
43   const int16_t* resampled_ptr = audio;
44   if (sample_rate_hz != kSampleRateHz) {
45     RTC_CHECK_EQ(
46         resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
47         0);
48     resampler_.Push(audio, length, resampled_, kLength10Ms, length);
49     resampled_ptr = resampled_;
50   }
51   RTC_DCHECK_EQ(length, kLength10Ms);
52 
53   // Each chunk needs to be passed into |standalone_vad_|, because internally it
54   // buffers the audio and processes it all at once when GetActivity() is
55   // called.
56   RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
57 
58   audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
59 
60   chunkwise_voice_probabilities_.resize(features_.num_frames);
61   chunkwise_rms_.resize(features_.num_frames);
62   std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
63             chunkwise_rms_.begin());
64   if (features_.num_frames > 0) {
65     if (features_.silence) {
66       // The other features are invalid, so set the voice probabilities to an
67       // arbitrary low value.
68       std::fill(chunkwise_voice_probabilities_.begin(),
69                 chunkwise_voice_probabilities_.end(), kLowProbability);
70     } else {
71       std::fill(chunkwise_voice_probabilities_.begin(),
72                 chunkwise_voice_probabilities_.end(), kNeutralProbability);
73       RTC_CHECK_GE(
74           standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
75                                        chunkwise_voice_probabilities_.size()),
76           0);
77       RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
78                        features_, &chunkwise_voice_probabilities_[0]),
79                    0);
80     }
81     last_voice_probability_ = chunkwise_voice_probabilities_.back();
82   }
83 }
84 
85 }  // namespace webrtc
86