1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/voice_detection_impl.h" 12 13 #include "webrtc/common_audio/vad/include/webrtc_vad.h" 14 #include "webrtc/modules/audio_processing/audio_buffer.h" 15 16 namespace webrtc { 17 class VoiceDetectionImpl::Vad { 18 public: Vad()19 Vad() { 20 state_ = WebRtcVad_Create(); 21 RTC_CHECK(state_); 22 int error = WebRtcVad_Init(state_); 23 RTC_DCHECK_EQ(0, error); 24 } ~Vad()25 ~Vad() { 26 WebRtcVad_Free(state_); 27 } state()28 VadInst* state() { return state_; } 29 private: 30 VadInst* state_ = nullptr; 31 RTC_DISALLOW_COPY_AND_ASSIGN(Vad); 32 }; 33 VoiceDetectionImpl(rtc::CriticalSection * crit)34VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) 35 : crit_(crit) { 36 RTC_DCHECK(crit); 37 } 38 ~VoiceDetectionImpl()39VoiceDetectionImpl::~VoiceDetectionImpl() {} 40 Initialize(int sample_rate_hz)41void VoiceDetectionImpl::Initialize(int sample_rate_hz) { 42 rtc::CritScope cs(crit_); 43 sample_rate_hz_ = sample_rate_hz; 44 rtc::scoped_ptr<Vad> new_vad; 45 if (enabled_) { 46 new_vad.reset(new Vad()); 47 } 48 vad_.swap(new_vad); 49 using_external_vad_ = false; 50 frame_size_samples_ = 51 static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; 52 set_likelihood(likelihood_); 53 } 54 ProcessCaptureAudio(AudioBuffer * audio)55void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 56 rtc::CritScope cs(crit_); 57 if (!enabled_) { 58 return; 59 } 60 if (using_external_vad_) { 61 using_external_vad_ = false; 62 return; 63 } 64 65 RTC_DCHECK_GE(160u, audio->num_frames_per_band()); 66 // TODO(ajm): concatenate data in frame buffer here. 67 int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, 68 audio->mixed_low_pass_data(), 69 frame_size_samples_); 70 if (vad_ret == 0) { 71 stream_has_voice_ = false; 72 audio->set_activity(AudioFrame::kVadPassive); 73 } else if (vad_ret == 1) { 74 stream_has_voice_ = true; 75 audio->set_activity(AudioFrame::kVadActive); 76 } else { 77 RTC_NOTREACHED(); 78 } 79 } 80 Enable(bool enable)81int VoiceDetectionImpl::Enable(bool enable) { 82 rtc::CritScope cs(crit_); 83 if (enabled_ != enable) { 84 enabled_ = enable; 85 Initialize(sample_rate_hz_); 86 } 87 return AudioProcessing::kNoError; 88 } 89 is_enabled() const90bool VoiceDetectionImpl::is_enabled() const { 91 rtc::CritScope cs(crit_); 92 return enabled_; 93 } 94 set_stream_has_voice(bool has_voice)95int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 96 rtc::CritScope cs(crit_); 97 using_external_vad_ = true; 98 stream_has_voice_ = has_voice; 99 return AudioProcessing::kNoError; 100 } 101 stream_has_voice() const102bool VoiceDetectionImpl::stream_has_voice() const { 103 rtc::CritScope cs(crit_); 104 // TODO(ajm): enable this assertion? 105 //assert(using_external_vad_ || is_component_enabled()); 106 return stream_has_voice_; 107 } 108 set_likelihood(VoiceDetection::Likelihood likelihood)109int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 110 rtc::CritScope cs(crit_); 111 likelihood_ = likelihood; 112 if (enabled_) { 113 int mode = 2; 114 switch (likelihood) { 115 case VoiceDetection::kVeryLowLikelihood: 116 mode = 3; 117 break; 118 case VoiceDetection::kLowLikelihood: 119 mode = 2; 120 break; 121 case VoiceDetection::kModerateLikelihood: 122 mode = 1; 123 break; 124 case VoiceDetection::kHighLikelihood: 125 mode = 0; 126 break; 127 default: 128 RTC_NOTREACHED(); 129 break; 130 } 131 int error = WebRtcVad_set_mode(vad_->state(), mode); 132 RTC_DCHECK_EQ(0, error); 133 } 134 return AudioProcessing::kNoError; 135 } 136 likelihood() const137VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 138 rtc::CritScope cs(crit_); 139 return likelihood_; 140 } 141 set_frame_size_ms(int size)142int VoiceDetectionImpl::set_frame_size_ms(int size) { 143 rtc::CritScope cs(crit_); 144 RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. 145 frame_size_ms_ = size; 146 Initialize(sample_rate_hz_); 147 return AudioProcessing::kNoError; 148 } 149 frame_size_ms() const150int VoiceDetectionImpl::frame_size_ms() const { 151 rtc::CritScope cs(crit_); 152 return frame_size_ms_; 153 } 154 } // namespace webrtc 155