1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "voice_detection_impl.h"
12
13 #include <cassert>
14
15 #include "critical_section_wrapper.h"
16 #include "webrtc_vad.h"
17
18 #include "audio_processing_impl.h"
19 #include "audio_buffer.h"
20
21 namespace webrtc {
22
// Concrete type behind the opaque void* handles in this file: CreateHandle()
// returns a Handle* as void*, and the other handle hooks cast it back.
typedef VadInst Handle;
24
25 namespace {
MapSetting(VoiceDetection::Likelihood likelihood)26 WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
27 switch (likelihood) {
28 case VoiceDetection::kVeryLowLikelihood:
29 return 3;
30 break;
31 case VoiceDetection::kLowLikelihood:
32 return 2;
33 break;
34 case VoiceDetection::kModerateLikelihood:
35 return 1;
36 break;
37 case VoiceDetection::kHighLikelihood:
38 return 0;
39 break;
40 default:
41 return -1;
42 }
43 }
44 } // namespace
45
46
VoiceDetectionImpl(const AudioProcessingImpl * apm)47 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
48 : ProcessingComponent(apm),
49 apm_(apm),
50 stream_has_voice_(false),
51 using_external_vad_(false),
52 likelihood_(kLowLikelihood),
53 frame_size_ms_(10),
54 frame_size_samples_(0) {}
55
~VoiceDetectionImpl()56 VoiceDetectionImpl::~VoiceDetectionImpl() {}
57
ProcessCaptureAudio(AudioBuffer * audio)58 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
59 if (!is_component_enabled()) {
60 return apm_->kNoError;
61 }
62
63 if (using_external_vad_) {
64 using_external_vad_ = false;
65 return apm_->kNoError;
66 }
67 assert(audio->samples_per_split_channel() <= 160);
68
69 WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
70 if (audio->num_channels() > 1) {
71 audio->CopyAndMixLowPass(1);
72 mixed_data = audio->mixed_low_pass_data(0);
73 }
74
75 // TODO(ajm): concatenate data in frame buffer here.
76
77 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
78 apm_->split_sample_rate_hz(),
79 mixed_data,
80 frame_size_samples_);
81 if (vad_ret == 0) {
82 stream_has_voice_ = false;
83 audio->set_activity(AudioFrame::kVadPassive);
84 } else if (vad_ret == 1) {
85 stream_has_voice_ = true;
86 audio->set_activity(AudioFrame::kVadActive);
87 } else {
88 return apm_->kUnspecifiedError;
89 }
90
91 return apm_->kNoError;
92 }
93
Enable(bool enable)94 int VoiceDetectionImpl::Enable(bool enable) {
95 CriticalSectionScoped crit_scoped(*apm_->crit());
96 return EnableComponent(enable);
97 }
98
is_enabled() const99 bool VoiceDetectionImpl::is_enabled() const {
100 return is_component_enabled();
101 }
102
set_stream_has_voice(bool has_voice)103 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
104 using_external_vad_ = true;
105 stream_has_voice_ = has_voice;
106 return apm_->kNoError;
107 }
108
stream_has_voice() const109 bool VoiceDetectionImpl::stream_has_voice() const {
110 // TODO(ajm): enable this assertion?
111 //assert(using_external_vad_ || is_component_enabled());
112 return stream_has_voice_;
113 }
114
set_likelihood(VoiceDetection::Likelihood likelihood)115 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
116 CriticalSectionScoped crit_scoped(*apm_->crit());
117 if (MapSetting(likelihood) == -1) {
118 return apm_->kBadParameterError;
119 }
120
121 likelihood_ = likelihood;
122 return Configure();
123 }
124
likelihood() const125 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
126 return likelihood_;
127 }
128
set_frame_size_ms(int size)129 int VoiceDetectionImpl::set_frame_size_ms(int size) {
130 CriticalSectionScoped crit_scoped(*apm_->crit());
131 assert(size == 10); // TODO(ajm): remove when supported.
132 if (size != 10 &&
133 size != 20 &&
134 size != 30) {
135 return apm_->kBadParameterError;
136 }
137
138 frame_size_ms_ = size;
139
140 return Initialize();
141 }
142
frame_size_ms() const143 int VoiceDetectionImpl::frame_size_ms() const {
144 return frame_size_ms_;
145 }
146
Initialize()147 int VoiceDetectionImpl::Initialize() {
148 int err = ProcessingComponent::Initialize();
149 if (err != apm_->kNoError || !is_component_enabled()) {
150 return err;
151 }
152
153 using_external_vad_ = false;
154 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
155 // TODO(ajm): intialize frame buffer here.
156
157 return apm_->kNoError;
158 }
159
get_version(char * version,int version_len_bytes) const160 int VoiceDetectionImpl::get_version(char* version,
161 int version_len_bytes) const {
162 if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
163 return apm_->kBadParameterError;
164 }
165
166 return apm_->kNoError;
167 }
168
CreateHandle() const169 void* VoiceDetectionImpl::CreateHandle() const {
170 Handle* handle = NULL;
171 if (WebRtcVad_Create(&handle) != apm_->kNoError) {
172 handle = NULL;
173 } else {
174 assert(handle != NULL);
175 }
176
177 return handle;
178 }
179
DestroyHandle(void * handle) const180 int VoiceDetectionImpl::DestroyHandle(void* handle) const {
181 return WebRtcVad_Free(static_cast<Handle*>(handle));
182 }
183
InitializeHandle(void * handle) const184 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
185 return WebRtcVad_Init(static_cast<Handle*>(handle));
186 }
187
ConfigureHandle(void * handle) const188 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
189 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
190 MapSetting(likelihood_));
191 }
192
num_handles_required() const193 int VoiceDetectionImpl::num_handles_required() const {
194 return 1;
195 }
196
GetHandleError(void * handle) const197 int VoiceDetectionImpl::GetHandleError(void* handle) const {
198 // The VAD has no get_error() function.
199 assert(handle != NULL);
200 return apm_->kUnspecifiedError;
201 }
202 } // namespace webrtc
203