1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 /* 13 * This header file includes the descriptions of the core VAD calls. 14 */ 15 16 #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ 17 #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ 18 19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 #include "webrtc/typedefs.h" 21 22 enum { kNumChannels = 6 }; // Number of frequency bands (named channels). 23 enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. 24 enum { kTableSize = kNumChannels * kNumGaussians }; 25 enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. 26 27 typedef struct VadInstT_ 28 { 29 30 int vad; 31 int32_t downsampling_filter_states[4]; 32 WebRtcSpl_State48khzTo8khz state_48_to_8; 33 int16_t noise_means[kTableSize]; 34 int16_t speech_means[kTableSize]; 35 int16_t noise_stds[kTableSize]; 36 int16_t speech_stds[kTableSize]; 37 // TODO(bjornv): Change to |frame_count|. 38 int32_t frame_counter; 39 int16_t over_hang; // Over Hang 40 int16_t num_of_speech; 41 // TODO(bjornv): Change to |age_vector|. 42 int16_t index_vector[16 * kNumChannels]; 43 int16_t low_value_vector[16 * kNumChannels]; 44 // TODO(bjornv): Change to |median|. 45 int16_t mean_value[kNumChannels]; 46 int16_t upper_state[5]; 47 int16_t lower_state[5]; 48 int16_t hp_filter_state[4]; 49 int16_t over_hang_max_1[3]; 50 int16_t over_hang_max_2[3]; 51 int16_t individual[3]; 52 int16_t total[3]; 53 54 int init_flag; 55 56 } VadInstT; 57 58 // Initializes the core VAD component. The default aggressiveness mode is 59 // controlled by |kDefaultMode| in vad_core.c. 60 // 61 // - self [i/o] : Instance that should be initialized 62 // 63 // returns : 0 (OK), -1 (NULL pointer in or if the default mode can't be 64 // set) 65 int WebRtcVad_InitCore(VadInstT* self); 66 67 /**************************************************************************** 68 * WebRtcVad_set_mode_core(...) 69 * 70 * This function changes the VAD settings 71 * 72 * Input: 73 * - inst : VAD instance 74 * - mode : Aggressiveness degree 75 * 0 (High quality) - 3 (Highly aggressive) 76 * 77 * Output: 78 * - inst : Changed instance 79 * 80 * Return value : 0 - Ok 81 * -1 - Error 82 */ 83 84 int WebRtcVad_set_mode_core(VadInstT* self, int mode); 85 86 /**************************************************************************** 87 * WebRtcVad_CalcVad48khz(...) 88 * WebRtcVad_CalcVad32khz(...) 89 * WebRtcVad_CalcVad16khz(...) 90 * WebRtcVad_CalcVad8khz(...) 91 * 92 * Calculate probability for active speech and make VAD decision. 93 * 94 * Input: 95 * - inst : Instance that should be initialized 96 * - speech_frame : Input speech frame 97 * - frame_length : Number of input samples 98 * 99 * Output: 100 * - inst : Updated filter states etc. 101 * 102 * Return value : VAD decision 103 * 0 - No active speech 104 * 1-6 - Active speech 105 */ 106 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, 107 size_t frame_length); 108 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, 109 size_t frame_length); 110 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, 111 size_t frame_length); 112 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, 113 size_t frame_length); 114 115 #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ 116