1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * This header file includes the descriptions of the core VAD calls. 13 */ 14 15 #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_ 16 #define COMMON_AUDIO_VAD_VAD_CORE_H_ 17 18 #include "common_audio/signal_processing/include/signal_processing_library.h" 19 20 // TODO(https://bugs.webrtc.org/14476): When converted to C++, remove the macro. 21 #if defined(__cplusplus) 22 #define CONSTEXPR_INT(x) constexpr int x 23 #else 24 #define CONSTEXPR_INT(x) enum { x } 25 #endif 26 27 CONSTEXPR_INT(kNumChannels = 6); // Number of frequency bands (named channels). 28 CONSTEXPR_INT( 29 kNumGaussians = 2); // Number of Gaussians per channel in the GMM. 30 CONSTEXPR_INT(kTableSize = kNumChannels * kNumGaussians); 31 CONSTEXPR_INT( 32 kMinEnergy = 10); // Minimum energy required to trigger audio signal. 33 34 typedef struct VadInstT_ { 35 int vad; 36 int32_t downsampling_filter_states[4]; 37 WebRtcSpl_State48khzTo8khz state_48_to_8; 38 int16_t noise_means[kTableSize]; 39 int16_t speech_means[kTableSize]; 40 int16_t noise_stds[kTableSize]; 41 int16_t speech_stds[kTableSize]; 42 // TODO(bjornv): Change to `frame_count`. 43 int32_t frame_counter; 44 int16_t over_hang; // Over Hang 45 int16_t num_of_speech; 46 // TODO(bjornv): Change to `age_vector`. 47 int16_t index_vector[16 * kNumChannels]; 48 int16_t low_value_vector[16 * kNumChannels]; 49 // TODO(bjornv): Change to `median`. 50 int16_t mean_value[kNumChannels]; 51 int16_t upper_state[5]; 52 int16_t lower_state[5]; 53 int16_t hp_filter_state[4]; 54 int16_t over_hang_max_1[3]; 55 int16_t over_hang_max_2[3]; 56 int16_t individual[3]; 57 int16_t total[3]; 58 59 int init_flag; 60 } VadInstT; 61 62 // Initializes the core VAD component. The default aggressiveness mode is 63 // controlled by `kDefaultMode` in vad_core.c. 64 // 65 // - self [i/o] : Instance that should be initialized 66 // 67 // returns : 0 (OK), -1 (null pointer in or if the default mode can't be 68 // set) 69 int WebRtcVad_InitCore(VadInstT* self); 70 71 /**************************************************************************** 72 * WebRtcVad_set_mode_core(...) 73 * 74 * This function changes the VAD settings 75 * 76 * Input: 77 * - inst : VAD instance 78 * - mode : Aggressiveness degree 79 * 0 (High quality) - 3 (Highly aggressive) 80 * 81 * Output: 82 * - inst : Changed instance 83 * 84 * Return value : 0 - Ok 85 * -1 - Error 86 */ 87 88 int WebRtcVad_set_mode_core(VadInstT* self, int mode); 89 90 /**************************************************************************** 91 * WebRtcVad_CalcVad48khz(...) 92 * WebRtcVad_CalcVad32khz(...) 93 * WebRtcVad_CalcVad16khz(...) 94 * WebRtcVad_CalcVad8khz(...) 95 * 96 * Calculate probability for active speech and make VAD decision. 97 * 98 * Input: 99 * - inst : Instance that should be initialized 100 * - speech_frame : Input speech frame 101 * - frame_length : Number of input samples 102 * 103 * Output: 104 * - inst : Updated filter states etc. 105 * 106 * Return value : VAD decision 107 * 0 - No active speech 108 * 1-6 - Active speech 109 */ 110 int WebRtcVad_CalcVad48khz(VadInstT* inst, 111 const int16_t* speech_frame, 112 size_t frame_length); 113 int WebRtcVad_CalcVad32khz(VadInstT* inst, 114 const int16_t* speech_frame, 115 size_t frame_length); 116 int WebRtcVad_CalcVad16khz(VadInstT* inst, 117 const int16_t* speech_frame, 118 size_t frame_length); 119 int WebRtcVad_CalcVad8khz(VadInstT* inst, 120 const int16_t* speech_frame, 121 size_t frame_length); 122 123 #endif // COMMON_AUDIO_VAD_VAD_CORE_H_ 124