• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This header file includes the descriptions of the core VAD calls.
13  */
14 
15 #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
16 #define COMMON_AUDIO_VAD_VAD_CORE_H_
17 
18 #include "common_audio/signal_processing/include/signal_processing_library.h"
19 
// CONSTEXPR_INT(name = value) declares a named integer compile-time constant:
// a `constexpr int` when this header is compiled as C++, and an anonymous-enum
// constant when it is compiled as C. Either form can be used as an array bound.
20 // TODO(https://bugs.webrtc.org/14476): When converted to C++, remove the macro.
21 #if defined(__cplusplus)
22 #define CONSTEXPR_INT(x) constexpr int x
23 #else
24 #define CONSTEXPR_INT(x) enum { x }
25 #endif
26 
// Compile-time constants for the VAD core.
27 CONSTEXPR_INT(kNumChannels = 6);  // Number of frequency bands (named channels).
28 CONSTEXPR_INT(
29     kNumGaussians = 2);  // Number of Gaussians per channel in the GMM.
// Number of Gaussians summed over all channels; used as the length of the
// means/stds tables in VadInstT.
30 CONSTEXPR_INT(kTableSize = kNumChannels * kNumGaussians);
31 CONSTEXPR_INT(
32     kMinEnergy = 10);  // Minimum energy required to trigger audio signal.
33 
// State of one core VAD instance. Every function declared in this header takes
// a pointer to this struct as its first argument.
// NOTE(review): the per-field notes below are inferred from field names, array
// sizes and the existing TODOs only; confirm against vad_core.c.
34 typedef struct VadInstT_ {
35   int vad;
36   int32_t downsampling_filter_states[4];
37   WebRtcSpl_State48khzTo8khz state_48_to_8;  // Presumably 48->8 kHz resampling.
  // Four tables of kTableSize (= kNumChannels * kNumGaussians) entries each;
  // presumably the GMM means and standard deviations for noise and speech.
38   int16_t noise_means[kTableSize];
39   int16_t speech_means[kTableSize];
40   int16_t noise_stds[kTableSize];
41   int16_t speech_stds[kTableSize];
42   // TODO(bjornv): Change to `frame_count`.
43   int32_t frame_counter;
44   int16_t over_hang;  // Over Hang
45   int16_t num_of_speech;
46   // TODO(bjornv): Change to `age_vector`.
47   int16_t index_vector[16 * kNumChannels];
48   int16_t low_value_vector[16 * kNumChannels];
49   // TODO(bjornv): Change to `median`.
50   int16_t mean_value[kNumChannels];  // One entry per channel.
51   int16_t upper_state[5];
52   int16_t lower_state[5];
53   int16_t hp_filter_state[4];
54   int16_t over_hang_max_1[3];
55   int16_t over_hang_max_2[3];
56   int16_t individual[3];
57   int16_t total[3];
58 
59   int init_flag;  // NOTE(review): presumably set by WebRtcVad_InitCore().
60 } VadInstT;
61 
62 // Initializes the core VAD component. The default aggressiveness mode is
63 // controlled by `kDefaultMode` in vad_core.c.
64 //
65 // - self [i/o] : Instance that should be initialized
66 //
67 // returns      : 0 (OK), -1 (`self` is a null pointer, or the default mode
68 //                can't be set)
69 int WebRtcVad_InitCore(VadInstT* self);
70 
71 /****************************************************************************
72  * WebRtcVad_set_mode_core(...)
73  *
74  * This function changes the VAD settings.
75  *
76  * Input:
77  *      - self      : VAD instance
78  *      - mode      : Aggressiveness degree
79  *                    0 (High quality) - 3 (Highly aggressive)
80  *
81  * Output:
82  *      - self      : Updated instance
83  *
84  * Return value     :  0 - Ok
85  *                    -1 - Error
86  */
87 
88 int WebRtcVad_set_mode_core(VadInstT* self, int mode);
89 
90 /****************************************************************************
91  * WebRtcVad_CalcVad48khz(...)
92  * WebRtcVad_CalcVad32khz(...)
93  * WebRtcVad_CalcVad16khz(...)
94  * WebRtcVad_CalcVad8khz(...)
95  *
96  * Calculate probability for active speech and make VAD decision.
97  *
98  * Input:
99  *      - inst          : VAD instance
100  *      - speech_frame  : Input speech frame (read-only)
101  *      - frame_length  : Number of input samples
102  *
103  * Output:
104  *      - inst          : Updated filter states etc.
105  *
106  * Return value         : VAD decision
107  *                        0 - No active speech
108  *                        1-6 - Active speech
109  */
110 int WebRtcVad_CalcVad48khz(VadInstT* inst,
111                            const int16_t* speech_frame,
112                            size_t frame_length);
113 int WebRtcVad_CalcVad32khz(VadInstT* inst,
114                            const int16_t* speech_frame,
115                            size_t frame_length);
116 int WebRtcVad_CalcVad16khz(VadInstT* inst,
117                            const int16_t* speech_frame,
118                            size_t frame_length);
119 int WebRtcVad_CalcVad8khz(VadInstT* inst,
120                           const int16_t* speech_frame,
121                           size_t frame_length);
122 
123 #endif  // COMMON_AUDIO_VAD_VAD_CORE_H_
124