1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This header file declares the core VAD constants, instance struct and calls.
13  */
14 
15 #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
16 #define COMMON_AUDIO_VAD_VAD_CORE_H_
17 
18 #include "common_audio/signal_processing/include/signal_processing_library.h"
19 
20 enum { kNumChannels = 6 };   // Number of frequency bands (named channels).
21 enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
22 enum { kTableSize = kNumChannels * kNumGaussians };  // Entries per GMM table.
23 enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
24 
25 typedef struct VadInstT_ {  // State of one core VAD instance.
26   int vad;
27   int32_t downsampling_filter_states[4];
28   WebRtcSpl_State48khzTo8khz state_48_to_8;
29   int16_t noise_means[kTableSize];  // kTableSize entries; so are the next 3.
30   int16_t speech_means[kTableSize];
31   int16_t noise_stds[kTableSize];
32   int16_t speech_stds[kTableSize];
33   // TODO(bjornv): Rename |frame_counter| to |frame_count|.
34   int32_t frame_counter;
35   int16_t over_hang;  // NOTE(review): presumably a hangover counter; confirm.
36   int16_t num_of_speech;
37   // TODO(bjornv): Rename |index_vector| to |age_vector|.
38   int16_t index_vector[16 * kNumChannels];      // 16 entries per channel.
39   int16_t low_value_vector[16 * kNumChannels];  // 16 entries per channel.
40   // TODO(bjornv): Rename |mean_value| to |median|.
41   int16_t mean_value[kNumChannels];  // One entry per channel.
42   int16_t upper_state[5];
43   int16_t lower_state[5];
44   int16_t hp_filter_state[4];
45   int16_t over_hang_max_1[3];  // NOTE(review): [3] index meaning: see vad_core.c.
46   int16_t over_hang_max_2[3];
47   int16_t individual[3];
48   int16_t total[3];
49 
50   int init_flag;  // NOTE(review): presumably set by WebRtcVad_InitCore; confirm.
51 } VadInstT;
52 
53 // Initializes the core VAD component. The default aggressiveness mode is
54 // controlled by |kDefaultMode| in vad_core.c.
55 //
56 // - self [i/o] : Instance that should be initialized
57 //
58 // returns      : 0 (OK), -1 (null pointer passed in, or the default mode
59 //                can't be set)
60 int WebRtcVad_InitCore(VadInstT* self);
61 
62 /****************************************************************************
63  * WebRtcVad_set_mode_core(...)
64  *
65  * This function changes the VAD settings to the given aggressiveness mode.
66  *
67  * Input:
68  *      - self      : VAD instance
69  *      - mode      : Aggressiveness degree
70  *                    0 (High quality) - 3 (Highly aggressive)
71  *
72  * Output:
73  *      - self      : Changed instance
74  *
75  * Return value     :  0 - Ok
76  *                    -1 - Error
77  */
78 
79 int WebRtcVad_set_mode_core(VadInstT* self, int mode);
80 
81 /****************************************************************************
82  * WebRtcVad_CalcVad48khz(...)
83  * WebRtcVad_CalcVad32khz(...)
84  * WebRtcVad_CalcVad16khz(...)
85  * WebRtcVad_CalcVad8khz(...)
86  *
87  * Calculate probability for active speech and make VAD decision.
88  *
89  * Input:
90  *      - inst          : VAD instance (should already be initialized)
91  *      - speech_frame  : Input speech frame
92  *      - frame_length  : Number of samples in speech_frame
93  *
94  * Output:
95  *      - inst          : Updated filter states etc.
96  *
97  * Return value         : VAD decision
98  *                        0 - No active speech
99  *                        1-6 - Active speech
100  */
101 int WebRtcVad_CalcVad48khz(VadInstT* inst,
102                            const int16_t* speech_frame,
103                            size_t frame_length);
104 int WebRtcVad_CalcVad32khz(VadInstT* inst,
105                            const int16_t* speech_frame,
106                            size_t frame_length);
107 int WebRtcVad_CalcVad16khz(VadInstT* inst,
108                            const int16_t* speech_frame,
109                            size_t frame_length);
110 int WebRtcVad_CalcVad8khz(VadInstT* inst,
111                           const int16_t* speech_frame,
112                           size_t frame_length);
113 
114 #endif  // COMMON_AUDIO_VAD_VAD_CORE_H_
115