• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
12 #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
13 
14 #include <stddef.h>
15 #include <stdint.h>
16 
17 #include <memory>
18 
19 #include "modules/audio_processing/vad/common.h"  // AudioFeatures, kSampleR...
20 
21 namespace webrtc {
22 
23 class PoleZeroFilter;
24 
// Audio front end declared for the VAD module. The public surface is a single
// call, ExtractFeatures(), which takes 16-bit audio samples and an
// AudioFeatures pointer. The private section declares helpers named after the
// analysis steps (pitch analysis, sub-frame correlation, LPC polynomials,
// spectral peaks, RMS) plus the buffers and handles they share.
//
// Several members are std::unique_ptr to types that are only forward-declared
// in this header, so the destructor is declared here and has to be defined
// where those types are complete (presumably the matching .cc file).
25 class VadAudioProc {
26  public:
27   // Forward declare iSAC structs.
28   struct PitchAnalysisStruct;
29   struct PreFiltBankstr;
30 
31   VadAudioProc();
32   ~VadAudioProc();
33 
  // Takes `length` samples of 16-bit audio starting at `audio_frame`.
  // `audio_features` is a non-const pointer, presumably the output that
  // receives the extracted features.
  // NOTE(review): the return-value convention is not visible in this header;
  // presumably kNoError (0) signals success -- confirm against the .cc.
34   int ExtractFeatures(const int16_t* audio_frame,
35                       size_t length,
36                       AudioFeatures* audio_features);
37 
  // DFT size; kIpLength and kWLength below are each half of this value.
38   static constexpr size_t kDftSize = 512;
39 
40  private:
  // Internal analysis steps. Each takes one or more non-const double buffers
  // (presumably outputs) together with a length argument.
  // NOTE(review): exact semantics and required lengths live in the .cc.
41   void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
42   void SubframeCorrelation(double* corr,
43                            size_t length_corr,
44                            size_t subframe_index);
45   void GetLpcPolynomials(double* lpc, size_t length_lpc);
46   void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
47   void Rms(double* rms, size_t length_rms);
48   void ResetBuffer();
49 
50   // To compute spectral peaks we perform LPC analysis to get the spectral
51   // envelope. For every 30 ms we compute 3 spectral peaks, therefore 3 LPC
52   // analyses. LPC is computed over 15 ms of windowed audio. For every 10 ms
53   // sub-frame we need 5 ms of past signal to create the input of LPC analysis.
  // kSampleRateHz / 200 is the number of samples in those 5 ms.
54   static constexpr size_t kNumPastSignalSamples =
55       static_cast<size_t>(kSampleRateHz / 200);
56 
57   // TODO(turajs): consider defining this at a higher level (maybe as an enum)
58   // so that all the code recognizes it as "no-error."
59   static constexpr int kNoError = 0;
60 
  // Number of 10 ms sub-frames per processed block, and the number of samples
  // in one such sub-frame (kSampleRateHz / 100).
61   static constexpr size_t kNum10msSubframes = 3;
62   static constexpr size_t kNumSubframeSamples =
63       static_cast<size_t>(kSampleRateHz / 100);
64   // Samples in 30 ms @ given sampling rate.
65   static constexpr size_t kNumSamplesToProcess =
66       size_t{kNum10msSubframes} * kNumSubframeSamples;
  // Past-signal samples (5 ms) plus the samples to process (30 ms).
67   static constexpr size_t kBufferLength =
68       size_t{kNumPastSignalSamples} + kNumSamplesToProcess;
  // Lengths of ip_ and w_fft_: half the DFT size each.
69   static constexpr size_t kIpLength = kDftSize >> 1;
70   static constexpr size_t kWLength = kDftSize >> 1;
  // NOTE(review): presumably the order of the LPC analysis described above --
  // confirm against the .cc.
71   static constexpr size_t kLpcOrder = 16;
72 
  // NOTE(review): presumably work/table arrays handed to the FFT routine --
  // confirm against the .cc.
73   size_t ip_[kIpLength];
74   float w_fft_[kWLength];
75 
76   // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
77   float audio_buffer_[kBufferLength];
  // NOTE(review): presumably the number of valid samples currently held in
  // audio_buffer_ -- confirm against the .cc.
78   size_t num_buffer_samples_;
79 
  // NOTE(review): presumably pitch-related state carried over between calls --
  // confirm against the .cc.
80   double log_old_gain_;
81   double old_lag_;
82 
  // Owned handles. PitchAnalysisStruct and PreFiltBankstr are the nested types
  // forward-declared above; PoleZeroFilter is forward-declared at namespace
  // scope. All three are incomplete in this header.
83   std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
84   std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
85   std::unique_ptr<PoleZeroFilter> high_pass_filter_;
86 };
87 
88 }  // namespace webrtc
89 
90 #endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
91