1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 12 #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 13 14 #include <stddef.h> 15 #include <stdint.h> 16 17 #include <memory> 18 19 #include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR... 20 21 namespace webrtc { 22 23 class PoleZeroFilter; 24 25 class VadAudioProc { 26 public: 27 // Forward declare iSAC structs. 28 struct PitchAnalysisStruct; 29 struct PreFiltBankstr; 30 31 VadAudioProc(); 32 ~VadAudioProc(); 33 34 int ExtractFeatures(const int16_t* audio_frame, 35 size_t length, 36 AudioFeatures* audio_features); 37 38 static constexpr size_t kDftSize = 512; 39 40 private: 41 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length); 42 void SubframeCorrelation(double* corr, 43 size_t length_corr, 44 size_t subframe_index); 45 void GetLpcPolynomials(double* lpc, size_t length_lpc); 46 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak); 47 void Rms(double* rms, size_t length_rms); 48 void ResetBuffer(); 49 50 // To compute spectral peak we perform LPC analysis to get spectral envelope. 51 // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis. 52 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame 53 // we need 5 ms of past signal to create the input of LPC analysis. 54 static constexpr size_t kNumPastSignalSamples = 55 static_cast<size_t>(kSampleRateHz / 200); 56 57 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that 58 // all the code recognize it as "no-error." 59 static constexpr int kNoError = 0; 60 61 static constexpr size_t kNum10msSubframes = 3; 62 static constexpr size_t kNumSubframeSamples = 63 static_cast<size_t>(kSampleRateHz / 100); 64 // Samples in 30 ms @ given sampling rate. 65 static constexpr size_t kNumSamplesToProcess = 66 size_t{kNum10msSubframes} * kNumSubframeSamples; 67 static constexpr size_t kBufferLength = 68 size_t{kNumPastSignalSamples} + kNumSamplesToProcess; 69 static constexpr size_t kIpLength = kDftSize >> 1; 70 static constexpr size_t kWLength = kDftSize >> 1; 71 static constexpr size_t kLpcOrder = 16; 72 73 size_t ip_[kIpLength]; 74 float w_fft_[kWLength]; 75 76 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ). 77 float audio_buffer_[kBufferLength]; 78 size_t num_buffer_samples_; 79 80 double log_old_gain_; 81 double old_lag_; 82 83 std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_; 84 std::unique_ptr<PreFiltBankstr> pre_filter_handle_; 85 std::unique_ptr<PoleZeroFilter> high_pass_filter_; 86 }; 87 88 } // namespace webrtc 89 90 #endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 91