1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 12 #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 13 14 #include <stddef.h> 15 #include <stdint.h> 16 17 #include <memory> 18 19 #include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR... 20 21 namespace webrtc { 22 23 class PoleZeroFilter; 24 25 class VadAudioProc { 26 public: 27 // Forward declare iSAC structs. 28 struct PitchAnalysisStruct; 29 struct PreFiltBankstr; 30 31 VadAudioProc(); 32 ~VadAudioProc(); 33 34 int ExtractFeatures(const int16_t* audio_frame, 35 size_t length, 36 AudioFeatures* audio_features); 37 38 static const size_t kDftSize = 512; 39 40 private: 41 void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length); 42 void SubframeCorrelation(double* corr, 43 size_t length_corr, 44 size_t subframe_index); 45 void GetLpcPolynomials(double* lpc, size_t length_lpc); 46 void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak); 47 void Rms(double* rms, size_t length_rms); 48 void ResetBuffer(); 49 50 // To compute spectral peak we perform LPC analysis to get spectral envelope. 51 // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis. 52 // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame 53 // we need 5 ms of past signal to create the input of LPC analysis. 54 enum : size_t { 55 kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200) 56 }; 57 58 // TODO(turajs): maybe defining this at a higher level (maybe enum) so that 59 // all the code recognize it as "no-error." 60 enum : int { kNoError = 0 }; 61 62 enum : size_t { kNum10msSubframes = 3 }; 63 enum : size_t { 64 kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100) 65 }; 66 enum : size_t { 67 // Samples in 30 ms @ given sampling rate. 68 kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples 69 }; 70 enum : size_t { 71 kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess 72 }; 73 enum : size_t { kIpLength = kDftSize >> 1 }; 74 enum : size_t { kWLength = kDftSize >> 1 }; 75 enum : size_t { kLpcOrder = 16 }; 76 77 size_t ip_[kIpLength]; 78 float w_fft_[kWLength]; 79 80 // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ). 81 float audio_buffer_[kBufferLength]; 82 size_t num_buffer_samples_; 83 84 double log_old_gain_; 85 double old_lag_; 86 87 std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_; 88 std::unique_ptr<PreFiltBankstr> pre_filter_handle_; 89 std::unique_ptr<PoleZeroFilter> high_pass_filter_; 90 }; 91 92 } // namespace webrtc 93 94 #endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ 95