1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" 12 13 #include <assert.h> 14 #include <math.h> 15 #include <string.h> 16 17 #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h" 18 #include "webrtc/modules/audio_processing/vad/common.h" 19 #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h" 20 #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h" 21 #include "webrtc/modules/include/module_common_types.h" 22 23 namespace webrtc { 24 25 static_assert(kNoiseGmmDim == kVoiceGmmDim, 26 "noise and voice gmm dimension not equal"); 27 28 // These values should match MATLAB counterparts for unit-tests to pass. 29 static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames. 30 static const double kInitialPriorProbability = 0.3; 31 static const int kTransientWidthThreshold = 7; 32 static const double kLowProbabilityThreshold = 0.2; 33 LimitProbability(double p)34static double LimitProbability(double p) { 35 const double kLimHigh = 0.99; 36 const double kLimLow = 0.01; 37 38 if (p > kLimHigh) 39 p = kLimHigh; 40 else if (p < kLimLow) 41 p = kLimLow; 42 return p; 43 } 44 PitchBasedVad()45PitchBasedVad::PitchBasedVad() 46 : p_prior_(kInitialPriorProbability), 47 circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) { 48 // Setup noise GMM. 49 noise_gmm_.dimension = kNoiseGmmDim; 50 noise_gmm_.num_mixtures = kNoiseGmmNumMixtures; 51 noise_gmm_.weight = kNoiseGmmWeights; 52 noise_gmm_.mean = &kNoiseGmmMean[0][0]; 53 noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; 54 55 // Setup voice GMM. 56 voice_gmm_.dimension = kVoiceGmmDim; 57 voice_gmm_.num_mixtures = kVoiceGmmNumMixtures; 58 voice_gmm_.weight = kVoiceGmmWeights; 59 voice_gmm_.mean = &kVoiceGmmMean[0][0]; 60 voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; 61 } 62 ~PitchBasedVad()63PitchBasedVad::~PitchBasedVad() { 64 } 65 VoicingProbability(const AudioFeatures & features,double * p_combined)66int PitchBasedVad::VoicingProbability(const AudioFeatures& features, 67 double* p_combined) { 68 double p; 69 double gmm_features[3]; 70 double pdf_features_given_voice; 71 double pdf_features_given_noise; 72 // These limits are the same in matlab implementation 'VoicingProbGMM().' 73 const double kLimLowLogPitchGain = -2.0; 74 const double kLimHighLogPitchGain = -0.9; 75 const double kLimLowSpectralPeak = 200; 76 const double kLimHighSpectralPeak = 2000; 77 const double kEps = 1e-12; 78 for (size_t n = 0; n < features.num_frames; n++) { 79 gmm_features[0] = features.log_pitch_gain[n]; 80 gmm_features[1] = features.spectral_peak[n]; 81 gmm_features[2] = features.pitch_lag_hz[n]; 82 83 pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_); 84 pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_); 85 86 if (features.spectral_peak[n] < kLimLowSpectralPeak || 87 features.spectral_peak[n] > kLimHighSpectralPeak || 88 features.log_pitch_gain[n] < kLimLowLogPitchGain) { 89 pdf_features_given_voice = kEps * pdf_features_given_noise; 90 } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) { 91 pdf_features_given_noise = kEps * pdf_features_given_voice; 92 } 93 94 p = p_prior_ * pdf_features_given_voice / 95 (pdf_features_given_voice * p_prior_ + 96 pdf_features_given_noise * (1 - p_prior_)); 97 98 p = LimitProbability(p); 99 100 // Combine pitch-based probability with standalone probability, before 101 // updating prior probabilities. 102 double prod_active = p * p_combined[n]; 103 double prod_inactive = (1 - p) * (1 - p_combined[n]); 104 p_combined[n] = prod_active / (prod_active + prod_inactive); 105 106 if (UpdatePrior(p_combined[n]) < 0) 107 return -1; 108 // Limit prior probability. With a zero prior probability the posterior 109 // probability is always zero. 110 p_prior_ = LimitProbability(p_prior_); 111 } 112 return 0; 113 } 114 UpdatePrior(double p)115int PitchBasedVad::UpdatePrior(double p) { 116 circular_buffer_->Insert(p); 117 if (circular_buffer_->RemoveTransient(kTransientWidthThreshold, 118 kLowProbabilityThreshold) < 0) 119 return -1; 120 p_prior_ = circular_buffer_->Mean(); 121 return 0; 122 } 123 124 } // namespace webrtc 125