1 /* 2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ 12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ 13 14 #include <stddef.h> 15 16 #include <array> 17 18 #include "api/array_view.h" 19 #include "modules/audio_processing/agc2/rnn_vad/common.h" 20 #include "modules/audio_processing/agc2/rnn_vad/pitch_info.h" 21 22 namespace webrtc { 23 namespace rnn_vad { 24 25 // Performs 2x decimation without any anti-aliasing filter. 26 void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src, 27 rtc::ArrayView<float, kBufSize12kHz> dst); 28 29 // Computes a gain threshold for a candidate pitch period given the initial and 30 // the previous pitch period and gain estimates and the pitch period ratio used 31 // to derive the candidate pitch period from the initial period. 32 float ComputePitchGainThreshold(int candidate_pitch_period, 33 int pitch_period_ratio, 34 int initial_pitch_period, 35 float initial_pitch_gain, 36 int prev_pitch_period, 37 float prev_pitch_gain); 38 39 // Computes the sum of squared samples for every sliding frame in the pitch 40 // buffer. |yy_values| indexes are lags. 41 // 42 // The pitch buffer is structured as depicted below: 43 // |.........|...........| 44 // a b 45 // The part on the left, named "a" contains the oldest samples, whereas "b" the 46 // most recent ones. The size of "a" corresponds to the maximum pitch period, 47 // that of "b" to the frame size (e.g., 16 ms and 20 ms respectively). 48 void ComputeSlidingFrameSquareEnergies( 49 rtc::ArrayView<const float, kBufSize24kHz> pitch_buf, 50 rtc::ArrayView<float, kMaxPitch24kHz + 1> yy_values); 51 52 // Given the auto-correlation coefficients stored according to 53 // ComputePitchAutoCorrelation() (i.e., using inverted lags), returns the best 54 // and the second best pitch periods. 55 std::array<size_t, 2> FindBestPitchPeriods( 56 rtc::ArrayView<const float> auto_corr, 57 rtc::ArrayView<const float> pitch_buf, 58 size_t max_pitch_period); 59 60 // Refines the pitch period estimation given the pitch buffer |pitch_buf| and 61 // the initial pitch period estimation |inv_lags|. Returns an inverted lag at 62 // 48 kHz. 63 size_t RefinePitchPeriod48kHz( 64 rtc::ArrayView<const float, kBufSize24kHz> pitch_buf, 65 rtc::ArrayView<const size_t, 2> inv_lags); 66 67 // Refines the pitch period estimation and compute the pitch gain. Returns the 68 // refined pitch estimation data at 48 kHz. 69 PitchInfo CheckLowerPitchPeriodsAndComputePitchGain( 70 rtc::ArrayView<const float, kBufSize24kHz> pitch_buf, 71 int initial_pitch_period_48kHz, 72 PitchInfo prev_pitch_48kHz); 73 74 } // namespace rnn_vad 75 } // namespace webrtc 76 77 #endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ 78