• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_
12 #define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_
13 
14 #include <stddef.h>
15 
16 #include <array>
17 
18 #include "api/array_view.h"
19 #include "modules/audio_processing/agc2/rnn_vad/common.h"
20 #include "modules/audio_processing/agc2/rnn_vad/pitch_info.h"
21 
22 namespace webrtc {
23 namespace rnn_vad {
24 
25 // Performs 2x decimation without any anti-aliasing filter.
   // |src| is the read-only input view holding kBufSize24kHz samples; |dst| is
   // the caller-provided output view holding kBufSize12kHz samples.
   // NOTE(review): which of every two input samples is kept is not stated in
   // this header - confirm against the definition.
26 void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
27                 rtc::ArrayView<float, kBufSize12kHz> dst);
28 
29 // Computes a gain threshold for a candidate pitch period given the initial and
30 // the previous pitch period and gain estimates and the pitch period ratio used
31 // to derive the candidate pitch period from the initial period.
   //
   // Parameters:
   // - |candidate_pitch_period|: the candidate pitch period being evaluated.
   // - |pitch_period_ratio|: the ratio used to derive the candidate period from
   //   |initial_pitch_period|.
   // - |initial_pitch_period|, |initial_pitch_gain|: the initial pitch period
   //   and gain estimates.
   // - |prev_pitch_period|, |prev_pitch_gain|: the previous pitch period and
   //   gain estimates.
   // Returns the computed gain threshold.
   // NOTE(review): the sample rate in which the periods are expressed is not
   // stated in this header - confirm against the callers.
32 float ComputePitchGainThreshold(int candidate_pitch_period,
33                                 int pitch_period_ratio,
34                                 int initial_pitch_period,
35                                 float initial_pitch_gain,
36                                 int prev_pitch_period,
37                                 float prev_pitch_gain);
38 
39 // Computes the sum of squared samples for every sliding frame in the pitch
40 // buffer |pitch_buf| and writes the results to |yy_values|, whose indexes are
   // lags. |yy_values| holds kMaxPitch24kHz + 1 entries.
41 //
42 // The pitch buffer is structured as depicted below:
43 // |.........|...........|
44 //      a          b
45 // The part on the left, named "a" contains the oldest samples, whereas "b" the
46 // most recent ones. The size of "a" corresponds to the maximum pitch period,
47 // that of "b" to the frame size (e.g., 16 ms and 20 ms respectively).
48 void ComputeSlidingFrameSquareEnergies(
49     rtc::ArrayView<const float, kBufSize24kHz> pitch_buf,
50     rtc::ArrayView<float, kMaxPitch24kHz + 1> yy_values);
51 
52 // Given the auto-correlation coefficients stored according to
53 // ComputePitchAutoCorrelation() (i.e., using inverted lags), returns the best
54 // and the second best pitch periods.
   //
   // Parameters:
   // - |auto_corr|: the auto-correlation coefficients described above.
   // - |pitch_buf|: the pitch buffer; its size is not fixed by this signature.
   // - |max_pitch_period|: the maximum pitch period.
   // NOTE(review): presumably the returned array holds the best candidate first
   // and the second best second; whether the values are plain periods or
   // inverted lags is not stated here - confirm against the definition.
55 std::array<size_t, 2> FindBestPitchPeriods(
56     rtc::ArrayView<const float> auto_corr,
57     rtc::ArrayView<const float> pitch_buf,
58     size_t max_pitch_period);
59 
60 // Refines the pitch period estimation given the pitch buffer |pitch_buf| and
61 // the two initial pitch period estimates |inv_lags|. Returns an inverted lag
62 // at 48 kHz.
   // NOTE(review): |inv_lags| presumably holds inverted lags, as its name
   // suggests; the sample rate they refer to is not stated here - confirm
   // against the callers.
63 size_t RefinePitchPeriod48kHz(
64     rtc::ArrayView<const float, kBufSize24kHz> pitch_buf,
65     rtc::ArrayView<const size_t, 2> inv_lags);
66 
67 // Refines the pitch period estimation and computes the pitch gain. Returns
68 // the refined pitch estimation data at 48 kHz.
   //
   // Parameters:
   // - |pitch_buf|: the pitch buffer, holding kBufSize24kHz samples.
   // - |initial_pitch_period_48kHz|: the initial pitch period estimate to refine.
   // - |prev_pitch_48kHz|: the previous pitch estimation data.
   // NOTE(review): PitchInfo is presumably declared in pitch_info.h and carries
   // the pitch period and gain - confirm against that header.
69 PitchInfo CheckLowerPitchPeriodsAndComputePitchGain(
70     rtc::ArrayView<const float, kBufSize24kHz> pitch_buf,
71     int initial_pitch_period_48kHz,
72     PitchInfo prev_pitch_48kHz);
73 
74 }  // namespace rnn_vad
75 }  // namespace webrtc
76 
77 #endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_
78