1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "utils.h"
17 #include <fstream>
18 #include <algorithm>
19
/**
 * @brief Returns the transpose of a row-major 2-D float matrix.
 *
 * The output has `mels[0].size()` rows and `mels.size()` columns, with
 * `result[j][i] == mels[i][j]`.
 *
 * The column count is taken from the first row. Rows that are longer than the
 * first row have their excess elements ignored; rows that are shorter
 * contribute 0.0f for the cells they do not provide instead of being read out
 * of bounds.
 *
 * @param mels Input matrix (outer vector = rows).
 * @return The transposed matrix, or an empty matrix when @p mels is empty.
 */
std::vector<std::vector<float>> TransposeMel(const std::vector<std::vector<float>>& mels) {
    if (mels.empty()) {
        return {};
    }

    const size_t rows = mels.size();
    const size_t cols = mels[0].size();

    // Inner vectors are value-initialised, so every cell starts at 0.0f.
    std::vector<std::vector<float>> result(cols, std::vector<float>(rows));

    for (size_t i = 0; i < rows; ++i) {
        const std::vector<float>& row = mels[i];
        // Never index past the end of a row that is shorter than the first one.
        const size_t usable = std::min(row.size(), cols);
        for (size_t j = 0; j < usable; ++j) {
            result[j][i] = row[j];
        }
    }
    return result;
}
35
ResampleAudio(const std::vector<float> & input_data,int input_sample_rate,int output_sample_rate,int channel,int converter)36 std::vector<float> ResampleAudio(
37 const std::vector<float>& input_data,
38 int input_sample_rate,
39 int output_sample_rate,
40 int channel,
41 int converter
42 ) {
43 if (input_data.empty() || input_sample_rate <= 0 || output_sample_rate <= 0) {
44 throw std::invalid_argument("Invalid input parameters");
45 }
46
47 double ratio = static_cast<double>(output_sample_rate) / input_sample_rate;
48
49 size_t output_size = static_cast<size_t>(input_data.size() * ratio + 0.5);
50 std::vector<float> output_data(output_size);
51
52 SRC_DATA src_data;
53 src_data.data_in = input_data.data();
54 src_data.input_frames = input_data.size();
55 src_data.data_out = output_data.data();
56 src_data.output_frames = output_size;
57 src_data.src_ratio = ratio;
58 src_data.end_of_input = 1;
59
60 int error = src_simple(&src_data, converter, 1);
61 if (error) {
62 std::cout << "error src_simple filed " << std::endl;
63 }
64
65 return output_data;
66 }
67
/**
 * @brief Normalises a mel spectrogram in place and fixes its width to 3000 columns.
 *
 * Steps, following the NumPy reference quoted in the original comments:
 *   1. `v = log10(max(v, 1e-10))`
 *   2. `v = max(v, global_max - 8.0)` where `global_max` is the largest value
 *      produced by step 1 across all rows
 *   3. `v = (v + 4.0) / 4.0`
 *   4. Width fix-up, decided by the width of the first row:
 *        - wider than 3000: every row is cut to 3000 columns and its last
 *          50 columns are set to 0.0f;
 *        - narrower than 3000: every row is resized to 3000 columns, new
 *          cells being 0.0f;
 *        - exactly 3000: rows are left as they are.
 *
 * An empty @p mels is left untouched.
 *
 * @param mels Spectrogram to transform in place (outer vector = rows).
 */
void ProcessMelSpectrogram(std::vector<std::vector<float>>& mels) {
    // Nothing to process; also guards the mels[0] access further down.
    if (mels.empty()) {
        return;
    }

    constexpr float kLogFloor = 1e-10f;      // lower clamp applied before log10
    constexpr float kRangeBelowMax = 8.0f;   // values are clamped to (max - this)
    constexpr float kShiftAndScale = 4.0f;   // (v + 4) / 4 rescaling
    constexpr size_t kTargetCols = 3000;     // final width of every row
    constexpr size_t kZeroedTailCols = 50;   // columns cleared after truncation
    static_assert(kZeroedTailCols <= kTargetCols, "tail must fit inside a row");

    // log_spec = np.log10(np.maximum(mel, 1e-10)), tracking the global maximum
    // in the same pass.
    float max_val = -std::numeric_limits<float>::infinity();
    for (auto& row : mels) {
        for (auto& val : row) {
            val = std::log10(std::max(val, kLogFloor));
            if (val > max_val) {
                max_val = val;
            }
        }
    }

    // log_spec = (np.maximum(log_spec, log_spec.max() - 8.0) + 4.0) / 4.0
    const float threshold = max_val - kRangeBelowMax;
    for (auto& row : mels) {
        for (auto& val : row) {
            val = (std::max(val, threshold) + kShiftAndScale) / kShiftAndScale;
        }
    }

    const size_t current_cols = mels[0].size();
    if (current_cols > kTargetCols) {
        const auto tail_start = static_cast<std::ptrdiff_t>(kTargetCols - kZeroedTailCols);
        for (auto& row : mels) {
            row.resize(kTargetCols);
            // NOTE(review): the reason the last 50 columns are blanked after
            // truncation is not visible here — behaviour kept as-is.
            std::fill(row.begin() + tail_start, row.end(), 0.0f);
        }
    } else if (current_cols < kTargetCols) {
        for (auto& row : mels) {
            // resize() pads with zeros and guarantees a uniform width even if
            // the rows did not all start with the same length.
            row.resize(kTargetCols, 0.0f);
        }
    }
}
109