• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 
#include "utils.h"

#include <algorithm>
#include <cmath>
#include <fstream>
#include <stdexcept>
#include <string>
19 
/**
 * @brief Returns the transpose of a 2-D float matrix stored as a vector of rows.
 *
 * The width of the input is taken from its first row; every row is read up to
 * that width, so the input is expected to be rectangular.
 *
 * @param mels Input matrix (rows x cols).
 * @return A cols x rows matrix where out[c][r] == mels[r][c]; empty when
 *         @p mels has no rows.
 */
std::vector<std::vector<float>> TransposeMel(const std::vector<std::vector<float>>& mels) {
    std::vector<std::vector<float>> transposed;
    if (mels.empty()) {
        return transposed;
    }

    const size_t src_rows = mels.size();
    const size_t src_cols = mels.front().size();

    // Allocate the full destination up front, then fill it one output row at a time.
    transposed.assign(src_cols, std::vector<float>(src_rows));
    for (size_t c = 0; c < src_cols; ++c) {
        std::vector<float>& dst_row = transposed[c];
        for (size_t r = 0; r < src_rows; ++r) {
            dst_row[r] = mels[r][c];
        }
    }

    return transposed;
}
35 
ResampleAudio(const std::vector<float> & input_data,int input_sample_rate,int output_sample_rate,int channel,int converter)36 std::vector<float> ResampleAudio(
37     const std::vector<float>& input_data,
38     int input_sample_rate,
39     int output_sample_rate,
40     int channel,
41     int converter
42 ) {
43     if (input_data.empty() || input_sample_rate <= 0 || output_sample_rate <= 0) {
44         throw std::invalid_argument("Invalid input parameters");
45     }
46 
47     double ratio = static_cast<double>(output_sample_rate) / input_sample_rate;
48 
49     size_t output_size = static_cast<size_t>(input_data.size() * ratio + 0.5);
50     std::vector<float> output_data(output_size);
51 
52     SRC_DATA src_data;
53     src_data.data_in = input_data.data();
54     src_data.input_frames = input_data.size();
55     src_data.data_out = output_data.data();
56     src_data.output_frames = output_size;
57     src_data.src_ratio = ratio;
58     src_data.end_of_input = 1;
59 
60     int error = src_simple(&src_data, converter, 1);
61     if (error) {
62         std::cout << "error src_simple filed " << std::endl;
63     }
64 
65     return output_data;
66 }
67 
/**
 * @brief Log-compresses, clamps, rescales and length-normalizes a spectrogram
 *        in place.
 *
 * Every element v of @p mels is transformed as:
 *   1. v = log10(max(v, 1e-10))
 *   2. v = max(v, global_max - 8.0), where global_max is the largest value
 *      after step 1 across the whole matrix
 *   3. v = (v + 4.0) / 4.0
 *
 * Afterwards the row width is adjusted towards 3000 columns, decided by the
 * width of the first row:
 *   - wider than 3000: every row is resized to 3000 and its last 50 columns
 *     are overwritten with 0;
 *   - narrower than 3000: (3000 - first_row_width) zeros are appended to
 *     every row;
 *   - exactly 3000: rows are left as they are.
 *
 * @param mels Matrix to process; modified in place. An empty matrix is left
 *             untouched.
 */
void ProcessMelSpectrogram(std::vector<std::vector<float>>& mels) {
    if (mels.empty()) {
        return;  // Nothing to do; also keeps the mels[0] access below valid.
    }

    constexpr size_t kTargetCols = 3000;
    constexpr size_t kZeroedTailCols = 50;

    // log_spec = np.log10(np.maximum(mel, 1e-10)), tracking the global maximum
    // in the same pass.
    float max_val = -std::numeric_limits<float>::infinity();
    for (auto& row : mels) {
        for (auto& val : row) {
            val = std::log10(std::max(val, 1e-10f));
            if (val > max_val) {
                max_val = val;
            }
        }
    }

    // log_spec = np.maximum(log_spec, log_spec.max() - 8.0), then (x + 4) / 4.
    const float threshold = max_val - 8.0f;
    for (auto& row : mels) {
        for (auto& val : row) {
            val = (std::max(val, threshold) + 4.0f) / 4.0f;
        }
    }

    const size_t current_cols = mels[0].size();
    if (current_cols > kTargetCols) {
        for (auto& row : mels) {
            row.resize(kTargetCols);
            // Blank the trailing columns of the truncated row.
            const size_t tail = std::min(kZeroedTailCols, row.size());
            std::fill(row.end() - static_cast<std::ptrdiff_t>(tail), row.end(), 0.0f);
        }
    } else if (current_cols < kTargetCols) {
        const size_t padding = kTargetCols - current_cols;
        for (auto& row : mels) {
            row.insert(row.end(), padding, 0.0f);
        }
    }
}
109