• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
17 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
18 
19 #include <algorithm>
20 #include <cmath>
21 #include <limits>
22 #include <memory>
23 #include <random>
24 #include <string>
25 #include <vector>
26 
27 #include "minddata/dataset/core/tensor.h"
28 #include "minddata/dataset/kernels/tensor_op.h"
29 #include "minddata/dataset/util/status.h"
30 
// Mathematical constant pi. `inline` (C++17) gives every translation unit that
// includes this header one shared definition instead of a per-TU internal copy.
inline constexpr double PI = 3.141592653589793;
32 
33 namespace mindspore {
34 namespace dataset {
35 
36 /// \brief Turn a tensor from the power/amplitude scale to the decibel scale.
37 /// \param input/output: Tensor of shape <..., freq, time>.
38 /// \param multiplier: power - 10, amplitude - 20.
39 /// \param amin: lower bound.
40 /// \param db_multiplier: multiplier for decibels.
41 /// \param top_db: the lower bound for decibels cut-off.
42 /// \return Status code.
43 template <typename T>
AmplitudeToDB(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,T multiplier,T amin,T db_multiplier,T top_db)44 Status AmplitudeToDB(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T multiplier, T amin,
45                      T db_multiplier, T top_db) {
46   TensorShape input_shape = input->shape();
47   TensorShape to_shape = input_shape.Rank() == 2
48                            ? TensorShape({1, 1, input_shape[-2], input_shape[-1]})
49                            : TensorShape({input->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]),
50                                           input_shape[-3], input_shape[-2], input_shape[-1]});
51   RETURN_IF_NOT_OK(input->Reshape(to_shape));
52 
53   std::vector<T> max_val;
54   int step = to_shape[-3] * input_shape[-2] * input_shape[-1];
55   int cnt = 0;
56   T temp_max = std::numeric_limits<T>::lowest();
57   for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
58     // do clamp
59     *itr = *itr < amin ? log10(amin) * multiplier : log10(*itr) * multiplier;
60     *itr -= multiplier * db_multiplier;
61     // calculate max by axis
62     cnt++;
63     if ((*itr) > temp_max) temp_max = *itr;
64     if (cnt % step == 0) {
65       max_val.push_back(temp_max);
66       temp_max = std::numeric_limits<T>::lowest();
67     }
68   }
69 
70   if (!std::isnan(top_db)) {
71     int ind = 0;
72     for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++, ind++) {
73       float lower_bound = max_val[ind / step] - top_db;
74       *itr = std::max((*itr), static_cast<T>(lower_bound));
75     }
76   }
77   RETURN_IF_NOT_OK(input->Reshape(input_shape));
78   *output = input;
79   return Status::OK();
80 }
81 
82 /// \brief Calculate the angles of the complex numbers.
83 /// \param input/output: Tensor of shape <..., time>.
84 template <typename T>
Angle(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output)85 Status Angle(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
86   TensorShape shape = input->shape();
87   std::vector output_shape = shape.AsVector();
88   output_shape.pop_back();
89   std::shared_ptr<Tensor> output_tensor;
90   std::vector<T> out;
91   T o;
92   T x;
93   T y;
94   for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
95     x = static_cast<T>(*itr);
96     itr++;
97     y = static_cast<T>(*itr);
98     o = std::atan2(y, x);
99     out.emplace_back(o);
100   }
101   // Generate multidimensional results corresponding to input
102   Tensor::CreateFromVector(out, TensorShape{output_shape}, &output_tensor);
103   *output = output_tensor;
104   return Status::OK();
105 }
106 
107 /// \brief Perform a biquad filter of input tensor.
108 /// \param input/output: Tensor of shape <..., time>.
109 /// \param a0: denominator coefficient of current output y[n], typically 1.
110 /// \param a1: denominator coefficient of current output y[n-1].
111 /// \param a2: denominator coefficient of current output y[n-2].
112 /// \param b0: numerator coefficient of current input, x[n].
113 /// \param b1: numerator coefficient of input one time step ago x[n-1].
114 /// \param b2: numerator coefficient of input two time steps ago x[n-2].
115 /// \return Status code.
116 template <typename T>
Biquad(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,T b0,T b1,T b2,T a0,T a1,T a2)117 Status Biquad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T b0, T b1, T b2, T a0, T a1,
118               T a2) {
119   std::vector<T> a_coeffs;
120   std::vector<T> b_coeffs;
121   a_coeffs.push_back(a0);
122   a_coeffs.push_back(a1);
123   a_coeffs.push_back(a2);
124   b_coeffs.push_back(b0);
125   b_coeffs.push_back(b1);
126   b_coeffs.push_back(b2);
127   return LFilter(input, output, a_coeffs, b_coeffs, true);
128 }
129 
130 /// \brief Apply contrast effect.
131 /// \param input/output: Tensor of shape <..., time>.
132 /// \param enhancement_amount: controls the amount of the enhancement.
133 /// \return Status code.
134 template <typename T>
Contrast(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,T enhancement_amount)135 Status Contrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T enhancement_amount) {
136   const float enhancement_zoom = 750.0;
137   T enhancement_amount_value = enhancement_amount / enhancement_zoom;
138   TensorShape output_shape{input->shape()};
139   std::shared_ptr<Tensor> out;
140   RETURN_IF_NOT_OK(Tensor::CreateEmpty(output_shape, input->type(), &out));
141   auto itr_out = out->begin<T>();
142   for (auto itr_in = input->begin<T>(); itr_in != input->end<T>(); itr_in++) {
143     T temp1, temp2 = 0;
144     temp1 = static_cast<T>(*itr_in) * (PI / 2);
145     temp2 = enhancement_amount_value * std::sin(temp1 * 4);
146     *itr_out = std::sin(temp1 + temp2);
147     itr_out++;
148   }
149   *output = out;
150   return Status::OK();
151 }
152 
153 /// \brief Apply a DC shift to the audio.
154 /// \param input/output: Tensor of shape <...,time>.
155 /// \param shift: the amount to shift the audio.
156 /// \param limiter_gain: used only on peaks to prevent clipping.
157 /// \return Status code.
158 template <typename T>
DCShift(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,float shift,float limiter_gain)159 Status DCShift(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float shift, float limiter_gain) {
160   float limiter_threshold = 0.0;
161   if (shift != limiter_gain && shift != 0) {
162     limiter_threshold = 1.0 - (std::abs(shift) - limiter_gain);
163     for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
164       if (*itr > limiter_threshold && shift > 0) {
165         T peak = (*itr - limiter_threshold) * limiter_gain / (1 - limiter_threshold);
166         T sample = (peak + limiter_threshold + shift);
167         *itr = sample > limiter_threshold ? limiter_threshold : sample;
168       } else if (*itr < -limiter_threshold && shift < 0) {
169         T peak = (*itr + limiter_threshold) * limiter_gain / (1 - limiter_threshold);
170         T sample = (peak + limiter_threshold + shift);
171         *itr = sample < -limiter_threshold ? -limiter_threshold : sample;
172       } else {
173         T sample = (*itr + shift);
174         *itr = (sample > 1 || sample < -1) ? (sample > 1 ? 1 : -1) : sample;
175       }
176     }
177   } else {
178     for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
179       T sample = (*itr + shift);
180       *itr = sample > 1 || sample < -1 ? (sample > 1 ? 1 : -1) : sample;
181     }
182   }
183   *output = input;
184   return Status::OK();
185 }
186 
187 /// \brief Perform an IIR filter by evaluating difference equation.
188 /// \param input/output: Tensor of shape <..., time>
189 /// \param a_coeffs: denominator coefficients of difference equation of dimension of (n_order + 1).
190 /// \param b_coeffs: numerator coefficients of difference equation of dimension of (n_order + 1).
191 /// \param clamp: If True, clamp the output signal to be in the range [-1, 1] (Default: True).
192 /// \return Status code
193 template <typename T>
LFilter(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,std::vector<T> a_coeffs,std::vector<T> b_coeffs,bool clamp)194 Status LFilter(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, std::vector<T> a_coeffs,
195                std::vector<T> b_coeffs, bool clamp) {
196   //  pack batch
197   TensorShape input_shape = input->shape();
198   TensorShape toShape({input->Size() / input_shape[-1], input_shape[-1]});
199   input->Reshape(toShape);
200   auto shape_0 = input->shape()[0];
201   auto shape_1 = input->shape()[1];
202   std::vector<T> signal;
203   std::shared_ptr<Tensor> out;
204   std::vector<T> out_vect(shape_0 * shape_1);
205   size_t x_idx = 0;
206   size_t channel_idx = 1;
207   size_t m_num_order = b_coeffs.size() - 1;
208   size_t m_den_order = a_coeffs.size() - 1;
209   // init A_coeffs and B_coeffs by div(a0)
210   for (size_t i = 1; i < a_coeffs.size(); i++) {
211     a_coeffs[i] /= a_coeffs[0];
212   }
213   for (size_t i = 0; i < b_coeffs.size(); i++) {
214     b_coeffs[i] /= a_coeffs[0];
215   }
216   // Sliding window
217   T *m_px = new T[m_num_order + 1];
218   T *m_py = new T[m_den_order + 1];
219 
220   // Tensor -> vector
221   for (auto itr = input->begin<T>(); itr != input->end<T>();) {
222     while (x_idx < shape_1 * channel_idx) {
223       signal.push_back(*itr);
224       itr++;
225       x_idx++;
226     }
227     // Sliding window
228     for (size_t j = 0; j < m_den_order; j++) {
229       m_px[j] = static_cast<T>(0);
230     }
231     for (size_t j = 0; j <= m_den_order; j++) {
232       m_py[j] = static_cast<T>(0);
233     }
234     // Each channel is processed with the sliding window
235     for (size_t i = x_idx - shape_1; i < x_idx; i++) {
236       m_px[m_num_order] = signal[i];
237       for (size_t j = 0; j < m_num_order + 1; j++) {
238         m_py[m_num_order] += b_coeffs[j] * m_px[m_num_order - j];
239       }
240       for (size_t j = 1; j < m_den_order + 1; j++) {
241         m_py[m_num_order] -= a_coeffs[j] * m_py[m_num_order - j];
242       }
243       if (clamp) {
244         if (m_py[m_num_order] > static_cast<T>(1.))
245           out_vect[i] = static_cast<T>(1.);
246         else if (m_py[m_num_order] < static_cast<T>(-1.))
247           out_vect[i] = static_cast<T>(-1.);
248         else
249           out_vect[i] = m_py[m_num_order];
250       } else {
251         out_vect[i] = m_py[m_num_order];
252       }
253       if (i + 1 == x_idx) continue;
254       for (size_t j = 0; j < m_num_order; j++) {
255         m_px[j] = m_px[j + 1];
256       }
257       for (size_t j = 0; j < m_num_order; j++) {
258         m_py[j] = m_py[j + 1];
259       }
260       m_py[m_num_order] = static_cast<T>(0);
261     }
262     if (x_idx % shape_1 == 0) {
263       ++channel_idx;
264     }
265   }
266   // unpack batch
267   Tensor::CreateFromVector(out_vect, input_shape, &out);
268   *output = out;
269   delete[] m_px;
270   delete[] m_py;
271   return Status::OK();
272 }
273 
274 /// \brief Stretch STFT in time at a given rate, without changing the pitch.
275 /// \param input: Tensor of shape <..., freq, time>.
276 /// \param rate: Stretch factor.
277 /// \param hop_length: Length of hop between STFT windows.
/// \param n_freq: Number of frequency bins in the STFT.
278 /// \param output: Tensor after stretch in time domain.
279 /// \return Status code.
280 Status TimeStretch(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, float rate, float hop_length,
281                    float n_freq);
282 
283 /// \brief Apply a mask along axis.
284 /// \param input: Tensor of shape <..., freq, time>.
285 /// \param output: Tensor of shape <..., freq, time>.
286 /// \param mask_param: Number of columns to be masked will be uniformly sampled from [0, mask_param].
287 /// \param mask_value: Value to assign to the masked columns.
288 /// \param axis: Axis to apply masking on (1 -> frequency, 2 -> time).
289 /// \param rnd: Number generator.
290 /// \return Status code.
291 Status RandomMaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t mask_param,
292                            float mask_value, int axis, std::mt19937 rnd);
293 
294 /// \brief Apply a mask along axis. All examples will have the same mask interval.
295 /// \param input: Tensor of shape <..., freq, time>.
296 /// \param output: Tensor of shape <..., freq, time>.
297 /// \param mask_width: The width of the mask.
298 /// \param mask_start: Starting position of the mask.
299 ///     Mask will be applied from indices [mask_start, mask_start + mask_width).
300 /// \param mask_value: Value to assign to the masked columns.
301 /// \param axis: Axis to apply masking on (1 -> frequency, 2 -> time).
302 /// \return Status code.
303 Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t mask_width,
304                      int32_t mask_start, float mask_value, int32_t axis);
305 
306 /// \brief Compute the norm of complex tensor input.
307 /// \param input Tensor shape of <..., complex=2>.
308 /// \param output Tensor shape of <..., >.
309 /// \param power Power of the norm (optional).
310 /// \return Status code.
311 Status ComplexNorm(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float power);
312 
313 /// \brief Decode mu-law encoded signal.
314 /// \param input Tensor of shape <..., time>.
315 /// \param output Tensor of shape <..., time>.
316 /// \param quantization_channels Number of channels.
317 /// \return Status code.
318 Status MuLawDecoding(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int quantization_channels);
319 
320 /// \brief Add a fade in and/or fade out to an input.
321 /// \param[in] input: The input tensor.
322 /// \param[out] output: Added fade in and/or fade out audio with the same shape.
323 /// \param[in] fade_in_len: Length of fade-in (time frames).
324 /// \param[in] fade_out_len: Length of fade-out (time frames).
325 /// \param[in] fade_shape: Shape of fade.
326 Status Fade(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int32_t fade_in_len,
327             int32_t fade_out_len, FadeShape fade_shape);
328 
329 /// \brief Add a volume to an waveform.
330 /// \param input/output: Tensor of shape <..., time>.
331 /// \param gain: Gain value, varies according to the value of gain_type.
332 /// \param gain_type: Type of gain, should be one of [GainType::kAmplitude, GainType::kDb, GainType::kPower].
333 /// \return Status code.
334 template <typename T>
Vol(const std::shared_ptr<Tensor> & input,std::shared_ptr<Tensor> * output,T gain,GainType gain_type)335 Status Vol(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T gain, GainType gain_type) {
336   const T lower_bound = -1;
337   const T upper_bound = 1;
338 
339   // DB is a unit which converts a numeric value into decibel scale and for conversion, we have to use log10
340   // A(in dB) = 20log10(A in amplitude)
341   // When referring to measurements of power quantities, a ratio can be expressed as a level in decibels by evaluating
342   // ten times the base-10 logarithm of the ratio of the measured quantity to reference value
343   // A(in dB) = 10log10(A in power)
344   const int power_factor_div = 20;
345   const int power_factor_mul = 10;
346   const int base = 10;
347 
348   if (gain_type == GainType::kDb) {
349     if (gain != 0) {
350       gain = std::pow(base, (gain / power_factor_div));
351     }
352   } else if (gain_type == GainType::kPower) {
353     gain = power_factor_mul * std::log10(gain);
354     gain = std::pow(base, (gain / power_factor_div));
355   }
356 
357   for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
358     if (gain != 0 || gain_type == GainType::kAmplitude) {
359       *itr = (*itr) * gain;
360     }
361     *itr = std::min(std::max((*itr), lower_bound), upper_bound);
362   }
363 
364   *output = input;
365 
366   return Status::OK();
367 }
368 
369 /// \brief Separate a complex-valued spectrogram with shape (…, 2) into its magnitude and phase.
370 /// \param input: Complex tensor.
371 /// \param output: The magnitude and phase of the complex tensor.
372 /// \param power: Power of the norm.
373 Status Magphase(const TensorRow &input, TensorRow *output, float power);
374 
375 }  // namespace dataset
376 }  // namespace mindspore
377 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
378