1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "nnacl/infer/audio_spectrogram_infer.h"
18 #include "nnacl/infer/infer_register.h"
19
/**
 * Computes ceil(log2(length)): the smallest exponent e for which 2^e >= length.
 *
 * The bit search below uses shifts of 16, 8, 4, 2 and 1, so it covers values
 * of up to 32 bits.
 *
 * @param length Value whose base-2 logarithm is rounded up.
 * @return 0 when length is 0 or 1; otherwise the ceiling of log2(length).
 */
unsigned Log2Ceil(unsigned length) {
  if (length == 0) {
    return 0;
  }
  // Binary search for the index of the highest set bit, i.e. floor(log2(length)).
  // The search consumes a copy so that `length` is still intact for the
  // power-of-two test at the end.
  unsigned remaining = length;
  unsigned log2_floor = 0;
  for (int i = 4; i >= 0; --i) {
    const unsigned shift = 1u << (unsigned)i;
    const unsigned upper = remaining >> shift;
    if (upper != 0) {
      remaining = upper;
      log2_floor += shift;
    }
  }
  // An exact power of two has a single set bit, so the floor is already the
  // answer; every other value has to be rounded up.
  return (length & (length - 1)) == 0 ? log2_floor : log2_floor + 1;
}
35
/**
 * Returns 2 raised to the power Log2Ceil(length).
 *
 * @param length Value passed through to Log2Ceil to obtain the exponent.
 * @return 1u << Log2Ceil(length), or 0 when that power of two cannot be
 *         represented in an unsigned.
 */
unsigned GetFftLength(unsigned length) {
  const unsigned exponent = Log2Ceil(length);
  // Shifting by the full width of the type (or more) is undefined behaviour,
  // so report "not representable" instead of performing the shift.
  if (exponent >= (unsigned)(sizeof(unsigned) * 8)) {
    return 0;
  }
  // Shift an unsigned one: `1 << 31` would overflow a signed int.
  return 1u << exponent;
}
40
AudioSpectrogramInferShape(const TensorC * const * inputs,size_t inputs_size,TensorC ** outputs,size_t outputs_size,OpParameter * parameter)41 int AudioSpectrogramInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size,
42 OpParameter *parameter) {
43 int check_ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 1, 1);
44 if (check_ret != NNACL_OK) {
45 return check_ret;
46 }
47
48 const TensorC *input = inputs[0];
49 TensorC *output = outputs[0];
50 SetDataTypeFormat(output, input);
51 if (!InferFlag(inputs, inputs_size)) {
52 return NNACL_INFER_INVALID;
53 }
54 if (input->shape_size_ != 2) {
55 return NNACL_ERR;
56 }
57 AudioSpectrogramParameter *param = (AudioSpectrogramParameter *)parameter;
58 if (param->window_size_ < 2) {
59 return NNACL_ERR;
60 }
61 if (param->stride_ < 1) {
62 return NNACL_ERR;
63 }
64 int output_shape[3];
65 output_shape[0] = input->shape_[1];
66 int sample_sub_window = input->shape_[0] - param->window_size_;
67 output_shape[1] = sample_sub_window < 0 ? 0 : 1 + sample_sub_window / param->stride_;
68 // compute fft length
69 int fft_length = (int)GetFftLength(param->window_size_);
70 output_shape[2] = fft_length / 2 + 1;
71 SetShapeArray(output, output_shape, 3);
72 return NNACL_OK;
73 }
74
75 REG_INFER(AudioSpectrogram, PrimType_AudioSpectrogram, AudioSpectrogramInferShape)
76