1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "ops/audio_spectrogram.h"
18 #include <string>
19 #include <algorithm>
20 #include <memory>
21 #include <set>
22 #include <vector>
23 #include "ops/op_utils.h"
24 #include "utils/check_convert_utils.h"
25 #include "abstract/primitive_infer_map.h"
26
27 namespace mindspore {
28 namespace ops {
29 namespace {
// Returns ceil(log2(length)) for a positive length, i.e. the smallest exponent e such that
// (1 << e) >= length. Returns -1 when length is not positive, because no such exponent exists.
int64_t Log2Ceil(int64_t length) {
  if (length <= 0) {
    return -1;
  }
  // Keep the original value untouched: the power-of-two test below must look at the input itself,
  // not at the value that the search loop has already shifted down.
  const uint64_t value = static_cast<uint64_t>(length);
  uint64_t remaining = value;
  int64_t floor = 0;
  // Binary search for the index of the highest set bit. The shifts 32, 16, 8, 4, 2, 1 together
  // cover every bit position of a 64-bit value.
  for (unsigned shift = 32; shift != 0; shift >>= 1) {
    const uint64_t upper = remaining >> shift;
    if (upper != 0) {
      remaining = upper;
      floor += static_cast<int64_t>(shift);
    }
  }
  // A power of two has exactly one bit set, so its ceiling equals its floor; anything else rounds up.
  const bool is_power_of_two = (value & (value - 1)) == 0;
  return is_power_of_two ? floor : floor + 1;
}
46
GetFftLength(int64_t length)47 int64_t GetFftLength(int64_t length) {
48 int64_t shift = Log2Ceil(length);
49 return SizeToLong(1UL << LongToSize(shift));
50 }
51
AudioSpectrogramInferShape(const PrimitivePtr & primitive,const std::vector<AbstractBasePtr> & input_args)52 abstract::ShapePtr AudioSpectrogramInferShape(const PrimitivePtr &primitive,
53 const std::vector<AbstractBasePtr> &input_args) {
54 auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
55 if (input_shape.size() != 2) {
56 MS_LOG(ERROR) << "input shape is error, which need to be 2 dimensions";
57 }
58 auto window_size = GetValue<int64_t>(primitive->GetAttr(kWindowSize));
59 if (window_size < 2) {
60 MS_LOG(ERROR) << "window size is too short, now is " << window_size;
61 }
62 auto stride_size = GetValue<int64_t>(primitive->GetAttr(kStride));
63 if (stride_size < 1) {
64 MS_LOG(ERROR) << "stride must be positive, now is " << stride_size;
65 }
66 std::vector<int64_t> infer_shape;
67 infer_shape.push_back(input_shape[1]);
68 int64_t sample_sub_window = input_shape[0] - window_size;
69 infer_shape.push_back(sample_sub_window < 0 ? 0 : 1 + sample_sub_window / stride_size);
70 int64_t fft_length = GetFftLength(window_size);
71 infer_shape.push_back(fft_length / 2 + 1);
72 MS_LOG(ERROR) << infer_shape;
73 return std::make_shared<abstract::Shape>(infer_shape);
74 }
75
AudioSpectrogramInferType(const PrimitivePtr & prim,const std::vector<AbstractBasePtr> & input_args)76 TypePtr AudioSpectrogramInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
77 const int64_t x_index = 0;
78 return CheckAndConvertUtils::GetInputTensorType(input_args, x_index, prim->name());
79 }
80 } // namespace
81
set_window_size(const int64_t window_size)82 void AudioSpectrogram::set_window_size(const int64_t window_size) {
83 (void)this->AddAttr(kWindowSize, MakeValue(window_size));
84 }
get_window_size() const85 int64_t AudioSpectrogram::get_window_size() const {
86 auto value_ptr = GetAttr(kWindowSize);
87 return GetValue<int64_t>(value_ptr);
88 }
89
set_stride(const int64_t stride)90 void AudioSpectrogram::set_stride(const int64_t stride) { (void)this->AddAttr(kStride, MakeValue(stride)); }
get_stride() const91 int64_t AudioSpectrogram::get_stride() const {
92 auto value_ptr = GetAttr(kStride);
93 return GetValue<int64_t>(value_ptr);
94 }
95
set_mag_square(const bool mag_square)96 void AudioSpectrogram::set_mag_square(const bool mag_square) { (void)this->AddAttr(kMagSquare, MakeValue(mag_square)); }
get_mag_square() const97 bool AudioSpectrogram::get_mag_square() const {
98 auto value_ptr = GetAttr(kMagSquare);
99 return GetValue<bool>(value_ptr);
100 }
Init(const int64_t window_size,const int64_t stride,const bool mag_square)101 void AudioSpectrogram::Init(const int64_t window_size, const int64_t stride, const bool mag_square) {
102 this->set_window_size(window_size);
103 this->set_stride(stride);
104 this->set_mag_square(mag_square);
105 }
106
AudioSpectrogramInfer(const abstract::AnalysisEnginePtr &,const PrimitivePtr & primitive,const std::vector<AbstractBasePtr> & input_args)107 AbstractBasePtr AudioSpectrogramInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
108 const std::vector<AbstractBasePtr> &input_args) {
109 MS_EXCEPTION_IF_NULL(primitive);
110 const int64_t input_num = 1;
111 CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, input_num, primitive->name());
112 return std::make_shared<abstract::AbstractTensor>(AudioSpectrogramInferType(primitive, input_args),
113 AudioSpectrogramInferShape(primitive, input_args));
114 }
115 REGISTER_PRIMITIVE_C(kNameAudioSpectrogram, AudioSpectrogram);
116 } // namespace ops
117 } // namespace mindspore
118