• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ops/audio_spectrogram.h"
18 #include <string>
19 #include <algorithm>
20 #include <memory>
21 #include <set>
22 #include <vector>
23 #include "ops/op_utils.h"
24 #include "utils/check_convert_utils.h"
25 #include "abstract/primitive_infer_map.h"
26 
27 namespace mindspore {
28 namespace ops {
29 namespace {
// Returns ceil(log2(length)) for a positive length, i.e. the smallest exponent e such that
// (1 << e) >= length. Returns -1 when length is not positive, because the logarithm is undefined.
//
// The original value is kept separate from the working copy: the power-of-two test must be made
// on the caller's value, not on what is left after the bit search (which is always 1).
int64_t Log2Ceil(int64_t length) {
  if (length <= 0) {
    return -1;
  }
  const uint64_t value = static_cast<uint64_t>(length);
  uint64_t remaining = value;
  int64_t floor = 0;
  // Binary search for the highest set bit using shifts of 32, 16, 8, 4, 2 and 1, so that every
  // positive int64_t value is handled.
  for (int i = 5; i >= 0; --i) {
    const int64_t shift = static_cast<int64_t>(1) << i;
    const uint64_t shifted = remaining >> shift;
    if (shifted != 0) {
      remaining = shifted;
      floor += shift;
    }
  }
  // A power of two has exactly one bit set; only then do floor and ceiling coincide.
  const bool is_power_of_two = (value & (value - 1)) == 0;
  return is_power_of_two ? floor : floor + 1;
}
46 
GetFftLength(int64_t length)47 int64_t GetFftLength(int64_t length) {
48   int64_t shift = Log2Ceil(length);
49   return SizeToLong(1UL << LongToSize(shift));
50 }
51 
AudioSpectrogramInferShape(const PrimitivePtr & primitive,const std::vector<AbstractBasePtr> & input_args)52 abstract::ShapePtr AudioSpectrogramInferShape(const PrimitivePtr &primitive,
53                                               const std::vector<AbstractBasePtr> &input_args) {
54   auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
55   if (input_shape.size() != 2) {
56     MS_LOG(ERROR) << "input shape is error, which need to be 2 dimensions";
57   }
58   auto window_size = GetValue<int64_t>(primitive->GetAttr(kWindowSize));
59   if (window_size < 2) {
60     MS_LOG(ERROR) << "window size is too short, now is " << window_size;
61   }
62   auto stride_size = GetValue<int64_t>(primitive->GetAttr(kStride));
63   if (stride_size < 1) {
64     MS_LOG(ERROR) << "stride must be positive, now is " << stride_size;
65   }
66   std::vector<int64_t> infer_shape;
67   infer_shape.push_back(input_shape[1]);
68   int64_t sample_sub_window = input_shape[0] - window_size;
69   infer_shape.push_back(sample_sub_window < 0 ? 0 : 1 + sample_sub_window / stride_size);
70   int64_t fft_length = GetFftLength(window_size);
71   infer_shape.push_back(fft_length / 2 + 1);
72   MS_LOG(ERROR) << infer_shape;
73   return std::make_shared<abstract::Shape>(infer_shape);
74 }
75 
AudioSpectrogramInferType(const PrimitivePtr & prim,const std::vector<AbstractBasePtr> & input_args)76 TypePtr AudioSpectrogramInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
77   const int64_t x_index = 0;
78   return CheckAndConvertUtils::GetInputTensorType(input_args, x_index, prim->name());
79 }
80 }  // namespace
81 
set_window_size(const int64_t window_size)82 void AudioSpectrogram::set_window_size(const int64_t window_size) {
83   (void)this->AddAttr(kWindowSize, MakeValue(window_size));
84 }
get_window_size() const85 int64_t AudioSpectrogram::get_window_size() const {
86   auto value_ptr = GetAttr(kWindowSize);
87   return GetValue<int64_t>(value_ptr);
88 }
89 
set_stride(const int64_t stride)90 void AudioSpectrogram::set_stride(const int64_t stride) { (void)this->AddAttr(kStride, MakeValue(stride)); }
get_stride() const91 int64_t AudioSpectrogram::get_stride() const {
92   auto value_ptr = GetAttr(kStride);
93   return GetValue<int64_t>(value_ptr);
94 }
95 
set_mag_square(const bool mag_square)96 void AudioSpectrogram::set_mag_square(const bool mag_square) { (void)this->AddAttr(kMagSquare, MakeValue(mag_square)); }
get_mag_square() const97 bool AudioSpectrogram::get_mag_square() const {
98   auto value_ptr = GetAttr(kMagSquare);
99   return GetValue<bool>(value_ptr);
100 }
Init(const int64_t window_size,const int64_t stride,const bool mag_square)101 void AudioSpectrogram::Init(const int64_t window_size, const int64_t stride, const bool mag_square) {
102   this->set_window_size(window_size);
103   this->set_stride(stride);
104   this->set_mag_square(mag_square);
105 }
106 
AudioSpectrogramInfer(const abstract::AnalysisEnginePtr &,const PrimitivePtr & primitive,const std::vector<AbstractBasePtr> & input_args)107 AbstractBasePtr AudioSpectrogramInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
108                                       const std::vector<AbstractBasePtr> &input_args) {
109   MS_EXCEPTION_IF_NULL(primitive);
110   const int64_t input_num = 1;
111   CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, input_num, primitive->name());
112   return std::make_shared<abstract::AbstractTensor>(AudioSpectrogramInferType(primitive, input_args),
113                                                     AudioSpectrogramInferShape(primitive, input_args));
114 }
115 REGISTER_PRIMITIVE_C(kNameAudioSpectrogram, AudioSpectrogram);
116 }  // namespace ops
117 }  // namespace mindspore
118