• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h"
18 #include "nnacl/fp32/conv_winograd_fp32.h"
19 #include "nnacl/pack.h"
20 #include "include/errorcode.h"
21 
22 using mindspore::lite::RET_ERROR;
23 using mindspore::lite::RET_MEMORY_FAILED;
24 using mindspore::lite::RET_NULL_PTR;
25 using mindspore::lite::RET_OK;
26 
27 namespace mindspore::kernel {
WinogradFilterTransform(const float * weight_data,float * matrix_g,const float * matrix_gt,int oc_block)28 int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_data, float *matrix_g,
29                                                           const float *matrix_gt, int oc_block) {
30   if (oc_block == 0) {
31     MS_LOG(ERROR) << "Divide by zero";
32     return RET_ERROR;
33   }
34 
35   return WinogradWeightTransform(weight_data, reinterpret_cast<float *>(packed_weight_), matrix_g, matrix_gt, oc_block,
36                                  input_unit_, kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_,
37                                  true);
38 }
39 
InitTmpBuffer()40 int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
41   MS_ASSERT(ctx_->allocator != nullptr);
42   size_t tile_buffer_size =
43     thread_count_ * tile_num_ * input_unit_ * input_unit_ * conv_param_->input_channel_ * sizeof(float);
44   trans_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(tile_buffer_size));
45   if (trans_input_ == nullptr) {
46     MS_LOG(ERROR) << "malloc trans_input_ failed.";
47     return RET_MEMORY_FAILED;
48   }
49 
50   int oc8 = UP_ROUND(conv_param_->output_channel_, C8NUM);
51   gemm_out_ = reinterpret_cast<float *>(
52     ctx_->allocator->Malloc(thread_count_ * tile_num_ * input_unit_ * input_unit_ * oc8 * sizeof(float)));
53   if (gemm_out_ == nullptr) {
54     MS_LOG(ERROR) << "malloc gemm_out_ failed.";
55     return RET_ERROR;
56   }
57 
58   tmp_data_ = reinterpret_cast<float *>(
59     ctx_->allocator->Malloc(thread_count_ * C4NUM * input_unit_ * input_unit_ * sizeof(float)));
60   if (tmp_data_ == nullptr) {
61     MS_LOG(ERROR) << "malloc tmp_data_ failed.";
62     return RET_MEMORY_FAILED;
63   }
64 
65   col_buffer_ = reinterpret_cast<float *>(
66     ctx_->allocator->Malloc(thread_count_ * tile_num_ * conv_param_->input_channel_ * sizeof(float)));
67   if (col_buffer_ == nullptr) {
68     MS_LOG(ERROR) << "malloc col_buffer_ failed.";
69     return RET_ERROR;
70   }
71 
72   tmp_buffer_address_list_[0] = trans_input_;
73   tmp_buffer_address_list_[1] = gemm_out_;
74   tmp_buffer_address_list_[2] = tmp_data_;
75   tmp_buffer_address_list_[3] = col_buffer_;
76   return RET_OK;
77 }
78 
ConfigInputOutput()79 int ConvolutionWinogradCPUKernel::ConfigInputOutput() {
80   in_func_ = GetInputTransFunc(input_unit_);
81   if (in_func_ == nullptr) {
82     MS_LOG(ERROR) << "in_func_ is null.";
83     return RET_ERROR;
84   }
85   out_func_ = GetOutputTransFunc(input_unit_, output_unit_, conv_param_->act_type_);
86   if (out_func_ == nullptr) {
87     MS_LOG(ERROR) << "out_func_ is null.";
88     return RET_ERROR;
89   }
90   return RET_OK;
91 }
92 
Init()93 int ConvolutionWinogradCPUKernel::Init() {
94   CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
95   CHECK_LESS_RETURN(out_tensors_.size(), 1);
96   tile_num_ = C12NUM;
97 #ifdef ENABLE_AVX
98   oc_block_ = C16NUM;
99 #else
100   oc_block_ = C8NUM;
101 #endif
102   kernel_unit_ = conv_param_->kernel_h_;
103   input_unit_ = output_unit_ + kernel_unit_ - 1;
104   conv_param_->input_unit_ = input_unit_;
105   conv_param_->output_unit_ = output_unit_;
106   if (op_parameter_->is_train_session_) {
107     auto filter_tensor = in_tensors_.at(kWeightIndex);
108     CHECK_NULL_RETURN(filter_tensor);
109     int in_channel = filter_tensor->Channel();
110     int out_channel = filter_tensor->Batch();
111     auto trans_matrix_data_size =
112       input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float);
113     set_workspace_size(trans_matrix_data_size);
114   }
115   auto ret = InitConvWeightBias();
116   if (ret != RET_OK) {
117     MS_LOG(ERROR) << "Init weight bias failed.";
118     return RET_ERROR;
119   }
120   return RET_OK;
121 }
122 
ReSize()123 int ConvolutionWinogradCPUKernel::ReSize() {
124   auto ret = ConvolutionBaseCPUKernel::CheckResizeValid();
125   if (ret != RET_OK) {
126     MS_LOG(ERROR) << "Resize is invalid.";
127     return ret;
128   }
129   ret = ConvolutionBaseCPUKernel::Init();
130   if (ret != RET_OK) {
131     MS_LOG(ERROR) << "conv base init failed.";
132     return ret;
133   }
134   ret = ConfigInputOutput();
135   if (ret != RET_OK) {
136     MS_LOG(ERROR) << "ConfigInputOutput failed.";
137     return RET_ERROR;
138   }
139   conv_param_->out_format_ = out_tensors_[0]->format();
140   return RET_OK;
141 }
142 
RunImpl(int task_id)143 int ConvolutionWinogradCPUKernel::RunImpl(int task_id) {
144   auto input_tensor = in_tensors_.at(kInputIndex);
145   CHECK_NULL_RETURN(input_tensor);
146   auto ori_input_data = reinterpret_cast<float *>(input_tensor->data());
147   CHECK_NULL_RETURN(ori_input_data);
148   CHECK_NULL_RETURN(out_tensors_.front());
149   auto output_data = reinterpret_cast<float *>(out_tensors_.front()->data());
150   CHECK_NULL_RETURN(output_data);
151   ConvWinogardFp32(ori_input_data, reinterpret_cast<float *>(packed_weight_),
152                    reinterpret_cast<const float *>(bias_data_), output_data, tmp_buffer_address_list_, task_id,
153                    conv_param_, in_func_, out_func_);
154   return RET_OK;
155 }
156 
ConvolutionWinogradImpl(void * cdata,int task_id,float lhs_scale,float rhs_scale)157 int ConvolutionWinogradImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
158   auto conv = reinterpret_cast<ConvolutionWinogradCPUKernel *>(cdata);
159   auto error_code = conv->RunImpl(task_id);
160   if (error_code != RET_OK) {
161     MS_LOG(ERROR) << "ConvolutionWinograd Run error task_id[" << task_id << "] error_code[" << error_code << "]";
162     return RET_ERROR;
163   }
164   return RET_OK;
165 }
166 
Run()167 int ConvolutionWinogradCPUKernel::Run() {
168   auto ret = InitTmpBuffer();
169   if (ret != RET_OK) {
170     MS_LOG(ERROR) << "Init tmp buffer failed.";
171     FreeTmpBuffer();
172     return RET_ERROR;
173   }
174   if (RepackWeight() != RET_OK) {
175     MS_LOG(ERROR) << "Repack weight failed.";
176     return RET_ERROR;
177   }
178 
179   ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradImpl, this, thread_count_);
180   if (ret != RET_OK) {
181     MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
182   }
183 
184   FreeTmpBuffer();
185   return ret;
186 }
187 
MallocWeightBiasData()188 int ConvolutionWinogradCPUKernel::MallocWeightBiasData() {
189   auto filter_tensor = in_tensors_.at(kWeightIndex);
190   int in_channel = filter_tensor->Channel();
191   if (in_channel < 0) {
192     MS_LOG(ERROR) << "get channel from filter tensor failed.";
193     return RET_ERROR;
194   }
195   int out_channel = filter_tensor->Batch();
196   if (out_channel < 0) {
197     MS_LOG(ERROR) << "get batch from filter tensor failed.";
198     return RET_ERROR;
199   }
200   conv_param_->input_channel_ = in_channel;
201   conv_param_->output_channel_ = out_channel;
202 
203   // set data
204   auto trans_matrix_data_size =
205     input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float);
206   if (!op_parameter_->is_train_session_) {
207     if (packed_weight_ == nullptr) {
208       packed_weight_ = malloc(trans_matrix_data_size);
209       if (packed_weight_ == nullptr) {
210         MS_LOG(ERROR) << "malloc matrix_buffer failed.";
211         return RET_MEMORY_FAILED;
212       }
213     }
214     memset(packed_weight_, 0, trans_matrix_data_size);
215   }
216 
217   float matrix_a[64];
218   float matrix_at[64];
219   float matrix_b[64];
220   float matrix_bt[64];
221   float coef = 1.0f;
222   if (input_unit_ == CONV_INPUT_UNIT_SIZE) {
223     coef = 0.5f;
224   }
225   auto ret =
226     CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_);
227   if (ret != RET_OK) {
228     MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
229     return ret;
230   }
231 
232   // init bias
233   size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float);
234   if (bias_data_ == nullptr) {
235     bias_data_ = malloc(new_bias_size);
236     if (bias_data_ == nullptr) {
237       MS_LOG(ERROR) << "malloc bias_data_ failed.";
238       return RET_MEMORY_FAILED;
239     }
240   }
241   memset(bias_data_, 0, new_bias_size);
242   return RET_OK;
243 }
244 
PackWeight()245 void ConvolutionWinogradCPUKernel::PackWeight() {
246   auto weight_tensor = in_tensors_.at(kWeightIndex);
247   void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_;
248   MS_ASSERT(origin_weight != nullptr);
249   WinogradFilterTransform(reinterpret_cast<float *>(origin_weight), matrix_g_, matrix_gt_, oc_block_);
250 }
251 }  // namespace mindspore::kernel
252