/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h"
#include "nnacl/fp32/conv_winograd_fp32.h"
#include "nnacl/pack.h"
#include "include/errorcode.h"

using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_MEMORY_FAILED;
using mindspore::lite::RET_NULL_PTR;
using mindspore::lite::RET_OK;

namespace mindspore::kernel {
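// Transform the convolution filter into the Winograd domain and pack the result into packed_weight_,
// grouped by oc_block.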
int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_data, float *matrix_g,
                                                           const float *matrix_gt, int oc_block) {
  if (oc_block == 0) {
    MS_LOG(ERROR) << "Divide by zero";
    return RET_ERROR;
  }

  return WinogradWeightTransform(weight_data, reinterpret_cast<float *>(packed_weight_), matrix_g, matrix_gt, oc_block,
                                 input_unit_, kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_,
                                 true);
}

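// Allocate the per-execution scratch buffers (transformed input, gemm output, per-thread tmp data and
// col buffer) from the context allocator; they are released again in FreeTmpBuffer().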
int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
  MS_ASSERT(ctx_->allocator != nullptr);
  size_t tile_buffer_size =
    thread_count_ * tile_num_ * input_unit_ * input_unit_ * conv_param_->input_channel_ * sizeof(float);
  trans_input_ = reinterpret_cast<float *>(ctx_->allocator->Malloc(tile_buffer_size));
  if (trans_input_ == nullptr) {
    MS_LOG(ERROR) << "malloc trans_input_ failed.";
    return RET_MEMORY_FAILED;
  }

  int oc8 = UP_ROUND(conv_param_->output_channel_, C8NUM);
  gemm_out_ = reinterpret_cast<float *>(
    ctx_->allocator->Malloc(thread_count_ * tile_num_ * input_unit_ * input_unit_ * oc8 * sizeof(float)));
  if (gemm_out_ == nullptr) {
    MS_LOG(ERROR) << "malloc gemm_out_ failed.";
    return RET_ERROR;
  }

  tmp_data_ = reinterpret_cast<float *>(
    ctx_->allocator->Malloc(thread_count_ * C4NUM * input_unit_ * input_unit_ * sizeof(float)));
  if (tmp_data_ == nullptr) {
    MS_LOG(ERROR) << "malloc tmp_data_ failed.";
    return RET_MEMORY_FAILED;
  }

  col_buffer_ = reinterpret_cast<float *>(
    ctx_->allocator->Malloc(thread_count_ * tile_num_ * conv_param_->input_channel_ * sizeof(float)));
  if (col_buffer_ == nullptr) {
    MS_LOG(ERROR) << "malloc col_buffer_ failed.";
    return RET_ERROR;
  }

  tmp_buffer_address_list_[0] = trans_input_;
  tmp_buffer_address_list_[1] = gemm_out_;
  tmp_buffer_address_list_[2] = tmp_data_;
  tmp_buffer_address_list_[3] = col_buffer_;
  return RET_OK;
}

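// Pick the input/output transform functions that match the configured Winograd unit sizes and activation type.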
int ConvolutionWinogradCPUKernel::ConfigInputOutput() {
  in_func_ = GetInputTransFunc(input_unit_);
  if (in_func_ == nullptr) {
    MS_LOG(ERROR) << "in_func_ is null.";
    return RET_ERROR;
  }
  out_func_ = GetOutputTransFunc(input_unit_, output_unit_, conv_param_->act_type_);
  if (out_func_ == nullptr) {
    MS_LOG(ERROR) << "out_func_ is null.";
    return RET_ERROR;
  }
  return RET_OK;
}

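// Choose tile and oc block sizes, derive the Winograd unit sizes from the kernel size, and initialize
// the packed weight and bias buffers.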
int ConvolutionWinogradCPUKernel::Init() {
  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
  CHECK_LESS_RETURN(out_tensors_.size(), 1);
  tile_num_ = C12NUM;
#ifdef ENABLE_AVX
  oc_block_ = C16NUM;
#else
  oc_block_ = C8NUM;
#endif
  kernel_unit_ = conv_param_->kernel_h_;
  input_unit_ = output_unit_ + kernel_unit_ - 1;
  conv_param_->input_unit_ = input_unit_;
  conv_param_->output_unit_ = output_unit_;
  if (op_parameter_->is_train_session_) {
    auto filter_tensor = in_tensors_.at(kWeightIndex);
    CHECK_NULL_RETURN(filter_tensor);
    int in_channel = filter_tensor->Channel();
    int out_channel = filter_tensor->Batch();
    auto trans_matrix_data_size =
      input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float);
    set_workspace_size(trans_matrix_data_size);
  }
  auto ret = InitConvWeightBias();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init weight bias failed.";
    return RET_ERROR;
  }
  return RET_OK;
}

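// Re-run the shape-dependent setup after the input tensors have been resized.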
int ConvolutionWinogradCPUKernel::ReSize() {
  auto ret = ConvolutionBaseCPUKernel::CheckResizeValid();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Resize is invalid.";
    return ret;
  }
  ret = ConvolutionBaseCPUKernel::Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "conv base init failed.";
    return ret;
  }
  ret = ConfigInputOutput();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConfigInputOutput failed.";
    return RET_ERROR;
  }
  conv_param_->out_format_ = out_tensors_[0]->format();
  return RET_OK;
}

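// Per-task body: run the Winograd convolution on the slice of work assigned to task_id.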
int ConvolutionWinogradCPUKernel::RunImpl(int task_id) {
  auto input_tensor = in_tensors_.at(kInputIndex);
  CHECK_NULL_RETURN(input_tensor);
  auto ori_input_data = reinterpret_cast<float *>(input_tensor->data());
  CHECK_NULL_RETURN(ori_input_data);
  CHECK_NULL_RETURN(out_tensors_.front());
  auto output_data = reinterpret_cast<float *>(out_tensors_.front()->data());
  CHECK_NULL_RETURN(output_data);
  ConvWinogardFp32(ori_input_data, reinterpret_cast<float *>(packed_weight_),
                   reinterpret_cast<const float *>(bias_data_), output_data, tmp_buffer_address_list_, task_id,
                   conv_param_, in_func_, out_func_);
  return RET_OK;
}

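// Callback handed to ParallelLaunch; forwards each task to the kernel's RunImpl.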
int ConvolutionWinogradImpl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
  auto conv = reinterpret_cast<ConvolutionWinogradCPUKernel *>(cdata);
  auto error_code = conv->RunImpl(task_id);
  if (error_code != RET_OK) {
    MS_LOG(ERROR) << "ConvolutionWinograd Run error task_id[" << task_id << "] error_code[" << error_code << "]";
    return RET_ERROR;
  }
  return RET_OK;
}

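// Allocate scratch buffers, repack weights if required, run the convolution in parallel, then release the buffers.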
int ConvolutionWinogradCPUKernel::Run() {
  auto ret = InitTmpBuffer();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init tmp buffer failed.";
    FreeTmpBuffer();
    return RET_ERROR;
  }
  if (RepackWeight() != RET_OK) {
    MS_LOG(ERROR) << "Repack weight failed.";
    FreeTmpBuffer();
    return RET_ERROR;
  }

  ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradImpl, this, thread_count_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "conv winograd error, error_code[" << ret << "]";
  }

  FreeTmpBuffer();
  return ret;
}

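// Allocate the packed weight and bias buffers and generate the Cook-Toom transform matrices for the
// chosen unit sizes.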
int ConvolutionWinogradCPUKernel::MallocWeightBiasData() {
  auto filter_tensor = in_tensors_.at(kWeightIndex);
  int in_channel = filter_tensor->Channel();
  if (in_channel < 0) {
    MS_LOG(ERROR) << "get channel from filter tensor failed.";
    return RET_ERROR;
  }
  int out_channel = filter_tensor->Batch();
  if (out_channel < 0) {
    MS_LOG(ERROR) << "get batch from filter tensor failed.";
    return RET_ERROR;
  }
  conv_param_->input_channel_ = in_channel;
  conv_param_->output_channel_ = out_channel;

  // set data
  auto trans_matrix_data_size =
    input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float);
  if (!op_parameter_->is_train_session_) {
    if (packed_weight_ == nullptr) {
      packed_weight_ = malloc(trans_matrix_data_size);
      if (packed_weight_ == nullptr) {
        MS_LOG(ERROR) << "malloc packed_weight_ failed.";
        return RET_MEMORY_FAILED;
      }
    }
    memset(packed_weight_, 0, trans_matrix_data_size);
  }

  float matrix_a[64];
  float matrix_at[64];
  float matrix_b[64];
  float matrix_bt[64];
  float coef = 1.0f;
  if (input_unit_ == CONV_INPUT_UNIT_SIZE) {
    coef = 0.5f;
  }
  auto ret =
    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
    return ret;
  }

  // init bias
  size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float);
  if (bias_data_ == nullptr) {
    bias_data_ = malloc(new_bias_size);
    if (bias_data_ == nullptr) {
      MS_LOG(ERROR) << "malloc bias_data_ failed.";
      return RET_MEMORY_FAILED;
    }
  }
  memset(bias_data_, 0, new_bias_size);
  return RET_OK;
}

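// Repack the origin weights into the Winograd domain; in a train session the weights are read directly
// from the tensor so recent updates are used.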
void ConvolutionWinogradCPUKernel::PackWeight() {
  auto weight_tensor = in_tensors_.at(kWeightIndex);
  void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_;
  MS_ASSERT(origin_weight != nullptr);
  WinogradFilterTransform(reinterpret_cast<float *>(origin_weight), matrix_g_, matrix_gt_, oc_block_);
}
}  // namespace mindspore::kernel