1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/runtime/kernel/arm/fp32_grad/convolution.h"
18 #include "nnacl/fp32_grad/pack_ext.h"
19 #include "nnacl/fp32_grad/gemm.h"
20 #include "include/errorcode.h"
21 #include "nnacl/pack.h"
22
23 using mindspore::kernel::KERNEL_ARCH;
24 using mindspore::lite::RET_ERROR;
25 using mindspore::lite::RET_OK;
26
27 namespace mindspore::kernel {
ReSize()28 int ConvolutionTrainCPUKernel::ReSize() {
29 if (in_tensors_.size() < 2) {
30 MS_LOG(ERROR) << "Convolution should have at least two inputs";
31 return RET_ERROR;
32 }
33 if (out_tensors_.size() != 1) {
34 MS_LOG(ERROR) << "Convolution should have one output";
35 return RET_ERROR;
36 }
37 auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
38 auto *input_x = in_tensors_.at(kInputIndex);
39 auto *input_weight = in_tensors_.at(kWeightIndex);
40 auto *out_y = out_tensors_.at(kOutputIndex);
41
42 conv_param_->output_batch_ = out_y->shape().at(kNHWC_N);
43 conv_param_->input_batch_ = input_x->shape().at(kNHWC_N);
44 conv_param_->input_h_ = input_x->shape().at(kNHWC_H);
45 conv_param_->input_w_ = input_x->shape().at(kNHWC_W);
46 conv_param_->output_h_ = out_y->shape().at(kNHWC_H);
47 conv_param_->output_w_ = out_y->shape().at(kNHWC_W);
48 conv_param_->input_channel_ = input_x->shape().at(kNHWC_C);
49 conv_param_->output_channel_ = input_weight->shape().at(kNHWC_N);
50 conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H);
51 conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);
52
53 conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_;
54 const int n = conv_param_->output_channel_ * conv_param_->group_;
55 const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
56
57 do_img2col_ = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
58 (conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
59 (conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) &&
60 (conv_param_->stride_h_ == 1) && (conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1)
61 ? false
62 : true;
63 do_dw_ = (conv_param_->output_channel_ == conv_param_->group_) &&
64 (conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
65 (conv_param_->dilation_w_ == 1)
66 ? true
67 : false;
68
69 ws_size_ = chunk_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_;
70 ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param_->group_;
71 int mat_alloc = MatSizeTotal(chunk_, n, k, 0);
72 set_workspace_size((ws_size_ + mat_alloc) * sizeof(float));
73
74 return RET_OK;
75 }
76
Init()77 int ConvolutionTrainCPUKernel::Init() { return ReSize(); }
78
Execute(int task_id)79 int ConvolutionTrainCPUKernel::Execute(int task_id) {
80 auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
81 auto *input_x = in_tensors_.at(kInputIndex);
82 auto *input_w = in_tensors_.at(kWeightIndex);
83 auto *out_y = out_tensors_.at(kOutputIndex);
84
85 auto x_addr = reinterpret_cast<float *>(input_x->MutableData());
86 auto y_addr = reinterpret_cast<float *>(out_y->MutableData());
87 auto w_addr = reinterpret_cast<float *>(input_w->MutableData());
88
89 const int nweights = input_w->ElementsNum();
90 const int in_ch = conv_param_->input_channel_;
91 const int in_h = conv_param_->input_h_;
92 const int in_w = conv_param_->input_w_;
93 const int k_h = conv_param_->kernel_h_;
94 const int k_w = conv_param_->kernel_w_;
95 const int batch = conv_param_->output_batch_;
96 const int out_ch = conv_param_->output_channel_;
97 const int groups = conv_param_->group_;
98 const int out_h = conv_param_->output_h_;
99 const int out_w = conv_param_->output_w_;
100 const int m = out_h * out_w;
101 const int n = out_ch / groups;
102 const int k = k_h * k_w * in_ch / groups;
103 float *workspace_temp = static_cast<float *>(workspace());
104 float *mat_workspace = workspace_temp + ws_size_;
105 int real_chunk;
106 float *mat_a = nullptr;
107 float *im = nullptr;
108 const float *mat_b = nullptr;
109 float *mat_c = nullptr;
110 if (do_dw_) {
111 const int kernel_spatial = k_h * k_w;
112 for (int i = 0; i < batch; ++i) {
113 for (int ci = 0; ci < m; ci += chunk_) {
114 real_chunk = MSMIN(m - ci, chunk_);
115 mat_a = workspace_temp;
116 im = x_addr + (i * in_ch * in_h * in_w);
117 RollingIm2ColPackDwUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
118 for (int j = 0; j < groups; ++j) {
119 mat_b = w_addr + j * nweights / groups;
120 mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
121 GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a + (j * kernel_spatial), k * groups, mat_b, k, 0, mat_c, out_ch,
122 mat_workspace);
123 }
124 }
125 }
126 } else if (do_img2col_) {
127 for (int i = 0; i < batch; ++i) {
128 for (int j = 0; j < groups; ++j) {
129 for (int ci = 0; ci < m; ci += chunk_) {
130 real_chunk = MSMIN(m - ci, chunk_);
131 mat_a = workspace_temp;
132 mat_b = w_addr + j * nweights / groups;
133 mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
134 im = x_addr + i * in_ch * in_h * in_w + j * (in_ch / groups);
135 RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
136 GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
137 }
138 }
139 }
140 } else {
141 mat_b = w_addr;
142 const size_t in_plane_size = in_ch * in_h * in_w;
143 for (int i = 0; i < batch; ++i) {
144 im = x_addr + i * in_plane_size;
145 for (int ci = 0; ci < m; ci += chunk_) {
146 real_chunk = MSMIN(m - ci, chunk_);
147 mat_c = y_addr + i * n * m + ci * out_ch;
148 int input_height = ci / out_w * conv_param_->stride_h_;
149 int input_width = ci % out_w * conv_param_->stride_w_;
150 int offset = (input_height * in_w + input_width) * in_ch;
151 GemmMatmul(0, 1, real_chunk, n, k, 1, im + offset, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
152 }
153 }
154 }
155 return RET_OK;
156 }
157
ConvolutionTrainRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)158 int ConvolutionTrainRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
159 MS_ASSERT(cdata != nullptr);
160 auto conv_kernel = reinterpret_cast<ConvolutionTrainCPUKernel *>(cdata);
161 auto error_code = conv_kernel->Execute(task_id);
162 if (error_code != RET_OK) {
163 MS_LOG(ERROR) << "ConvolutionTrainRun error task_id[" << task_id << "] error_code[" << error_code << "]";
164 return RET_ERROR;
165 }
166 return RET_OK;
167 }
168
Run()169 int ConvolutionTrainCPUKernel::Run() {
170 int error_code = ParallelLaunch(this->ms_context_, ConvolutionTrainRun, this, 1);
171 if (error_code != RET_OK) {
172 MS_LOG(ERROR) << "conv train function error error_code[" << error_code << "]";
173 return RET_ERROR;
174 }
175 return RET_OK;
176 }
177
CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor * > & inputs,const std::vector<lite::Tensor * > & outputs,OpParameter * opParameter,const lite::Context * ctx,const kernel::KernelKey & desc)178 kernel::InnerKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
179 const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
180 const lite::Context *ctx, const kernel::KernelKey &desc) {
181 MS_ASSERT(opParameter != nullptr);
182 MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
183
184 auto *kernel = new (std::nothrow)
185 ConvolutionTrainCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
186 if (kernel == nullptr) {
187 MS_LOG(ERROR) << "new ConvolutionTrainCPUKernel failed!";
188 free(opParameter);
189 return nullptr;
190 }
191 return kernel;
192 }
193 } // namespace mindspore::kernel
194