1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/litert/kernel/cpu/fp32_grad/convolution.h"
18 #include "nnacl/fp32_grad/pack_ext.h"
19 #include "nnacl/fp32_grad/gemm.h"
20 #include "include/errorcode.h"
21 #include "nnacl/pack.h"
22
23 using mindspore::kernel::KERNEL_ARCH;
24 using mindspore::lite::RET_ERROR;
25 using mindspore::lite::RET_OK;
26
27 namespace mindspore::kernel {
ReSize()28 int ConvolutionTrainCPUKernel::ReSize() {
29 if (in_tensors_.size() < 2) {
30 MS_LOG(ERROR) << "Convolution should have at least two inputs";
31 return RET_ERROR;
32 }
33 if (out_tensors_.size() != 1) {
34 MS_LOG(ERROR) << "Convolution should have one output";
35 return RET_ERROR;
36 }
37 auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
38 auto *input_x = in_tensors_.at(kInputIndex);
39 auto *input_weight = in_tensors_.at(kWeightIndex);
40 auto *out_y = out_tensors_.at(kOutputIndex);
41
42 conv_param_->output_batch_ = out_y->shape().at(kNHWC_N);
43 conv_param_->input_batch_ = input_x->shape().at(kNHWC_N);
44 conv_param_->input_h_ = input_x->shape().at(kNHWC_H);
45 conv_param_->input_w_ = input_x->shape().at(kNHWC_W);
46 conv_param_->output_h_ = out_y->shape().at(kNHWC_H);
47 conv_param_->output_w_ = out_y->shape().at(kNHWC_W);
48 conv_param_->input_channel_ = input_x->shape().at(kNHWC_C);
49 conv_param_->output_channel_ = input_weight->shape().at(kNHWC_N);
50 conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H);
51 conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);
52
53 conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_;
54 const int n = conv_param_->output_channel_ * conv_param_->group_;
55 const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
56
57 do_img2col_ = !((conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
58 (conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
59 (conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) && (conv_param_->stride_h_ == 1) &&
60 (conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1));
61 do_dw_ = (conv_param_->output_channel_ == conv_param_->group_) &&
62 (conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
63 (conv_param_->dilation_w_ == 1);
64
65 ws_size_ = chunk_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_;
66 ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param_->group_;
67 int mat_alloc = MatSizeTotal(chunk_, n, k, 0);
68 set_workspace_size(static_cast<size_t>(ws_size_ + mat_alloc) * sizeof(float));
69
70 return RET_OK;
71 }
72
Prepare()73 int ConvolutionTrainCPUKernel::Prepare() { return ReSize(); }
74
DoExecute(int task_id)75 int ConvolutionTrainCPUKernel::DoExecute(int task_id) {
76 auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
77 auto *input_x = in_tensors_.at(kInputIndex);
78 auto *input_w = in_tensors_.at(kWeightIndex);
79 auto *out_y = out_tensors_.at(kOutputIndex);
80
81 auto x_addr = reinterpret_cast<float *>(input_x->MutableData());
82 auto y_addr = reinterpret_cast<float *>(out_y->MutableData());
83 auto w_addr = reinterpret_cast<float *>(input_w->MutableData());
84
85 const int nweights = input_w->ElementsNum();
86 const int in_ch = conv_param_->input_channel_;
87 const int in_h = conv_param_->input_h_;
88 const int in_w = conv_param_->input_w_;
89 const int k_h = conv_param_->kernel_h_;
90 const int k_w = conv_param_->kernel_w_;
91 const int batch = conv_param_->output_batch_;
92 const int out_ch = conv_param_->output_channel_;
93 const int groups = conv_param_->group_;
94 const int out_h = conv_param_->output_h_;
95 const int out_w = conv_param_->output_w_;
96 const int m = out_h * out_w;
97 const int n = out_ch / groups;
98 const int k = k_h * k_w * in_ch / groups;
99 float *workspace_temp = static_cast<float *>(workspace());
100 float *mat_workspace = workspace_temp + ws_size_;
101 int real_chunk;
102 float *mat_a = nullptr;
103 float *im = nullptr;
104 const float *mat_b = nullptr;
105 float *mat_c = nullptr;
106 if (do_dw_) {
107 const int kernel_spatial = k_h * k_w;
108 for (int i = 0; i < batch; ++i) {
109 for (int ci = 0; ci < m; ci += chunk_) {
110 real_chunk = MSMIN(m - ci, chunk_);
111 mat_a = workspace_temp;
112 im = x_addr + (i * in_ch * in_h * in_w);
113 RollingIm2ColPackDwUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
114 for (int j = 0; j < groups; ++j) {
115 mat_b = w_addr + j * nweights / groups;
116 mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
117 GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a + (j * kernel_spatial), k * groups, mat_b, k, 0, mat_c, out_ch,
118 mat_workspace);
119 }
120 }
121 }
122 } else if (do_img2col_) {
123 for (int i = 0; i < batch; ++i) {
124 for (int j = 0; j < groups; ++j) {
125 for (int ci = 0; ci < m; ci += chunk_) {
126 real_chunk = MSMIN(m - ci, chunk_);
127 mat_a = workspace_temp;
128 mat_b = w_addr + j * nweights / groups;
129 mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
130 im = x_addr + i * in_ch * in_h * in_w + j * (in_ch / groups);
131 RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
132 GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
133 }
134 }
135 }
136 } else {
137 mat_b = w_addr;
138 const int in_plane_size = in_ch * in_h * in_w;
139 for (int i = 0; i < batch; ++i) {
140 im = x_addr + i * in_plane_size;
141 for (int ci = 0; ci < m; ci += chunk_) {
142 real_chunk = MSMIN(m - ci, chunk_);
143 mat_c = y_addr + i * n * m + ci * out_ch;
144 int input_height = ci / out_w * conv_param_->stride_h_;
145 int input_width = ci % out_w * conv_param_->stride_w_;
146 int offset = (input_height * in_w + input_width) * in_ch;
147 GemmMatmul(0, 1, real_chunk, n, k, 1, im + offset, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
148 }
149 }
150 }
151 return RET_OK;
152 }
153
ConvolutionTrainRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)154 int ConvolutionTrainRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
155 MS_ASSERT(cdata != nullptr);
156 auto conv_kernel = reinterpret_cast<ConvolutionTrainCPUKernel *>(cdata);
157 auto error_code = conv_kernel->DoExecute(task_id);
158 if (error_code != RET_OK) {
159 MS_LOG(ERROR) << "ConvolutionTrainRun error task_id[" << task_id << "] error_code[" << error_code << "]";
160 return RET_ERROR;
161 }
162 return RET_OK;
163 }
164
Run()165 int ConvolutionTrainCPUKernel::Run() {
166 int error_code = ParallelLaunch(this->ms_context_, ConvolutionTrainRun, this, 1);
167 if (error_code != RET_OK) {
168 MS_LOG(ERROR) << "conv train function error error_code[" << error_code << "]";
169 return RET_ERROR;
170 }
171 return RET_OK;
172 }
173
CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor * > & inputs,const std::vector<lite::Tensor * > & outputs,OpParameter * opParameter,const lite::InnerContext * ctx,const kernel::KernelKey & desc)174 kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
175 const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
176 const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
177 MS_ASSERT(opParameter != nullptr);
178 MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
179
180 auto *kernel = new (std::nothrow) ConvolutionTrainCPUKernel(opParameter, inputs, outputs, ctx);
181 if (kernel == nullptr) {
182 MS_LOG(ERROR) << "new ConvolutionTrainCPUKernel failed!";
183 free(opParameter);
184 return nullptr;
185 }
186 return kernel;
187 }
188 } // namespace mindspore::kernel
189