• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/litert/kernel/cpu/fp32_grad/convolution.h"
18 #include "nnacl/fp32_grad/pack_ext.h"
19 #include "nnacl/fp32_grad/gemm.h"
20 #include "include/errorcode.h"
21 #include "nnacl/pack.h"
22 
23 using mindspore::kernel::KERNEL_ARCH;
24 using mindspore::lite::RET_ERROR;
25 using mindspore::lite::RET_OK;
26 
27 namespace mindspore::kernel {
ReSize()28 int ConvolutionTrainCPUKernel::ReSize() {
29   if (in_tensors_.size() < 2) {
30     MS_LOG(ERROR) << "Convolution should have at least two inputs";
31     return RET_ERROR;
32   }
33   if (out_tensors_.size() != 1) {
34     MS_LOG(ERROR) << "Convolution should have one output";
35     return RET_ERROR;
36   }
37   auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
38   auto *input_x = in_tensors_.at(kInputIndex);
39   auto *input_weight = in_tensors_.at(kWeightIndex);
40   auto *out_y = out_tensors_.at(kOutputIndex);
41 
42   conv_param_->output_batch_ = out_y->shape().at(kNHWC_N);
43   conv_param_->input_batch_ = input_x->shape().at(kNHWC_N);
44   conv_param_->input_h_ = input_x->shape().at(kNHWC_H);
45   conv_param_->input_w_ = input_x->shape().at(kNHWC_W);
46   conv_param_->output_h_ = out_y->shape().at(kNHWC_H);
47   conv_param_->output_w_ = out_y->shape().at(kNHWC_W);
48   conv_param_->input_channel_ = input_x->shape().at(kNHWC_C);
49   conv_param_->output_channel_ = input_weight->shape().at(kNHWC_N);
50   conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H);
51   conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);
52 
53   conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_;
54   const int n = conv_param_->output_channel_ * conv_param_->group_;
55   const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
56 
57   do_img2col_ = !((conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
58                   (conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
59                   (conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) && (conv_param_->stride_h_ == 1) &&
60                   (conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1));
61   do_dw_ = (conv_param_->output_channel_ == conv_param_->group_) &&
62            (conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
63            (conv_param_->dilation_w_ == 1);
64 
65   ws_size_ = chunk_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_;
66   ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param_->group_;
67   int mat_alloc = MatSizeTotal(chunk_, n, k, 0);
68   set_workspace_size(static_cast<size_t>(ws_size_ + mat_alloc) * sizeof(float));
69 
70   return RET_OK;
71 }
72 
Prepare()73 int ConvolutionTrainCPUKernel::Prepare() { return ReSize(); }
74 
DoExecute(int task_id)75 int ConvolutionTrainCPUKernel::DoExecute(int task_id) {
76   auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
77   auto *input_x = in_tensors_.at(kInputIndex);
78   auto *input_w = in_tensors_.at(kWeightIndex);
79   auto *out_y = out_tensors_.at(kOutputIndex);
80 
81   auto x_addr = reinterpret_cast<float *>(input_x->MutableData());
82   auto y_addr = reinterpret_cast<float *>(out_y->MutableData());
83   auto w_addr = reinterpret_cast<float *>(input_w->MutableData());
84 
85   const int nweights = input_w->ElementsNum();
86   const int in_ch = conv_param_->input_channel_;
87   const int in_h = conv_param_->input_h_;
88   const int in_w = conv_param_->input_w_;
89   const int k_h = conv_param_->kernel_h_;
90   const int k_w = conv_param_->kernel_w_;
91   const int batch = conv_param_->output_batch_;
92   const int out_ch = conv_param_->output_channel_;
93   const int groups = conv_param_->group_;
94   const int out_h = conv_param_->output_h_;
95   const int out_w = conv_param_->output_w_;
96   const int m = out_h * out_w;
97   const int n = out_ch / groups;
98   const int k = k_h * k_w * in_ch / groups;
99   float *workspace_temp = static_cast<float *>(workspace());
100   float *mat_workspace = workspace_temp + ws_size_;
101   int real_chunk;
102   float *mat_a = nullptr;
103   float *im = nullptr;
104   const float *mat_b = nullptr;
105   float *mat_c = nullptr;
106   if (do_dw_) {
107     const int kernel_spatial = k_h * k_w;
108     for (int i = 0; i < batch; ++i) {
109       for (int ci = 0; ci < m; ci += chunk_) {
110         real_chunk = MSMIN(m - ci, chunk_);
111         mat_a = workspace_temp;
112         im = x_addr + (i * in_ch * in_h * in_w);
113         RollingIm2ColPackDwUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
114         for (int j = 0; j < groups; ++j) {
115           mat_b = w_addr + j * nweights / groups;
116           mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
117           GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a + (j * kernel_spatial), k * groups, mat_b, k, 0, mat_c, out_ch,
118                      mat_workspace);
119         }
120       }
121     }
122   } else if (do_img2col_) {
123     for (int i = 0; i < batch; ++i) {
124       for (int j = 0; j < groups; ++j) {
125         for (int ci = 0; ci < m; ci += chunk_) {
126           real_chunk = MSMIN(m - ci, chunk_);
127           mat_a = workspace_temp;
128           mat_b = w_addr + j * nweights / groups;
129           mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
130           im = x_addr + i * in_ch * in_h * in_w + j * (in_ch / groups);
131           RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
132           GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
133         }
134       }
135     }
136   } else {
137     mat_b = w_addr;
138     const int in_plane_size = in_ch * in_h * in_w;
139     for (int i = 0; i < batch; ++i) {
140       im = x_addr + i * in_plane_size;
141       for (int ci = 0; ci < m; ci += chunk_) {
142         real_chunk = MSMIN(m - ci, chunk_);
143         mat_c = y_addr + i * n * m + ci * out_ch;
144         int input_height = ci / out_w * conv_param_->stride_h_;
145         int input_width = ci % out_w * conv_param_->stride_w_;
146         int offset = (input_height * in_w + input_width) * in_ch;
147         GemmMatmul(0, 1, real_chunk, n, k, 1, im + offset, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
148       }
149     }
150   }
151   return RET_OK;
152 }
153 
ConvolutionTrainRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)154 int ConvolutionTrainRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
155   MS_ASSERT(cdata != nullptr);
156   auto conv_kernel = reinterpret_cast<ConvolutionTrainCPUKernel *>(cdata);
157   auto error_code = conv_kernel->DoExecute(task_id);
158   if (error_code != RET_OK) {
159     MS_LOG(ERROR) << "ConvolutionTrainRun error task_id[" << task_id << "] error_code[" << error_code << "]";
160     return RET_ERROR;
161   }
162   return RET_OK;
163 }
164 
Run()165 int ConvolutionTrainCPUKernel::Run() {
166   int error_code = ParallelLaunch(this->ms_context_, ConvolutionTrainRun, this, 1);
167   if (error_code != RET_OK) {
168     MS_LOG(ERROR) << "conv train function error error_code[" << error_code << "]";
169     return RET_ERROR;
170   }
171   return RET_OK;
172 }
173 
CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor * > & inputs,const std::vector<lite::Tensor * > & outputs,OpParameter * opParameter,const lite::InnerContext * ctx,const kernel::KernelKey & desc)174 kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
175                                                   const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
176                                                   const lite::InnerContext *ctx, const kernel::KernelKey &desc) {
177   MS_ASSERT(opParameter != nullptr);
178   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
179 
180   auto *kernel = new (std::nothrow) ConvolutionTrainCPUKernel(opParameter, inputs, outputs, ctx);
181   if (kernel == nullptr) {
182     MS_LOG(ERROR) << "new ConvolutionTrainCPUKernel failed!";
183     free(opParameter);
184     return nullptr;
185   }
186   return kernel;
187 }
188 }  // namespace mindspore::kernel
189