• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/runtime/kernel/arm/fp32_grad/convolution.h"
18 #include "nnacl/fp32_grad/pack_ext.h"
19 #include "nnacl/fp32_grad/gemm.h"
20 #include "include/errorcode.h"
21 #include "nnacl/pack.h"
22 
23 using mindspore::kernel::KERNEL_ARCH;
24 using mindspore::lite::RET_ERROR;
25 using mindspore::lite::RET_OK;
26 
27 namespace mindspore::kernel {
ReSize()28 int ConvolutionTrainCPUKernel::ReSize() {
29   if (in_tensors_.size() < 2) {
30     MS_LOG(ERROR) << "Convolution should have at least two inputs";
31     return RET_ERROR;
32   }
33   if (out_tensors_.size() != 1) {
34     MS_LOG(ERROR) << "Convolution should have one output";
35     return RET_ERROR;
36   }
37   auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
38   auto *input_x = in_tensors_.at(kInputIndex);
39   auto *input_weight = in_tensors_.at(kWeightIndex);
40   auto *out_y = out_tensors_.at(kOutputIndex);
41 
42   conv_param_->output_batch_ = out_y->shape().at(kNHWC_N);
43   conv_param_->input_batch_ = input_x->shape().at(kNHWC_N);
44   conv_param_->input_h_ = input_x->shape().at(kNHWC_H);
45   conv_param_->input_w_ = input_x->shape().at(kNHWC_W);
46   conv_param_->output_h_ = out_y->shape().at(kNHWC_H);
47   conv_param_->output_w_ = out_y->shape().at(kNHWC_W);
48   conv_param_->input_channel_ = input_x->shape().at(kNHWC_C);
49   conv_param_->output_channel_ = input_weight->shape().at(kNHWC_N);
50   conv_param_->kernel_h_ = input_weight->shape().at(kNHWC_H);
51   conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W);
52 
53   conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_;
54   const int n = conv_param_->output_channel_ * conv_param_->group_;
55   const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_;
56 
57   do_img2col_ = (conv_param_->kernel_h_ == 1) && (conv_param_->kernel_w_ == 1) && (conv_param_->pad_d_ == 0) &&
58                     (conv_param_->pad_u_ == 0) && (conv_param_->pad_l_ == 0) && (conv_param_->pad_r_ == 0) &&
59                     (conv_param_->dilation_h_ == 1) && (conv_param_->dilation_w_ == 1) &&
60                     (conv_param_->stride_h_ == 1) && (conv_param_->stride_w_ == 1) && (conv_param_->group_ == 1)
61                   ? false
62                   : true;
63   do_dw_ = (conv_param_->output_channel_ == conv_param_->group_) &&
64                (conv_param_->input_channel_ == conv_param_->output_channel_) && (conv_param_->dilation_h_ == 1) &&
65                (conv_param_->dilation_w_ == 1)
66              ? true
67              : false;
68 
69   ws_size_ = chunk_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_;
70   ws_size_ = do_dw_ ? ws_size_ : ws_size_ / conv_param_->group_;
71   int mat_alloc = MatSizeTotal(chunk_, n, k, 0);
72   set_workspace_size((ws_size_ + mat_alloc) * sizeof(float));
73 
74   return RET_OK;
75 }
76 
Init()77 int ConvolutionTrainCPUKernel::Init() { return ReSize(); }
78 
Execute(int task_id)79 int ConvolutionTrainCPUKernel::Execute(int task_id) {
80   auto conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
81   auto *input_x = in_tensors_.at(kInputIndex);
82   auto *input_w = in_tensors_.at(kWeightIndex);
83   auto *out_y = out_tensors_.at(kOutputIndex);
84 
85   auto x_addr = reinterpret_cast<float *>(input_x->MutableData());
86   auto y_addr = reinterpret_cast<float *>(out_y->MutableData());
87   auto w_addr = reinterpret_cast<float *>(input_w->MutableData());
88 
89   const int nweights = input_w->ElementsNum();
90   const int in_ch = conv_param_->input_channel_;
91   const int in_h = conv_param_->input_h_;
92   const int in_w = conv_param_->input_w_;
93   const int k_h = conv_param_->kernel_h_;
94   const int k_w = conv_param_->kernel_w_;
95   const int batch = conv_param_->output_batch_;
96   const int out_ch = conv_param_->output_channel_;
97   const int groups = conv_param_->group_;
98   const int out_h = conv_param_->output_h_;
99   const int out_w = conv_param_->output_w_;
100   const int m = out_h * out_w;
101   const int n = out_ch / groups;
102   const int k = k_h * k_w * in_ch / groups;
103   float *workspace_temp = static_cast<float *>(workspace());
104   float *mat_workspace = workspace_temp + ws_size_;
105   int real_chunk;
106   float *mat_a = nullptr;
107   float *im = nullptr;
108   const float *mat_b = nullptr;
109   float *mat_c = nullptr;
110   if (do_dw_) {
111     const int kernel_spatial = k_h * k_w;
112     for (int i = 0; i < batch; ++i) {
113       for (int ci = 0; ci < m; ci += chunk_) {
114         real_chunk = MSMIN(m - ci, chunk_);
115         mat_a = workspace_temp;
116         im = x_addr + (i * in_ch * in_h * in_w);
117         RollingIm2ColPackDwUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
118         for (int j = 0; j < groups; ++j) {
119           mat_b = w_addr + j * nweights / groups;
120           mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
121           GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a + (j * kernel_spatial), k * groups, mat_b, k, 0, mat_c, out_ch,
122                      mat_workspace);
123         }
124       }
125     }
126   } else if (do_img2col_) {
127     for (int i = 0; i < batch; ++i) {
128       for (int j = 0; j < groups; ++j) {
129         for (int ci = 0; ci < m; ci += chunk_) {
130           real_chunk = MSMIN(m - ci, chunk_);
131           mat_a = workspace_temp;
132           mat_b = w_addr + j * nweights / groups;
133           mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch;
134           im = x_addr + i * in_ch * in_h * in_w + j * (in_ch / groups);
135           RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci);
136           GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
137         }
138       }
139     }
140   } else {
141     mat_b = w_addr;
142     const size_t in_plane_size = in_ch * in_h * in_w;
143     for (int i = 0; i < batch; ++i) {
144       im = x_addr + i * in_plane_size;
145       for (int ci = 0; ci < m; ci += chunk_) {
146         real_chunk = MSMIN(m - ci, chunk_);
147         mat_c = y_addr + i * n * m + ci * out_ch;
148         int input_height = ci / out_w * conv_param_->stride_h_;
149         int input_width = ci % out_w * conv_param_->stride_w_;
150         int offset = (input_height * in_w + input_width) * in_ch;
151         GemmMatmul(0, 1, real_chunk, n, k, 1, im + offset, k, mat_b, k, 0, mat_c, out_ch, mat_workspace);
152       }
153     }
154   }
155   return RET_OK;
156 }
157 
ConvolutionTrainRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)158 int ConvolutionTrainRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
159   MS_ASSERT(cdata != nullptr);
160   auto conv_kernel = reinterpret_cast<ConvolutionTrainCPUKernel *>(cdata);
161   auto error_code = conv_kernel->Execute(task_id);
162   if (error_code != RET_OK) {
163     MS_LOG(ERROR) << "ConvolutionTrainRun error task_id[" << task_id << "] error_code[" << error_code << "]";
164     return RET_ERROR;
165   }
166   return RET_OK;
167 }
168 
Run()169 int ConvolutionTrainCPUKernel::Run() {
170   int error_code = ParallelLaunch(this->ms_context_, ConvolutionTrainRun, this, 1);
171   if (error_code != RET_OK) {
172     MS_LOG(ERROR) << "conv train function error error_code[" << error_code << "]";
173     return RET_ERROR;
174   }
175   return RET_OK;
176 }
177 
CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor * > & inputs,const std::vector<lite::Tensor * > & outputs,OpParameter * opParameter,const lite::Context * ctx,const kernel::KernelKey & desc)178 kernel::InnerKernel *CpuConvTrainFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
179                                                    const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
180                                                    const lite::Context *ctx, const kernel::KernelKey &desc) {
181   MS_ASSERT(opParameter != nullptr);
182   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2DFusion);
183 
184   auto *kernel = new (std::nothrow)
185     ConvolutionTrainCPUKernel(opParameter, inputs, outputs, static_cast<const lite::InnerContext *>(ctx));
186   if (kernel == nullptr) {
187     MS_LOG(ERROR) << "new ConvolutionTrainCPUKernel failed!";
188     free(opParameter);
189     return nullptr;
190   }
191   return kernel;
192 }
193 }  // namespace mindspore::kernel
194