• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifdef ENABLE_AVX
17 #include "src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h"
18 #include "include/errorcode.h"
19 
20 using mindspore::lite::RET_ERROR;
21 using mindspore::lite::RET_INFER_INVALID;
22 using mindspore::lite::RET_NULL_PTR;
23 using mindspore::lite::RET_OK;
24 
25 namespace mindspore::kernel {
~ConvolutionDepthwiseSWCPUKernelX86()26 ConvolutionDepthwiseSWCPUKernelX86::~ConvolutionDepthwiseSWCPUKernelX86() {
27   if (sliding_ != nullptr) {
28     delete sliding_;
29     sliding_ = nullptr;
30   }
31 }
32 
InitPackedInputOutput()33 int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() {
34   CHECK_NULL_RETURN(conv_param_);
35   if (conv_param_->input_channel_ % oc_tile_ != 0) {
36     input_need_align_ = true;
37     int ic_algin = UP_DIV(conv_param_->input_channel_, oc_tile_);
38     int pack_input_size =
39       conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * oc_tile_ * ic_algin;
40     packed_input_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(pack_input_size * sizeof(float)));
41     if (packed_input_ == nullptr) {
42       MS_LOG(ERROR) << "Malloc packed_input_ buffer is failed.";
43       return RET_NULL_PTR;
44     }
45   }
46   if (conv_param_->output_channel_ % oc_tile_ != 0) {
47     output_need_align_ = true;
48     int oc_algin = UP_DIV(conv_param_->output_channel_, oc_tile_);
49     int pack_output_size =
50       conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * oc_tile_ * oc_algin;
51     packed_output_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(pack_output_size * sizeof(float)));
52     if (packed_output_ == nullptr) {
53       MS_LOG(ERROR) << "Malloc packed_output_ buffer is failed.";
54       return RET_NULL_PTR;
55     }
56   }
57   return RET_OK;
58 }
59 
Init()60 int ConvolutionDepthwiseSWCPUKernelX86::Init() {
61   CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
62   CHECK_LESS_RETURN(out_tensors_.size(), 1);
63 #ifdef ENABLE_AVX
64   oc_tile_ = C8NUM;
65 #endif
66   if (op_parameter_->is_train_session_) {
67     auto weight_tensor = in_tensors_.at(kWeightIndex);
68     int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_);
69     int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width();
70     set_workspace_size(pack_weight_size * sizeof(float));
71   }
72   sliding_ = new (std::nothrow) SlidingWindowParam;
73   if (sliding_ == nullptr) {
74     MS_LOG(ERROR) << "new sliding window param failed.";
75     return RET_ERROR;
76   }
77 
78   auto ret = InitConvWeightBias();
79   if (ret != 0) {
80     MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed.";
81     return RET_ERROR;
82   }
83   if (!InferShapeDone()) {
84     return RET_OK;
85   }
86   return ReSize();
87 }
88 
ReSize()89 int ConvolutionDepthwiseSWCPUKernelX86::ReSize() {
90   ConvolutionBaseCPUKernel::Init();
91   InitSlidingParamConvDw(sliding_, conv_param_, oc_tile_);
92   return RET_OK;
93 }
94 
Execute(int task_id)95 int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
96   DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
97                      reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
98   return RET_OK;
99 }
100 
ConvDwSWAvxRun(void * cdata,int task_id,float lhs_scale,float rhs_scale)101 int ConvDwSWAvxRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
102   auto conv_dw = reinterpret_cast<ConvolutionDepthwiseSWCPUKernelX86 *>(cdata);
103   auto ret = conv_dw->Execute(task_id);
104   if (ret != RET_OK) {
105     MS_LOG(ERROR) << "ConvolutionDepthwiseSWRun in x86 error task_id[" << task_id << "] error_code[" << ret << "]";
106     return RET_ERROR;
107   }
108   return RET_OK;
109 }
110 
Run()111 int ConvolutionDepthwiseSWCPUKernelX86::Run() {
112   auto ret = InitPackedInputOutput();
113   if (ret != 0) {
114     MS_LOG(ERROR) << "Convolution depthwise x86 fp32 InitPackedInputOutput failed.";
115     FreePackedInputOutput();
116     return RET_ERROR;
117   }
118   if (RepackWeight() != RET_OK) {
119     MS_LOG(ERROR) << "Repack weight failed.";
120     return RET_ERROR;
121   }
122   auto input_tensor = in_tensors_.at(kInputIndex);
123   CHECK_NULL_RETURN(input_tensor);
124   auto input_ptr = reinterpret_cast<float *>(input_tensor->data());
125   CHECK_NULL_RETURN(input_ptr);
126 
127   if (input_need_align_) {
128     PackNHWCToNHWCXFp32(input_ptr, packed_input_, conv_param_->input_batch_,
129                         conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_, oc_tile_);
130   } else {
131     packed_input_ = input_ptr;
132   }
133 
134   auto output_tensor = out_tensors_.at(kOutputIndex);
135   CHECK_NULL_RETURN(output_tensor);
136   auto output_ptr = reinterpret_cast<float *>(output_tensor->data());
137   CHECK_NULL_RETURN(output_ptr);
138 
139   if (!output_need_align_) {
140     packed_output_ = output_ptr;
141   }
142 
143   ret = ParallelLaunch(this->ms_context_, ConvDwSWAvxRun, this, conv_param_->thread_num_);
144   if (ret != RET_OK) {
145     MS_LOG(ERROR) << "ConvDwSWAvxRun error: error_code[" << ret << "]";
146   }
147 
148   if (output_need_align_) {
149     PackNHWCXToNHWCFp32(packed_output_, output_ptr, conv_param_->output_batch_,
150                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_, oc_tile_);
151   }
152   FreePackedInputOutput();
153   return ret;
154 }
155 
FreePackedInputOutput()156 void ConvolutionDepthwiseSWCPUKernelX86::FreePackedInputOutput() {
157   if (input_need_align_) {
158     ms_context_->allocator->Free(packed_input_);
159     packed_input_ = nullptr;
160   }
161   if (output_need_align_) {
162     ms_context_->allocator->Free(packed_output_);
163     packed_output_ = nullptr;
164   }
165 }
166 
PackWeight()167 void ConvolutionDepthwiseSWCPUKernelX86::PackWeight() {
168   auto weight_tensor = in_tensors_.at(kWeightIndex);
169   int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_);
170   void *origin_weight = IsTrainable() ? weight_tensor->data() : origin_weight_;
171   MS_ASSERT(origin_weight != nullptr);
172   PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin,
173                        weight_tensor->Channel(), reinterpret_cast<float *>(packed_weight_),
174                        reinterpret_cast<float *>(origin_weight));
175 }
176 
MallocWeightBiasData()177 int ConvolutionDepthwiseSWCPUKernelX86::MallocWeightBiasData() {
178   auto weight_tensor = in_tensors_.at(kWeightIndex);
179   int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_);
180   int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width();
181   if (!op_parameter_->is_train_session_) {
182     packed_weight_ = malloc(pack_weight_size * sizeof(float));
183     if (packed_weight_ == nullptr) {
184       MS_LOG(ERROR) << "Malloc packed_weight_ is failed!";
185       return RET_NULL_PTR;
186     }
187   }
188 
189   if (in_tensors_.size() == kInputSize2) {
190     auto bias_size = oc_algin * oc_tile_;
191     bias_data_ = malloc(bias_size * sizeof(float));
192     if (bias_data_ == nullptr) {
193       MS_LOG(ERROR) << "Malloc bias_data buffer failed.";
194       return RET_NULL_PTR;
195     }
196     memset(bias_data_, 0, bias_size * sizeof(float));
197   }
198   return RET_OK;
199 }
200 }  // namespace mindspore::kernel
201 #endif
202