• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/runtime/kernel/arm/fp16/convolution_fp16.h"
18 #include <vector>
19 #include "include/errorcode.h"
20 #include "nnacl/fp16/conv_fp16.h"
21 #include "nnacl/fp16/matmul_fp16.h"
22 #include "nnacl/fp16/cast_fp16.h"
23 #include "nnacl/fp16/pack_fp16.h"
24 #include "nnacl/fp16/winograd_utils_fp16.h"
25 
26 using mindspore::lite::RET_ERROR;
27 using mindspore::lite::RET_OK;
28 
29 namespace mindspore::kernel {
PackWeight()30 void ConvolutionFP16CPUKernel::PackWeight() {
31   auto filter_tensor = in_tensors_.at(kWeightIndex);
32   int in_channel = filter_tensor->Channel();
33   int out_channel = filter_tensor->Batch();
34   int kernel_plane = filter_tensor->Height() * filter_tensor->Width();
35   void *weight_origin = (op_parameter_->is_train_session_) ? filter_tensor->data() : origin_weight_;
36   MS_ASSERT(weight_origin != nullptr);
37   RowMajor2Col8MajorFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), out_channel,
38                          in_channel * kernel_plane, false);
39 }
40 
MallocWeightBiasData()41 int ConvolutionFP16CPUKernel::MallocWeightBiasData() {
42   auto filter_tensor = in_tensors_.at(kWeightIndex);
43   int in_channel = filter_tensor->Channel();
44   int out_channel = filter_tensor->Batch();
45   conv_param_->input_channel_ = in_channel;
46   conv_param_->output_channel_ = out_channel;
47   int oc8 = UP_ROUND(out_channel, col_tile_);
48   int kernel_plane = filter_tensor->Height() * filter_tensor->Width();
49   int pack_weight_size = oc8 * in_channel * kernel_plane;
50 
51   // init weight
52   if (!op_parameter_->is_train_session_) {
53     if (packed_weight_ == nullptr) {
54       packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
55       if (packed_weight_ == nullptr) {
56         packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
57         if (packed_weight_ == nullptr) {
58           MS_LOG(ERROR) << "malloc packed_weight_ failed.";
59           return RET_ERROR;
60         }
61       }
62     }
63     memset(packed_weight_, 0, pack_weight_size * sizeof(float16_t));
64   }
65   // init bias
66   if (bias_data_ == nullptr) {
67     bias_data_ = malloc(oc8 * sizeof(float16_t));
68     if (bias_data_ == nullptr) {
69       MS_LOG(ERROR) << "malloc bias_data_ failed.";
70       return RET_ERROR;
71     }
72   }
73   memset(bias_data_, 0, oc8 * sizeof(float16_t));
74   return RET_OK;
75 }
76 
InitTmpBuffer()77 int ConvolutionFP16CPUKernel::InitTmpBuffer() {
78   int unit_size =
79     conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ * row_tile_ * thread_count_;
80 
81   packed_input_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(unit_size * sizeof(float16_t)));
82   if (packed_input_ == nullptr) {
83     MS_LOG(ERROR) << "malloc packed_input_ failed.";
84     return RET_ERROR;
85   }
86 
87   col_major_input_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(unit_size * sizeof(float16_t)));
88   if (col_major_input_ == nullptr) {
89     MS_LOG(ERROR) << "malloc col_major_input_ failed.";
90     return RET_ERROR;
91   }
92   return RET_OK;
93 }
94 
Init()95 int ConvolutionFP16CPUKernel::Init() {
96   CHECK_LESS_RETURN(in_tensors_.size(), 2);
97   CHECK_LESS_RETURN(out_tensors_.size(), 1);
98   UpdateOriginWeightAndBias();
99   if (op_parameter_->is_train_session_) {
100     auto filter_tensor = in_tensors_.at(kWeightIndex);
101     CHECK_NULL_RETURN(filter_tensor);
102     int in_channel = filter_tensor->Channel();
103     int out_channel = filter_tensor->Batch();
104     int oc8 = UP_ROUND(out_channel, col_tile_);
105     int kernel_plane = filter_tensor->Height() * filter_tensor->Width();
106     int pack_weight_size = oc8 * in_channel * kernel_plane;
107     set_workspace_size(pack_weight_size * sizeof(float16_t));
108   }
109 #ifdef ENABLE_ARM64
110   row_tile_ = C16NUM;
111 #else
112   row_tile_ = C12NUM;
113 #endif
114   col_tile_ = C8NUM;
115   auto ret = InitConvWeightBias();
116   if (ret != RET_OK) {
117     MS_LOG(ERROR) << "Init weight bias failed.";
118     return RET_ERROR;
119   }
120   return RET_OK;
121 }
122 
AdjustNumberOfThread()123 int ConvolutionFP16CPUKernel::AdjustNumberOfThread() {
124   auto out_tensor = out_tensors_.front();
125   CHECK_NULL_RETURN(out_tensor);
126   int out_plane = out_tensor->Height() * out_tensor->Width();
127   thread_count_ = MSMIN(op_parameter_->thread_num_, UP_DIV(out_plane, row_tile_));
128   conv_param_->thread_num_ = thread_count_;
129   return RET_OK;
130 }
131 
ReSize()132 int ConvolutionFP16CPUKernel::ReSize() {
133   auto ret = ConvolutionBaseCPUKernel::CheckResizeValid();
134   if (ret != RET_OK) {
135     MS_LOG(ERROR) << "Resize is invalid.";
136     return ret;
137   }
138   ret = ConvolutionBaseCPUKernel::Init();
139   if (ret != RET_OK) {
140     MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret;
141     return ret;
142   }
143   return RET_OK;
144 }
145 
RunImpl(int task_id)146 int ConvolutionFP16CPUKernel::RunImpl(int task_id) {
147   auto input_tensor = in_tensors_[0];
148   auto output_tensor = out_tensors_[0];
149   MS_ASSERT(input_tensor != nullptr);
150   MS_ASSERT(output_tensor != nullptr);
151   auto input_ptr = reinterpret_cast<float16_t *>(input_tensor->data());
152   auto output_ptr = reinterpret_cast<float16_t *>(output_tensor->data());
153   if (output_tensor->format() == NC4HW4) {
154     ConvOutNc8hw8Fp16(input_ptr, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
155                       reinterpret_cast<float16_t *>(bias_data_), col_major_input_, output_ptr, task_id, conv_param_);
156   } else {
157     ConvFp16(input_ptr, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
158              reinterpret_cast<float16_t *>(bias_data_), col_major_input_, output_ptr, task_id, conv_param_);
159   }
160   return RET_OK;
161 }
162 
ConvolutionFp16Impl(void * cdata,int task_id,float lhs_scale,float rhs_scale)163 static int ConvolutionFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
164   auto conv = reinterpret_cast<ConvolutionFP16CPUKernel *>(cdata);
165   auto error_code = conv->RunImpl(task_id);
166   if (error_code != RET_OK) {
167     MS_LOG(ERROR) << "ConvolutionFp16 Run error task_id[" << task_id << "] error_code[" << error_code << "]";
168     return RET_ERROR;
169   }
170   return RET_OK;
171 }
172 
Run()173 int ConvolutionFP16CPUKernel::Run() {
174   auto ret = InitTmpBuffer();
175   if (ret != RET_OK) {
176     MS_LOG(ERROR) << "Init tmp buffer failed.";
177     FreeTmpBuffer();
178     return RET_ERROR;
179   }
180   if (RepackWeight() != RET_OK) {
181     MS_LOG(ERROR) << "Repack weight failed.";
182     return RET_ERROR;
183   }
184   ret = ParallelLaunch(this->ms_context_, ConvolutionFp16Impl, this, thread_count_);
185   if (ret != RET_OK) {
186     MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
187   }
188 
189   FreeTmpBuffer();
190   return ret;
191 }
192 }  // namespace mindspore::kernel
193