• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifdef ENABLE_ARM
18 #include "src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h"
19 #include "include/errorcode.h"
20 #include "nnacl/fp16/pack_fp16.h"
21 #include "nnacl/fp16/conv_depthwise_fp16.h"
22 
23 using mindspore::lite::RET_ERROR;
24 using mindspore::lite::RET_INFER_INVALID;
25 using mindspore::lite::RET_MEMORY_FAILED;
26 using mindspore::lite::RET_OK;
27 
28 namespace mindspore::kernel {
PackWeight()29 void ConvolutionDepthwise3x3Fp16CPUKernel::PackWeight() {
30   auto weight_tensor = in_tensors_.at(kWeightIndex);
31   int channel = weight_tensor->Batch();
32   void *origin_weight = (op_parameter_->is_train_session_) ? weight_tensor->data() : origin_weight_;
33   MS_ASSERT(origin_weight != nullptr);
34   PackWeightConvDw3x3Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
35                           channel);
36 }
37 
MallocWeightBiasData()38 int ConvolutionDepthwise3x3Fp16CPUKernel::MallocWeightBiasData() {
39   auto weight_tensor = in_tensors_.at(kWeightIndex);
40   int channel = weight_tensor->Batch();
41   int c8 = UP_ROUND(channel, C8NUM);
42   int pack_weight_size = c8 * C12NUM;
43   if (!op_parameter_->is_train_session_) {
44     if (packed_weight_ == nullptr) {
45       packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
46       if (packed_weight_ == nullptr) {
47         packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
48         if (packed_weight_ == nullptr) {
49           MS_LOG(ERROR) << "Malloc buffer failed.";
50           return RET_ERROR;
51         }
52       }
53     }
54   }
55   if (bias_data_ == nullptr) {
56     bias_data_ = malloc(c8 * sizeof(float16_t));
57     if (bias_data_ == nullptr) {
58       MS_LOG(ERROR) << "Malloc buffer failed.";
59       return RET_ERROR;
60     }
61   }
62   memset(bias_data_, 0, c8 * sizeof(float16_t));
63   return RET_OK;
64 }
65 
Init()66 int ConvolutionDepthwise3x3Fp16CPUKernel::Init() {
67   UpdateOriginWeightAndBias();
68   if (op_parameter_->is_train_session_) {
69     auto weight_tensor = in_tensors_.at(kWeightIndex);
70     CHECK_NULL_RETURN(weight_tensor);
71     int channel = weight_tensor->Batch();
72     int c8 = UP_ROUND(channel, C8NUM);
73     int pack_weight_size = c8 * C12NUM;
74     set_workspace_size(pack_weight_size * sizeof(float16_t));
75   }
76   auto ret = InitConvWeightBias();
77   if (ret != 0) {
78     MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitConvWeightBias failed.";
79     return RET_ERROR;
80   }
81   if (!InferShapeDone()) {
82     return RET_OK;
83   }
84   return ReSize();
85 }
86 
ReSize()87 int ConvolutionDepthwise3x3Fp16CPUKernel::ReSize() {
88   auto ret = ConvolutionBaseCPUKernel::Init();
89   if (ret != RET_OK) {
90     MS_LOG(ERROR) << "ConvolutionBaseCPUKernel::Init() failed!";
91     return ret;
92   }
93   conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_);
94   return RET_OK;
95 }
96 
DoExecute(int task_id)97 int ConvolutionDepthwise3x3Fp16CPUKernel::DoExecute(int task_id) {
98   int units = UP_DIV(conv_param_->output_w_, C2NUM);  // F(2, 3) contains 2 conv units
99   int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM);
100   auto buffer = buffer_ + C12NUM * c8 * units * task_id;
101   int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_);
102   int start_oh = step_oh * task_id;
103   int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_);
104   ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, reinterpret_cast<float16_t *>(packed_weight_),
105                 reinterpret_cast<float16_t *>(bias_data_), conv_param_, start_oh, end_oh);
106   return RET_OK;
107 }
108 
ConvDw3x3Fp16Run(void * cdata,int task_id,float lhs_scale,float rhs_scale)109 int ConvDw3x3Fp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
110   auto conv_dw = reinterpret_cast<ConvolutionDepthwise3x3Fp16CPUKernel *>(cdata);
111   auto ret = conv_dw->DoExecute(task_id);
112   if (ret != RET_OK) {
113     MS_LOG(ERROR) << "ConvolutionDepthwise3x3Run error task_id[" << task_id << "] error_code[" << ret << "]";
114     return RET_ERROR;
115   }
116   return RET_OK;
117 }
118 
Run()119 int ConvolutionDepthwise3x3Fp16CPUKernel::Run() {
120   if (RepackWeight() != RET_OK) {
121     MS_LOG(ERROR) << "Repack weight failed.";
122     return RET_ERROR;
123   }
124 
125   int units = UP_DIV(conv_param_->output_w_, C2NUM);  // F(2, 3) contains 2 conv units
126   int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM);
127   int buffer_size = units * c8 * C12NUM * conv_param_->thread_num_;
128   buffer_ = reinterpret_cast<float16_t *>(ctx_->allocator->Malloc(buffer_size * sizeof(float16_t)));
129   if (buffer_ == nullptr) {
130     MS_LOG(ERROR) << "ConvDw3x3Fp16Run failed to allocate buffer";
131     return RET_MEMORY_FAILED;
132   }
133 
134   auto input_tensor = in_tensors_.at(kInputIndex);
135   CHECK_NULL_RETURN(input_tensor);
136   input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data());
137   CHECK_NULL_RETURN(input_ptr_);
138   auto output_tensor = out_tensors_.at(kOutputIndex);
139   CHECK_NULL_RETURN(output_tensor);
140   output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data());
141   CHECK_NULL_RETURN(output_ptr_);
142   auto ret = ParallelLaunch(this->ms_context_, ConvDw3x3Fp16Run, this, conv_param_->thread_num_);
143   ctx_->allocator->Free(buffer_);
144   if (ret != RET_OK) {
145     MS_LOG(ERROR) << "ConvDw3x3Run error: error_code[" << ret << "]";
146     return RET_ERROR;
147   }
148   return RET_OK;
149 }
150 }  // namespace mindspore::kernel
151 #endif
152