/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_depthwise_sw.h"
#include "nnacl/kernel/convolution_base.h"
#include "nnacl/fp32/conv_depthwise_fp32.h"
#include "nnacl/fp32/pack_fp32.h"
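
/* Allocates the C4-packed weight buffer (skipped in training sessions, which use the
 * kernel workspace instead) and a zero-filled bias buffer sized to the channel-aligned
 * output, then caps the thread count at the number of OC4 channel blocks. */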
int ConvDwSWMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  int OC4 = UP_DIV(conv->compute_.out_c_, C4NUM);
  int pack_weight_size = C4NUM * OC4 * conv->compute_.kernel_hw_;
  if (!conv->base_.train_session_) {
    NNACL_CHECK_MALLOC_SIZE(pack_weight_size * sizeof(float));
    conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, pack_weight_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
  }

  int malloc_size = NNACL_MAX(conv->compute_.out_c_, C4NUM * OC4);
  if (conv->bias_data_ == NULL) {
    NNACL_CHECK_MALLOC_SIZE(malloc_size * sizeof(float));
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, malloc_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  memset(conv->bias_data_, 0, malloc_size * sizeof(float));
  conv->base_.thread_nr_ = NNACL_MIN(conv->base_.thread_nr_, OC4);
  return NNACL_OK;
}
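
/* Allocates C4-aligned packing buffers for input and output when the input channel
 * count is not a multiple of C4NUM; otherwise marks packing as unnecessary. */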
int ConvDwSWInitPackedInputOutput(ConvolutionDepthwiseSWStruct *conv_dw) {
  if (conv_dw->conv_.compute_.in_c_ % C4NUM == 0) {
    conv_dw->need_align_ = false;
    return NNACL_OK;
  }

  conv_dw->need_align_ = true;
  int IC4 = UP_DIV(conv_dw->conv_.compute_.in_c_, C4NUM);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv_dw->conv_.compute_.in_n_, conv_dw->conv_.compute_.in_hw_, NNACL_ERR);
  int conv_input_bhw = conv_dw->conv_.compute_.in_n_ * conv_dw->conv_.compute_.in_hw_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv_input_bhw, C4NUM * IC4, NNACL_ERR);
  int pack_input_size = conv_input_bhw * C4NUM * IC4;
  NNACL_CHECK_MALLOC_SIZE(pack_input_size * sizeof(float));
  conv_dw->packed_input_ =
    (float *)conv_dw->conv_.base_.env_->Alloc(conv_dw->conv_.base_.env_->allocator_, pack_input_size * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->packed_input_);

  int OC4 = UP_DIV(conv_dw->conv_.compute_.out_c_, C4NUM);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv_dw->conv_.compute_.out_n_, conv_dw->conv_.compute_.out_hw_, NNACL_ERR);
  int output_bhw = conv_dw->conv_.compute_.out_n_ * conv_dw->conv_.compute_.out_hw_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(output_bhw, C4NUM * OC4, NNACL_ERR);
  int pack_output_size = output_bhw * C4NUM * OC4;
  NNACL_CHECK_MALLOC_SIZE(pack_output_size * sizeof(float));
  conv_dw->packed_output_ =
    (float *)conv_dw->conv_.base_.env_->Alloc(conv_dw->conv_.base_.env_->allocator_, pack_output_size * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->packed_output_);
  return NNACL_OK;
}
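
/* Per-thread worker passed to ParallelLaunch: runs the sliding-window depthwise
 * kernel on the slice assigned to task_id. The trailing float arguments belong to
 * the thread-pool callback signature and are unused here. */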
int ConvDwSWRun(void *cdata, int task_id, float l, float r) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
  ConvParameter *conv_param = (ConvParameter *)conv_dw->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

  ConvDwSWFp32(conv_dw->packed_output_, conv_dw->packed_input_, (float *)conv_dw->conv_.packed_weight_,
               (float *)conv_dw->conv_.bias_data_, conv_param, &conv_dw->sliding_, task_id);
  return NNACL_OK;
}
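
/* Frees the aligned packing buffers when they were allocated (need_align_); otherwise
 * packed_input_/packed_output_ alias the tensor data and must not be freed. */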
void ConvDwSWFreePackedInputOutput(ConvolutionDepthwiseSWStruct *conv_dw) {
  if (conv_dw->need_align_) {
    conv_dw->conv_.base_.env_->Free(conv_dw->conv_.base_.env_->allocator_, conv_dw->packed_input_);
    conv_dw->packed_input_ = NULL;
    conv_dw->conv_.base_.env_->Free(conv_dw->conv_.base_.env_->allocator_, conv_dw->packed_output_);
    conv_dw->packed_output_ = NULL;
  }
}
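
/* Packs the origin weight (or the second input tensor in training sessions) from
 * NCHW into the NC4HW4 layout. */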
void ConvDwSWPackWeight(ConvolutionBaseStruct *conv) {
  void *origin_weight = (conv->base_.train_session_) ? conv->base_.in_[SECOND_INPUT]->data_ : conv->origin_weight_;
  NNACL_CHECK_NULL_RETURN_VOID(origin_weight);
  PackNCHWToNC4HW4Fp32(origin_weight, conv->packed_weight_, 1, conv->compute_.kernel_hw_, conv->compute_.out_c_);
}
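
/* Refreshes shape-dependent state after a resize: re-runs the base preparation,
 * rebuilds the sliding-window parameters with a C4 block size, and clamps the
 * thread count to the output height. */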
int ConvolutionDepthwiseSWResize(KernelBase *self) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
  int ret = ConvBasePrepare(&conv_dw->conv_);
  if (ret != NNACL_OK) {
    return ret;
  }

  ConvParameter *conv_param = (ConvParameter *)self->param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);
  InitSlidingParamConvDw(&conv_dw->sliding_, conv_param, C4NUM);

  self->thread_nr_ = NNACL_MIN(self->thread_nr_, conv_dw->conv_.compute_.out_h_);
  NNACL_CHECK_ZERO_RETURN_ERR(self->thread_nr_);
  return NNACL_OK;
}
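
/* Packs the input to NHWC4 when channel alignment is needed, repacks weights if
 * required, launches ConvDwSWRun across threads, unpacks the result back to NHWC,
 * and releases any temporary packing buffers. */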
int ConvolutionDepthwiseSWCompute(KernelBase *self) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  int ret = ConvDwSWInitPackedInputOutput(conv_dw);
  if (ret != NNACL_OK) {
    ConvDwSWFreePackedInputOutput(conv_dw);
    return ret;
  }

  ret = ConvBaseRepackWeight(&conv_dw->conv_);
  if (ret != NNACL_OK) {
    ConvDwSWFreePackedInputOutput(conv_dw);
    return ret;
  }

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  float *input_ptr = (float *)input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(input_ptr);
  if (conv_dw->need_align_) {
    PackNHWCToNHWC4Fp32(input_ptr, conv_dw->packed_input_, conv_dw->conv_.compute_.in_n_,
                        conv_dw->conv_.compute_.in_hw_, conv_dw->conv_.compute_.in_c_);
  } else {
    conv_dw->packed_input_ = input_ptr;
  }

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  float *output_ptr = (float *)output_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(output_ptr);
  if (!conv_dw->need_align_) {
    conv_dw->packed_output_ = output_ptr;
  }

  ret = self->env_->ParallelLaunch(self->env_->thread_pool_, ConvDwSWRun, self, self->thread_nr_);

  if (conv_dw->need_align_) {
    PackNHWCXToNHWCFp32(conv_dw->packed_output_, output_ptr, conv_dw->conv_.compute_.out_n_,
                        conv_dw->conv_.compute_.out_hw_, conv_dw->conv_.compute_.out_c_, C4NUM);
  }

  ConvDwSWFreePackedInputOutput(conv_dw);
  return ret;
}
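
/* Validates tensor counts, refreshes the origin weight/bias pointers, reserves
 * workspace for the packed weight in training sessions, and initializes the packed
 * weight and bias data. */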
int ConvolutionDdepthwiseSWPrepare(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
  ConvParameter *conv_param = (ConvParameter *)self->param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

  ConvBaseUpdateOriginWeightAndBias(&conv_dw->conv_);

  if (self->train_session_) {
    int OC4 = UP_DIV(conv_dw->conv_.compute_.out_c_, C4NUM);
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(C4NUM * OC4, conv_dw->conv_.compute_.kernel_hw_, NNACL_ERR);
    int pack_weight_size = C4NUM * OC4 * conv_dw->conv_.compute_.kernel_hw_;
    self->work_size_ = pack_weight_size * sizeof(float);
  }

  return ConvBaseInitConvWeightBias(&conv_dw->conv_);
}
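
/* Releases the resources owned by the base convolution structure. */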
int ConvolutionDepthwiseSWRelease(KernelBase *self) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv);
  ConvBaseRelease(conv);
  return NNACL_OK;
}
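
/* Factory for the sliding-window depthwise convolution kernel: zero-initializes the
 * struct and wires up the packing and lifecycle callbacks. */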
KernelBase *CreateConvDwSW(ConvParameter *conv_param) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)malloc(sizeof(ConvolutionDepthwiseSWStruct));
  NNACL_CHECK_NULL_RETURN_NULL(conv_dw);
  memset(conv_dw, 0, sizeof(ConvolutionDepthwiseSWStruct));

  conv_dw->conv_.malloc_weight_bias_ = ConvDwSWMallocWeightBiasData;
  conv_dw->conv_.pack_weight_ = ConvDwSWPackWeight;
  conv_dw->conv_.base_.Resize = ConvolutionDepthwiseSWResize;
  conv_dw->conv_.base_.Compute = ConvolutionDepthwiseSWCompute;
  conv_dw->conv_.base_.Prepare = ConvolutionDdepthwiseSWPrepare;
  conv_dw->conv_.base_.Release = ConvolutionDepthwiseSWRelease;
  return (KernelBase *)conv_dw;
}