/**
 * Copyright 2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/kernel/convolution_depthwise_sw.h"
#include "nnacl/kernel/convolution_base.h"
#include "nnacl/fp32/conv_depthwise_fp32.h"
#include "nnacl/fp32/pack_fp32.h"

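/* Allocates the C4-packed depthwise weight buffer (skipped for training sessions, which reserve
 * work-buffer space for it in Prepare instead) and a zero-filled bias buffer padded to a multiple
 * of C4NUM output channels, then caps the thread count by the number of C4 output-channel blocks. */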
int ConvDwSWMallocWeightBiasData(ConvolutionBaseStruct *conv) {
  int OC4 = UP_DIV(conv->compute_.out_c_, C4NUM);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(C4NUM * OC4, conv->compute_.kernel_hw_, NNACL_ERR);
  int pack_weight_size = C4NUM * OC4 * conv->compute_.kernel_hw_;
  if (!conv->base_.train_session_) {
    NNACL_CHECK_MALLOC_SIZE(pack_weight_size * sizeof(float));
    conv->packed_weight_ = ConvBaseGetConvPackWeightData(conv, pack_weight_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->packed_weight_);
  }

  int malloc_size = NNACL_MAX(conv->compute_.out_c_, C4NUM * OC4);
  if (conv->bias_data_ == NULL) {
    NNACL_CHECK_MALLOC_SIZE(malloc_size * sizeof(float));
    conv->bias_data_ = conv->base_.env_->Alloc(conv->base_.env_->allocator_, malloc_size * sizeof(float));
    NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv->bias_data_);
  }
  memset(conv->bias_data_, 0, malloc_size * sizeof(float));
  conv->base_.thread_nr_ = NNACL_MIN(conv->base_.thread_nr_, OC4);
  return NNACL_OK;
}

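/* When the input channel count is not a multiple of C4NUM, allocates temporary NHWC4-packed input
 * and output buffers; otherwise the kernel reads and writes the tensor data in place. */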
int ConvDwSWInitPackedInputOutput(ConvolutionDepthwiseSWStruct *conv_dw) {
  if (conv_dw->conv_.compute_.in_c_ % C4NUM == 0) {
    conv_dw->need_align_ = false;
    return NNACL_OK;
  }

  conv_dw->need_align_ = true;
  int IC4 = UP_DIV(conv_dw->conv_.compute_.in_c_, C4NUM);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv_dw->conv_.compute_.in_n_, conv_dw->conv_.compute_.in_hw_, NNACL_ERR);
  int conv_input_bhw = conv_dw->conv_.compute_.in_n_ * conv_dw->conv_.compute_.in_hw_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv_input_bhw, C4NUM * IC4, NNACL_ERR);
  int pack_input_size = conv_input_bhw * C4NUM * IC4;
  NNACL_CHECK_MALLOC_SIZE(pack_input_size * sizeof(float));
  conv_dw->packed_input_ =
    (float *)conv_dw->conv_.base_.env_->Alloc(conv_dw->conv_.base_.env_->allocator_, pack_input_size * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->packed_input_);

  int OC4 = UP_DIV(conv_dw->conv_.compute_.out_c_, C4NUM);
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(conv_dw->conv_.compute_.out_n_, conv_dw->conv_.compute_.out_hw_, NNACL_ERR);
  int output_bhw = conv_dw->conv_.compute_.out_n_ * conv_dw->conv_.compute_.out_hw_;
  NNACL_CHECK_INT_MUL_NOT_OVERFLOW(output_bhw, C4NUM * OC4, NNACL_ERR);
  int pack_output_size = output_bhw * C4NUM * OC4;
  NNACL_CHECK_MALLOC_SIZE(pack_output_size * sizeof(float));
  conv_dw->packed_output_ =
    (float *)conv_dw->conv_.base_.env_->Alloc(conv_dw->conv_.base_.env_->allocator_, pack_output_size * sizeof(float));
  NNACL_MALLOC_CHECK_NULL_RETURN_ERR(conv_dw->packed_output_);
  return NNACL_OK;
}

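/* Per-task callback for ParallelLaunch: runs the fp32 sliding-window depthwise kernel on the
 * slice owned by task_id (the two float arguments are unused here). */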
int ConvDwSWRun(void *cdata, int task_id, float l, float r) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)cdata;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
  ConvParameter *conv_param = (ConvParameter *)conv_dw->conv_.base_.param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

  ConvDwSWFp32(conv_dw->packed_output_, conv_dw->packed_input_, (float *)conv_dw->conv_.packed_weight_,
               (float *)conv_dw->conv_.bias_data_, conv_param, &conv_dw->sliding_, task_id);
  return NNACL_OK;
}

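/* Frees the temporary packed input/output buffers allocated when channel alignment was required. */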
void ConvDwSWFreePackedInputOutput(ConvolutionDepthwiseSWStruct *conv_dw) {
  if (conv_dw->need_align_) {
    conv_dw->conv_.base_.env_->Free(conv_dw->conv_.base_.env_->allocator_, conv_dw->packed_input_);
    conv_dw->packed_input_ = NULL;
    conv_dw->conv_.base_.env_->Free(conv_dw->conv_.base_.env_->allocator_, conv_dw->packed_output_);
    conv_dw->packed_output_ = NULL;
  }
}

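/* Repacks the depthwise weight (origin weight, or the weight input tensor in training sessions)
 * from NCHW into NC4HW4 layout in packed_weight_. */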
void ConvDwSWPackWeight(ConvolutionBaseStruct *conv) {
  void *origin_weight = (conv->base_.train_session_) ? conv->base_.in_[SECOND_INPUT]->data_ : conv->origin_weight_;
  NNACL_CHECK_NULL_RETURN_VOID(origin_weight);
  PackNCHWToNC4HW4Fp32(origin_weight, conv->packed_weight_, 1, conv->compute_.kernel_hw_, conv->compute_.out_c_);
}

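/* Re-runs base preparation, rebuilds the sliding-window parameters from the conv parameter, and
 * bounds the thread count by the output height. */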
int ConvolutionDepthwiseSWResize(KernelBase *self) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
  int ret = ConvBasePrepare(&conv_dw->conv_);
  if (ret != NNACL_OK) {
    return ret;
  }

  ConvParameter *conv_param = (ConvParameter *)self->param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);
  InitSlidingParamConvDw(&conv_dw->sliding_, conv_param, C4NUM);

  self->thread_nr_ = NNACL_MIN(self->thread_nr_, conv_dw->conv_.compute_.out_h_);
  NNACL_CHECK_ZERO_RETURN_ERR(self->thread_nr_);
  return NNACL_OK;
}

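/* Packs the input if alignment is required, repacks the weight, launches the depthwise kernel
 * across threads, then unpacks the output back to NHWC and releases the temporary buffers. */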
int ConvolutionDepthwiseSWCompute(KernelBase *self) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);

  int ret = ConvDwSWInitPackedInputOutput(conv_dw);
  if (ret != NNACL_OK) {
    ConvDwSWFreePackedInputOutput(conv_dw);
    return ret;
  }

  ret = ConvBaseRepackWeight(&conv_dw->conv_);
  if (ret != NNACL_OK) {
    ConvDwSWFreePackedInputOutput(conv_dw);
    return ret;
  }

  TensorC *input_tensor = self->in_[FIRST_INPUT];
  NNACL_CHECK_NULL_RETURN_ERR(input_tensor);
  float *input_ptr = (float *)input_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(input_ptr);
  if (conv_dw->need_align_) {
    PackNHWCToNHWC4Fp32(input_ptr, conv_dw->packed_input_, conv_dw->conv_.compute_.in_n_,
                        conv_dw->conv_.compute_.in_hw_, conv_dw->conv_.compute_.in_c_);
  } else {
    conv_dw->packed_input_ = input_ptr;
  }

  TensorC *output_tensor = self->out_[OUTPUT_INDEX];
  NNACL_CHECK_NULL_RETURN_ERR(output_tensor);
  float *output_ptr = (float *)output_tensor->data_;
  NNACL_CHECK_NULL_RETURN_ERR(output_ptr);
  if (!conv_dw->need_align_) {
    conv_dw->packed_output_ = output_ptr;
  }

  ret = self->env_->ParallelLaunch(self->env_->thread_pool_, ConvDwSWRun, self, self->thread_nr_);

  if (conv_dw->need_align_) {
    PackNHWCXToNHWCFp32(conv_dw->packed_output_, output_ptr, conv_dw->conv_.compute_.out_n_,
                        conv_dw->conv_.compute_.out_hw_, conv_dw->conv_.compute_.out_c_, C4NUM);
  }

  ConvDwSWFreePackedInputOutput(conv_dw);
  return ret;
}

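/* Checks tensor counts, refreshes the origin weight/bias pointers, reserves work-buffer space for
 * the packed weight in training sessions, and initializes the packed weight and bias. */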
int ConvolutionDepthwiseSWPrepare(KernelBase *self) {
  NNACL_CHECK_FALSE(self->in_size_ < TWO_TENSOR, NNACL_INPUT_TENSOR_ERROR);
  NNACL_CHECK_FALSE(self->out_size_ < ONE_TENSOR, NNACL_OUTPUT_TENSOR_ERROR);

  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv_dw);
  ConvParameter *conv_param = (ConvParameter *)self->param_;
  NNACL_CHECK_NULL_RETURN_ERR(conv_param);

  ConvBaseUpdateOriginWeightAndBias(&conv_dw->conv_);

  if (self->train_session_) {
    int OC4 = UP_DIV(conv_dw->conv_.compute_.out_c_, C4NUM);
    NNACL_CHECK_INT_MUL_NOT_OVERFLOW(C4NUM * OC4, conv_dw->conv_.compute_.kernel_hw_, NNACL_ERR);
    int pack_weight_size = C4NUM * OC4 * conv_dw->conv_.compute_.kernel_hw_;
    self->work_size_ = pack_weight_size * sizeof(float);
  }

  return ConvBaseInitConvWeightBias(&conv_dw->conv_);
}

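/* Releases the resources owned by the convolution base. */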
int ConvolutionDepthwiseSWRelease(KernelBase *self) {
  ConvolutionBaseStruct *conv = (ConvolutionBaseStruct *)self;
  NNACL_CHECK_NULL_RETURN_ERR(conv);
  ConvBaseRelease(conv);
  return NNACL_OK;
}

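/* Creates a sliding-window depthwise convolution kernel and binds its pack/allocate hooks and its
 * Resize/Compute/Prepare/Release entry points. */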
KernelBase *CreateConvDwSW(ConvParameter *conv_param) {
  ConvolutionDepthwiseSWStruct *conv_dw = (ConvolutionDepthwiseSWStruct *)malloc(sizeof(ConvolutionDepthwiseSWStruct));
  NNACL_CHECK_NULL_RETURN_NULL(conv_dw);
  memset(conv_dw, 0, sizeof(ConvolutionDepthwiseSWStruct));

  conv_dw->conv_.malloc_weight_bias_ = ConvDwSWMallocWeightBiasData;
  conv_dw->conv_.pack_weight_ = ConvDwSWPackWeight;
  conv_dw->conv_.base_.Resize = ConvolutionDepthwiseSWResize;
  conv_dw->conv_.base_.Compute = ConvolutionDepthwiseSWCompute;
  conv_dw->conv_.base_.Prepare = ConvolutionDepthwiseSWPrepare;
  conv_dw->conv_.base_.Release = ConvolutionDepthwiseSWRelease;
  return (KernelBase *)conv_dw;
}