1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "CpuOperationUtils.h"
18 #include "Operations.h"
19
20 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h"
21 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
22
23 #include "Tracing.h"
24
25 namespace android {
26 namespace nn {
27
depthwiseConvFloat16(const _Float16 * inputData,const Shape & inputShape,const _Float16 * filterData,const Shape & filterShape,const _Float16 * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,_Float16 * outputData,const Shape & outputShape)28 bool depthwiseConvFloat16(const _Float16* inputData, const Shape& inputShape,
29 const _Float16* filterData, const Shape& filterShape,
30 const _Float16* biasData, const Shape& biasShape, int32_t paddingLeft,
31 int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
32 int32_t strideWidth, int32_t strideHeight, int32_t dilationWidthFactor,
33 int32_t dilationHeightFactor, int32_t depthMultiplier, int32_t activation,
34 _Float16* outputData, const Shape& outputShape) {
35 NNTRACE_TRANS("depthwiseConvFloat16");
36 std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
37 convertFloat16ToFloat32(inputData, &inputDataFloat32);
38 std::vector<float> filterDataFloat32(getNumberOfElements(filterShape));
39 convertFloat16ToFloat32(filterData, &filterDataFloat32);
40 std::vector<float> biasDataFloat32(getNumberOfElements(biasShape));
41 convertFloat16ToFloat32(biasData, &biasDataFloat32);
42
43 std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
44 depthwiseConvFloat32(inputDataFloat32.data(), inputShape, filterDataFloat32.data(), filterShape,
45 biasDataFloat32.data(), biasShape, paddingLeft, paddingRight, paddingTop,
46 paddingBottom, strideWidth, strideHeight, dilationWidthFactor,
47 dilationHeightFactor, depthMultiplier, activation,
48 outputDataFloat32.data(), outputShape);
49
50 convertFloat32ToFloat16(outputDataFloat32, outputData);
51 return true;
52 }
53
// Declares the local variables shared by the depthwise-conv kernels below:
// spatial dimensions (dimension 1 = height, 2 = width) of the input, filter,
// and output shapes, plus the top/left padding widened to uint32_t. Expects
// inputShape, filterShape, outputShape, paddingTop, and paddingLeft to be in
// scope at the expansion site. #undef'd at the end of this file.
#define ANDROID_NN_DEPTHWISE_CONV_PARAMETERS                    \
    uint32_t height = getSizeOfDimension(inputShape, 1);        \
    uint32_t width = getSizeOfDimension(inputShape, 2);         \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
    uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
    uint32_t outHeight = getSizeOfDimension(outputShape, 1);    \
    uint32_t outWidth = getSizeOfDimension(outputShape, 2);     \
                                                                \
    uint32_t paddingHeight = (uint32_t)paddingTop;              \
    uint32_t paddingWidth = (uint32_t)paddingLeft;
64
depthwiseConvFloat32(const float * inputData,const Shape & inputShape,const float * filterData,const Shape & filterShape,const float * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,float * outputData,const Shape & outputShape)65 bool depthwiseConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
66 const Shape& filterShape, const float* biasData, const Shape& biasShape,
67 int32_t paddingLeft, int32_t paddingRight, int32_t paddingTop,
68 int32_t paddingBottom, int32_t strideWidth, int32_t strideHeight,
69 int32_t dilationWidthFactor, int32_t dilationHeightFactor,
70 int32_t depthMultiplier, int32_t activation, float* outputData,
71 const Shape& outputShape) {
72 NNTRACE_TRANS("depthwiseConvFloat32");
73
74 ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
75
76 float output_activation_min, output_activation_max;
77 CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
78
79 tflite::DepthwiseParams params{
80 .padding_values = {static_cast<int16>(paddingWidth), static_cast<int16>(paddingHeight)},
81 .stride_width = static_cast<int16>(strideWidth),
82 .stride_height = static_cast<int16>(strideHeight),
83 .depth_multiplier = static_cast<int16>(depthMultiplier),
84 .float_activation_min = output_activation_min,
85 .float_activation_max = output_activation_max,
86 .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
87 .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
88 };
89 NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
90 tflite::optimized_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
91 convertShapeToTflshape(filterShape), filterData,
92 convertShapeToTflshape(biasShape), biasData,
93 convertShapeToTflshape(outputShape), outputData);
94
95 return true;
96 }
97
depthwiseConvQuant8(const uint8_t * inputData,const Shape & inputShape,const uint8_t * filterData,const Shape & filterShape,const int32_t * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,uint8_t * outputData,const Shape & outputShape)98 bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape,
99 const uint8_t* filterData, const Shape& filterShape,
100 const int32_t* biasData, const Shape& biasShape, int32_t paddingLeft,
101 int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
102 int32_t strideWidth, int32_t strideHeight, int32_t dilationWidthFactor,
103 int32_t dilationHeightFactor, int32_t depthMultiplier, int32_t activation,
104 uint8_t* outputData, const Shape& outputShape) {
105 NNTRACE_TRANS("depthwiseConvQuant8");
106
107 ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
108
109 double real_multiplier = 0.0;
110 int32_t output_multiplier = 0;
111 int32_t output_shift = 0;
112 int32_t output_activation_min = 0;
113 int32_t output_activation_max = 0;
114
115 NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
116 &real_multiplier));
117 int exponent;
118 NN_RET_CHECK(QuantizeMultiplier(real_multiplier, &output_multiplier, &exponent));
119 output_shift = -exponent;
120 CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
121 &output_activation_max);
122
123 tflite::DepthwiseParams params{
124 .padding_values = {static_cast<int16>(paddingWidth), static_cast<int16>(paddingHeight)},
125 .stride_width = static_cast<int16>(strideWidth),
126 .stride_height = static_cast<int16>(strideHeight),
127 .depth_multiplier = static_cast<int16>(depthMultiplier),
128 .quantized_activation_min = output_activation_min,
129 .quantized_activation_max = output_activation_max,
130 .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
131 .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
132 .input_offset = -inputShape.offset,
133 .weights_offset = -filterShape.offset,
134 .output_offset = outputShape.offset,
135 .output_shift = -output_shift,
136 .output_multiplier = output_multiplier,
137 };
138 NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
139 tflite::optimized_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
140 convertShapeToTflshape(filterShape), filterData,
141 convertShapeToTflshape(biasShape), biasData,
142 convertShapeToTflshape(outputShape), outputData);
143 return true;
144 }
145
depthwiseConvQuant8PerChannel(const uint8_t * inputData,const Shape & inputShape,const int8_t * filterData,const Shape & filterShape,const float * filterScales,const int32_t * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,uint8_t * outputData,const Shape & outputShape)146 bool depthwiseConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
147 const int8_t* filterData, const Shape& filterShape,
148 const float* filterScales, const int32_t* biasData,
149 const Shape& biasShape, int32_t paddingLeft,
150 int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
151 int32_t strideWidth, int32_t strideHeight,
152 int32_t dilationWidthFactor, int32_t dilationHeightFactor,
153
154 int32_t depthMultiplier, int32_t activation, uint8_t* outputData,
155 const Shape& outputShape) {
156 NNTRACE_TRANS("depthwiseConvQuant8");
157
158 uint32_t paddingHeight = (uint32_t)paddingTop;
159 uint32_t paddingWidth = (uint32_t)paddingLeft;
160
161 uint32_t numBatches = getSizeOfDimension(inputShape, 0);
162 uint32_t inputHeight = getSizeOfDimension(inputShape, 1);
163 uint32_t inputWidth = getSizeOfDimension(inputShape, 2);
164 uint32_t inputDepth = getSizeOfDimension(inputShape, 3);
165 uint32_t filterHeight = getSizeOfDimension(filterShape, 1);
166 uint32_t filterWidth = getSizeOfDimension(filterShape, 2);
167 uint32_t filterDepth = getSizeOfDimension(filterShape, 3);
168 uint32_t outputHeight = getSizeOfDimension(outputShape, 1);
169 uint32_t outputWidth = getSizeOfDimension(outputShape, 2);
170 uint32_t outputDepth = getSizeOfDimension(outputShape, 3);
171
172 int32_t inputOffset = -inputShape.offset;
173 int32_t outputOffset = outputShape.offset;
174
175 auto realMultiplier = std::vector<double>(outputDepth, .0f);
176 auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
177 auto outputShift = std::vector<int32_t>(outputDepth, .0f);
178
179 for (int i = 0; i < outputDepth; ++i) {
180 Shape filterChannelShape = filterShape;
181 filterChannelShape.scale = filterScales[i];
182 Shape biasChannelShape = biasShape;
183 biasChannelShape.scale = filterScales[i] * inputShape.scale;
184 NN_RET_CHECK(GetQuantizedConvolutionMultipler(
185 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
186 int exponent;
187 NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
188 outputShift[i] = -exponent;
189 }
190
191 int32_t output_activation_min = 0, output_activation_max = 0;
192 CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
193 &output_activation_max);
194
195 const uint8_t* inputBase = inputData;
196 uint8_t* outPtr = outputData;
197 for (uint32_t b = 0; b < numBatches; b++) {
198 for (uint32_t h = 0; h < outputHeight; h++) {
199 for (uint32_t w = 0; w < outputWidth; w++) {
200 for (uint32_t ic = 0; ic < inputDepth; ic++) {
201 for (uint32_t m = 0; m < depthMultiplier; m++) {
202 int32_t wInputOrigin = static_cast<int32_t>(w) * strideWidth - paddingLeft;
203 int32_t hInputOrigin = static_cast<int32_t>(h) * strideHeight - paddingTop;
204 const int oc = m + ic * depthMultiplier;
205
206 int32_t sum = 0.0f;
207 for (uint32_t i = 0; i < filterHeight; i++) {
208 for (uint32_t j = 0; j < filterWidth; j++) {
209 int32_t hInput = hInputOrigin +
210 dilationHeightFactor * static_cast<int32_t>(i);
211 int32_t wInput = wInputOrigin +
212 dilationWidthFactor * static_cast<int32_t>(j);
213
214 if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
215 wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
216 uint32_t filterIndex =
217 i * filterWidth * filterDepth + j * filterDepth + oc;
218 uint32_t inputIndex = hInput * inputWidth * inputDepth +
219 wInput * inputDepth + ic;
220 sum += (static_cast<int32_t>(filterData[filterIndex])) *
221 (static_cast<int32_t>(inputBase[inputIndex]) +
222 inputOffset);
223 }
224 }
225 }
226
227 sum += biasData[oc];
228 sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier[oc],
229 -outputShift[oc]);
230 sum += outputOffset;
231 sum = std::max(std::min(sum, output_activation_max), output_activation_min);
232 outPtr[m] = static_cast<uint8_t>(sum);
233 }
234 outPtr += depthMultiplier;
235 }
236 }
237 }
238 inputBase += inputHeight * inputWidth * inputDepth;
239 }
240
241 return true;
242 }
243
244 #undef ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
245 } // namespace nn
246 } // namespace android
247