/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv.h"

#include <stddef.h>
#include <stdint.h>

#include <vector>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_multithread.h"
#include "tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_hybrid.h"
#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/tensor_utils.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace depthwise_conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;
// This file has three implementations of DepthwiseConv.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

const int kTensorNotAllocated = -1;

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Per channel output multiplier and shift.
  std::vector<int32_t> per_channel_output_multiplier;
  std::vector<int> per_channel_output_shift;

  // Hybrid per channel temporary tensors.
  int input_quantized_id = kTensorNotAllocated;
  int scaling_factors_id = kTensorNotAllocated;
  int input_offset_id = kTensorNotAllocated;
  int32_t input_quantized_index;
  int32_t scaling_factors_index;
  int32_t input_offset_index;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

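// Checks tensor types and shapes, computes the padding and the output shape,
// and precomputes the (per-channel) quantization parameters. For the hybrid
// path (float input with int8 filter) it also sets up arena temporaries for
// the quantized input, the per-batch scaling factors, and the per-batch input
// offsets.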
TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  bool has_bias = NumInputs(node) == 3;

  TF_LITE_ENSURE(context, has_bias || NumInputs(node) == 2);
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* filter;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kFilterTensor, &filter));
  const TfLiteTensor* bias = nullptr;

  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));

  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);

  const TfLiteType data_type = input->type;

  const TfLiteType filter_type = filter->type;
  const bool is_hybrid =
      data_type == kTfLiteFloat32 && filter_type == kTfLiteInt8;
  TF_LITE_ENSURE(context,
                 data_type == kTfLiteFloat32 || data_type == kTfLiteUInt8 ||
                     data_type == kTfLiteInt8 || data_type == kTfLiteInt16);
  TF_LITE_ENSURE_TYPES_EQ(context, output->type, data_type);
  if (!is_hybrid) {
    TF_LITE_ENSURE(context,
                   filter->type == data_type || data_type == kTfLiteInt16);
  }

  if (data_type == kTfLiteInt16) {
    TF_LITE_ENSURE_EQ(context, input->params.zero_point, 0);
    TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
  }

  // Filter in DepthwiseConv is expected to be [1, H, W, O].
  TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 0), 1);

  if (has_bias) {
    TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kBiasTensor, &bias));
    if (data_type == kTfLiteUInt8 || data_type == kTfLiteInt8) {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt32);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else if (data_type == kTfLiteInt16) {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, kTfLiteInt64);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else {
      TF_LITE_ENSURE_TYPES_EQ(context, bias->type, data_type);
    }
    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
    TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
                      SizeOfDimension(bias, 0));
  }

  int channels_out = SizeOfDimension(filter, 3);
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);
  int batches = SizeOfDimension(input, 0);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  int out_width, out_height;

  data->padding = ComputePaddingHeightWidth(
      params->stride_height, params->stride_width,
      params->dilation_height_factor, params->dilation_width_factor, height,
      width, filter_height, filter_width, padding, &out_height, &out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training or
  // calibration.
  if (data_type != kTfLiteFloat32) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);
    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE(context, (affine_quantization->scale->size == 1 ||
                             affine_quantization->scale->size == channels_out));

    data->per_channel_output_multiplier.resize(channels_out);
    data->per_channel_output_shift.resize(channels_out);
    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), channels_out));
  }

  if (is_hybrid) {
    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    TF_LITE_ENSURE_EQ(
        context, affine_quantization->scale->size,
        filter->dims->data[affine_quantization->quantized_dimension]);

    int temporaries_count = 0;
    data->input_quantized_index = temporaries_count;
    if (data->input_quantized_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->input_quantized_id));
    }
    ++temporaries_count;
    data->scaling_factors_index = temporaries_count;
    if (data->scaling_factors_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->scaling_factors_id));
    }
    ++temporaries_count;
    data->input_offset_index = temporaries_count;
    if (data->input_offset_id == kTensorNotAllocated) {
      TF_LITE_ENSURE_OK(
          context, context->AddTensors(context, 1, &data->input_offset_id));
    }
    ++temporaries_count;

    TfLiteIntArrayFree(node->temporaries);
    node->temporaries = TfLiteIntArrayCreate(temporaries_count);

    node->temporaries->data[data->input_quantized_index] =
        data->input_quantized_id;
    TfLiteTensor* input_quantized;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->input_quantized_index,
                                  &input_quantized));
    input_quantized->type = kTfLiteInt8;
    input_quantized->allocation_type = kTfLiteArenaRw;
    if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
      TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized,
                                                       input_quantized_size));
    }
    node->temporaries->data[data->scaling_factors_index] =
        data->scaling_factors_id;
    TfLiteTensor* scaling_factors;
    TF_LITE_ENSURE_OK(
        context, GetTemporarySafe(context, node, data->scaling_factors_index,
                                  &scaling_factors));
    scaling_factors->type = kTfLiteFloat32;
    scaling_factors->allocation_type = kTfLiteArenaRw;
    const int batch_size = SizeOfDimension(input, 0);
    int scaling_dims[1] = {batch_size};
    if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
      TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1);
      scaling_factors_size->data[0] = batch_size;
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors,
                                                       scaling_factors_size));
    }
    node->temporaries->data[data->input_offset_index] = data->input_offset_id;
    TfLiteTensor* input_offsets;
    TF_LITE_ENSURE_OK(context,
                      GetTemporarySafe(context, node, data->input_offset_index,
                                       &input_offsets));
    input_offsets->type = kTfLiteInt32;
    input_offsets->allocation_type = kTfLiteArenaRw;
    if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, scaling_dims)) {
      TfLiteIntArray* input_offsets_size = TfLiteIntArrayCreate(1);
      input_offsets_size->data[0] = batch_size;
      TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_offsets,
                                                       input_offsets_size));
    }
  }

  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
  outputSize->data[0] = batches;
  outputSize->data[1] = out_height;
  outputSize->data[2] = out_width;
  outputSize->data[3] = channels_out;
  return context->ResizeTensor(context, output, outputSize);
}

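// Derives the depth multiplier from the tensor shapes: the number of filter
// output channels must be a multiple of the number of input channels, and the
// multiplier is their ratio.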
TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context,
                                    const TfLiteTensor* input,
                                    const TfLiteTensor* filter,
                                    int16* depth_multiplier) {
  int num_filter_channels = SizeOfDimension(filter, 3);
  int num_input_channels = SizeOfDimension(input, 3);
  TF_LITE_ENSURE(context, num_input_channels != 0);
  TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0);
  *depth_multiplier = num_filter_channels / num_input_channels;
  return kTfLiteOk;
}

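// Float evaluation: runs the reference kernel or the optimized kernel (which
// is handed the CpuBackendContext), depending on kernel_type.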
template <KernelType kernel_type>
TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                       TfLiteDepthwiseConvParams* params, OpData* data,
                       const TfLiteTensor* input, const TfLiteTensor* filter,
                       const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));
  if (kernel_type == kReference) {
    reference_ops::DepthwiseConv(
        op_params, GetTensorShape(input), GetTensorData<float>(input),
        GetTensorShape(filter), GetTensorData<float>(filter),
        GetTensorShape(bias), GetTensorData<float>(bias),
        GetTensorShape(output), GetTensorData<float>(output));
  } else {
    optimized_ops::DepthwiseConv<float, float>(
        op_params, GetTensorShape(input), GetTensorData<float>(input),
        GetTensorShape(filter), GetTensorData<float>(filter),
        GetTensorShape(bias), GetTensorData<float>(bias),
        GetTensorShape(output), GetTensorData<float>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

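// Per-tensor quantized (uint8) evaluation, using the single output multiplier
// and shift computed in Prepare() and folding the zero points in as offsets.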
template <KernelType kernel_type>
TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                           TfLiteDepthwiseConvParams* params, OpData* data,
                           const TfLiteTensor* input,
                           const TfLiteTensor* filter, const TfLiteTensor* bias,
                           TfLiteTensor* output) {
  auto input_offset = -input->params.zero_point;
  auto filter_offset = -filter->params.zero_point;
  auto output_offset = output->params.zero_point;

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));
  if (kernel_type == kReference) {
    reference_ops::DepthwiseConv(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
        GetTensorShape(bias), GetTensorData<int32_t>(bias),
        GetTensorShape(output), GetTensorData<uint8_t>(output));
  } else {
    optimized_ops::DepthwiseConv<uint8, int32>(
        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
        GetTensorShape(bias), GetTensorData<int32_t>(bias),
        GetTensorShape(output), GetTensorData<uint8_t>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

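// Per-channel quantized (int8) evaluation, using the per-channel output
// multipliers and shifts computed in Prepare(). The weights offset is zero
// since int8 filters are expected to be symmetrically quantized.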
template <KernelType kernel_type>
TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                                     TfLiteDepthwiseConvParams* params,
                                     OpData* data, const TfLiteTensor* input,
                                     const TfLiteTensor* filter,
                                     const TfLiteTensor* bias,
                                     TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.input_offset = -input->params.zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  TF_LITE_ENSURE_STATUS(ComputeDepthMultiplier(context, input, filter,
                                               &op_params.depth_multiplier));

  if (kernel_type == kReference) {
    reference_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int8>(input), GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int8>(output));
  } else {
    optimized_integer_ops::DepthwiseConvPerChannel(
        op_params, data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data(), GetTensorShape(input),
        GetTensorData<int8>(input), GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<int32>(bias), GetTensorShape(output),
        GetTensorData<int8>(output),
        CpuBackendContext::GetFromContext(context));
  }
  return kTfLiteOk;
}

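// 16x8 quantized evaluation (int16 activations, int8 weights, int64 bias).
// Only a reference kernel is used for this path.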
TfLiteStatus EvalQuantizedPerChannel16x8(
    const TfLiteDepthwiseConvParams* params, const OpData* data,
    const TfLiteTensor* input, const TfLiteTensor* filter,
    const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.weights_offset = 0;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier.data(),
      data->per_channel_output_shift.data(), GetTensorShape(input),
      GetTensorData<int16>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<std::int64_t>(bias), GetTensorShape(output),
      GetTensorData<int16>(output));

  return kTfLiteOk;
}

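// Hybrid evaluation: float activations with int8 per-channel quantized
// weights. Each input batch is asymmetrically quantized to int8 on the fly
// into the temporaries created in Prepare(), and the accumulators are scaled
// back to float using the per-batch scaling factors and the filter's
// per-channel scales.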
template <KernelType kernel_type>
TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
                                  TfLiteDepthwiseConvParams* params,
                                  OpData* data, const TfLiteTensor* input,
                                  const TfLiteTensor* filter,
                                  const TfLiteTensor* bias,
                                  TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
  const int batch_size = SizeOfDimension(input, 0);
  TF_LITE_ENSURE(context, batch_size != 0);
  const int input_size = NumElements(input) / batch_size;
  TfLiteTensor* input_quantized;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->input_quantized_index,
                                     &input_quantized));
  int8_t* quantized_input_ptr_batch = input_quantized->data.int8;
  TfLiteTensor* scaling_factors_tensor;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->scaling_factors_index,
                                     &scaling_factors_tensor));
  float* scaling_factors_ptr = GetTensorData<float>(scaling_factors_tensor);
  TfLiteTensor* input_offset_tensor;
  TF_LITE_ENSURE_OK(context,
                    GetTemporarySafe(context, node, data->input_offset_index,
                                     &input_offset_tensor));
  int32_t* input_offset_ptr = GetTensorData<int32_t>(input_offset_tensor);

  for (int b = 0; b < batch_size; ++b) {
    const int offset = b * input_size;
    tensor_utils::AsymmetricQuantizeFloats(
        GetTensorData<float>(input) + offset, input_size,
        quantized_input_ptr_batch + offset, &scaling_factors_ptr[b],
        &input_offset_ptr[b]);
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;

  op_params.weights_offset = 0;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  if (kernel_type == kReference) {
    reference_integer_ops::DepthwiseConvHybridPerChannel(
        op_params, scaling_factors_ptr, GetTensorShape(input),
        quantized_input_ptr_batch, GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<float>(bias), GetTensorShape(output),
        GetTensorData<float>(output), affine_quantization->scale->data,
        input_offset_ptr);
  } else {
    optimized_integer_ops::DepthwiseConvHybridPerChannel(
        op_params, scaling_factors_ptr, GetTensorShape(input),
        quantized_input_ptr_batch, GetTensorShape(filter),
        GetTensorData<int8>(filter), GetTensorShape(bias),
        GetTensorData<float>(bias), GetTensorShape(output),
        GetTensorData<float>(output), affine_quantization->scale->data,
        input_offset_ptr, CpuBackendContext::GetFromContext(context));
  }

  return kTfLiteOk;
}

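// Dispatches on the input type: float inputs go to the plain float path or,
// when the filter is int8, to the hybrid path; uint8, int8, and int16 inputs
// go to the corresponding quantized paths.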
template <KernelType kernel_type, TfLiteType input_type>
TfLiteStatus EvalImpl(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* output;
  TF_LITE_ENSURE_OK(context,
                    GetOutputSafe(context, node, kOutputTensor, &output));
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
  const TfLiteTensor* filter;
  TF_LITE_ENSURE_OK(context,
                    GetInputSafe(context, node, kFilterTensor, &filter));
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;
  TFLITE_DCHECK_EQ(input_type, input->type);

  switch (input_type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      if (filter->type == kTfLiteFloat32) {
        return EvalFloat<kernel_type>(context, node, params, data, input,
                                      filter, bias, output);
      } else if (filter->type == kTfLiteInt8) {
        return EvalHybridPerChannel<kernel_type>(context, node, params, data,
                                                 input, filter, bias, output);
      } else {
        TF_LITE_KERNEL_LOG(
            context, "Type %s with filter type %s not currently supported.",
            TfLiteTypeGetName(input->type), TfLiteTypeGetName(filter->type));
        return kTfLiteError;
      }
      break;
    case kTfLiteUInt8:
      return EvalQuantized<kernel_type>(context, node, params, data, input,
                                        filter, bias, output);
      break;
    case kTfLiteInt8:
      return EvalQuantizedPerChannel<kernel_type>(context, node, params, data,
                                                  input, filter, bias, output);
      break;
    case kTfLiteInt16:
      return EvalQuantizedPerChannel16x8(params, data, input, filter, bias,
                                         output);
      break;
    default:
      context->ReportError(context, "Type %d not currently supported.",
                           input->type);
      return kTfLiteError;
  }
}

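// Resolves the input type at runtime and forwards to EvalImpl.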
template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteTensor* input;
  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));

  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      return EvalImpl<kernel_type, kTfLiteFloat32>(context, node);
    case kTfLiteUInt8:
      return EvalImpl<kernel_type, kTfLiteUInt8>(context, node);
    case kTfLiteInt8:
      return EvalImpl<kernel_type, kTfLiteInt8>(context, node);
    case kTfLiteInt16:
      return EvalImpl<kernel_type, kTfLiteInt16>(context, node);
    default:
      context->ReportError(context, "Type %d not currently supported.",
                           input->type);
      return kTfLiteError;
  }
}

}  // namespace depthwise_conv

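// Registration variants: reference, generic optimized (Neon-free), and
// Neon-optimized kernels. Register_DEPTHWISE_CONV_2D selects the Neon variant
// when USE_NEON is defined, and the generic optimized variant otherwise.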
TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kReference>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT_UINT8() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::EvalImpl<depthwise_conv::kNeonOptimized, kTfLiteUInt8>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
#else
  return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
#endif
}

// Warning: Clients using this variant are responsible for ensuring that their
// models only need the UINT8 type. TFLite's op registration mechanism doesn't
// yet allow for more nuanced registration.
TfLiteRegistration* Register_DEPTHWISE_CONV_2D_UINT8() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT_UINT8();
#else
  return Register_DEPTHWISE_CONV_2D();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite