/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h"

#include <stdint.h>

#include <cmath>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/kernel_util.h"
namespace tflite {
namespace delegates {
namespace hexagon {
namespace {

// Dilated Depthwise Convolution is performed by running SpaceToBatchND before
// the op and BatchToSpaceND after it.
// This helper computes the paddings param for SpaceToBatchND and the crops
// param for BatchToSpaceND.
//
// Inspired by tf.nn.with_space_to_batch & tf.required_space_to_batch_paddings.
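//
// Illustrative example (values assumed for exposition): a 3x3 kernel with
// dilation {2, 2} has dilated size 2 * (3 - 1) + 1 = 5, so SAME padding gives
// base_paddings = {2, 2, 2, 2}. For input_height = 9, the padded height is
// 9 + 2 + 2 = 13, which needs pad_end_extra_h = (2 - 13 % 2) % 2 = 1 to be
// divisible by the dilation factor; the result is paddings = {2, 3, 2, 3}
// and crops = {0, 1, 0, 1}.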
void ComputeSpaceToBatchParams(int input_height, int input_width,
                               int weights_height, int weights_width,
                               const std::vector<int>& dilation_factors_h_w,
                               const TfLitePadding padding_type,
                               std::vector<int>* paddings,
                               std::vector<int>* crops) {
  // Base paddings depend on padding applied to the Depthwise Conv op.
  // 4-element array: {top, bottom, left, right}.
  std::vector<int> base_paddings(4, 0);
  if (padding_type == kTfLitePaddingSame) {
    const int dilated_weights_h =
        dilation_factors_h_w[0] * (weights_height - 1) + 1;
    const int dilated_weights_w =
        dilation_factors_h_w[1] * (weights_width - 1) + 1;
    base_paddings[0] = (dilated_weights_h - 1) / 2;
    base_paddings[1] = dilated_weights_h - 1 - (dilated_weights_h - 1) / 2;
    base_paddings[2] = (dilated_weights_w - 1) / 2;
    base_paddings[3] = dilated_weights_w - 1 - (dilated_weights_w - 1) / 2;
  }

  // paddings represents {pad_top, pad_bottom, pad_left, pad_right}.
  paddings->resize(4, 0);
  // crops represents {crop_top, crop_bottom, crop_left, crop_right}.
  crops->resize(4, 0);

  // Logic for computing paddings & crops follows.
  // Taken from tf.required_space_to_batch_paddings, but without array
  // operations since we only deal with 2 dimensions.
  int pad_start_h = base_paddings[0];
  int pad_start_w = base_paddings[2];
  int orig_pad_end_h = base_paddings[1];
  int orig_pad_end_w = base_paddings[3];
  int full_input_h = input_height + pad_start_h + orig_pad_end_h;
  int full_input_w = input_width + pad_start_w + orig_pad_end_w;
  int pad_end_extra_h =
      (dilation_factors_h_w[0] - full_input_h % dilation_factors_h_w[0]) %
      dilation_factors_h_w[0];
  int pad_end_extra_w =
      (dilation_factors_h_w[1] - full_input_w % dilation_factors_h_w[1]) %
      dilation_factors_h_w[1];
  int pad_end_h = orig_pad_end_h + pad_end_extra_h;
  int pad_end_w = orig_pad_end_w + pad_end_extra_w;

  // Assign values.
  (*paddings)[0] = pad_start_h;
  (*paddings)[1] = pad_end_h;
  (*paddings)[2] = pad_start_w;
  (*paddings)[3] = pad_end_w;
  (*crops)[0] = 0;
  (*crops)[1] = pad_end_extra_h;
  (*crops)[2] = 0;
  (*crops)[3] = pad_end_extra_w;
}

// Computes output dimensions for the SpaceToBatchND op used in the dilated
// Depthwise Conv case.
// space_to_batch_paddings should be in format {top, bottom, left, right}.
// The output dims are derived from the documentation of SpaceToBatchND_8.
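//
// Illustrative example (values assumed for exposition): with batch 1,
// dilation {2, 2}, a 9x9 input, and paddings {2, 3, 2, 3}, the output is
// batch = 1 * 2 * 2 = 4, height = (2 + 9 + 3) / 2 = 7, width = 7, and the
// depth is unchanged.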
void PopulateSpaceToBatchOutputDims(
    int input_batch_size, int input_height_size, int input_width_size,
    int input_depth_size, const std::vector<int>& dilation_factors_h_w,
    const std::vector<int>& space_to_batch_paddings,
    std::vector<int>* space_to_batch_output_dims) {
  // Batches.
  space_to_batch_output_dims->push_back(
      input_batch_size * dilation_factors_h_w[0] * dilation_factors_h_w[1]);
  // Height.
  space_to_batch_output_dims->push_back((space_to_batch_paddings[0] +
                                         input_height_size +
                                         space_to_batch_paddings[1]) /
                                        dilation_factors_h_w[0]);
  // Width.
  space_to_batch_output_dims->push_back((space_to_batch_paddings[2] +
                                         input_width_size +
                                         space_to_batch_paddings[3]) /
                                        dilation_factors_h_w[1]);
  // Depth.
  space_to_batch_output_dims->push_back(input_depth_size);
}

}  // namespace

TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs,
                                               const TfLiteIntArray* outputs,
                                               TfLiteContext* context) {
  static std::vector<int> dilation_factors_shape = {1, 1, 1, 2};
  static std::vector<int> paddings_shape = {1, 1, 2, 2};
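  // Hexagon NN const tensors are 4-D: the two dilation factors live in a
  // {1, 1, 1, 2} tensor, and the {top, bottom, left, right} paddings/crops in
  // a {1, 1, 2, 2} tensor (read as {{top, bottom}, {left, right}}).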

  // Input data tensor.
  const auto& data_tensor = context->tensors[inputs->data[0]];
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  float data_min = 0;
  float data_max = 0;
  TF_LITE_ENSURE_STATUS(
      ComputeMinAndMaxQuantValues(data_tensor, &data_min, &data_max));
  auto* data_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_min), sizeof(data_min));
  auto* data_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_max), sizeof(data_max));

  // Gather information about the Convolution operations.
  TfLitePadding padding_type = kTfLitePaddingUnknown;
  TfLiteFusedActivation activation = kTfLiteActNone;
  int stride_height = 0;
  int stride_width = 0;
  bool is_dilated_depthwise_conv = false;
  int channel_multiplier = 1;
  if (op_node_.op_type == OP_Supernode_8x8p32to8) {
    const TfLiteConvParams* conv_params =
        reinterpret_cast<const TfLiteConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
  } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) {
    const TfLiteDepthwiseConvParams* conv_params =
        reinterpret_cast<const TfLiteDepthwiseConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
    channel_multiplier = conv_params->depth_multiplier;
    // We only support dilation for DepthwiseConv.
    if (conv_params->dilation_height_factor > 1 ||
        conv_params->dilation_width_factor > 1) {
      is_dilated_depthwise_conv = true;
      dilation_factors_h_w_.push_back(conv_params->dilation_height_factor);
      dilation_factors_h_w_.push_back(conv_params->dilation_width_factor);
    }
  }

  // Weights tensor.
  TF_LITE_ENSURE_STATUS(
      InitializeWeightsNodes(inputs, outputs, context, input_depth_size));

  // Stride node.
  static int dummy = 0;
  stride_shape_ = {1, stride_height, stride_width, 1};
  auto* stride_node = graph_builder_->AddConstNodeWithData(
      stride_shape_.data(), reinterpret_cast<char*>(&dummy), sizeof(dummy));

  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, context->tensors[outputs->data[0]].dims);
  // Output bounds.
  // TODO(b/129276536): Add support for other activations here. Current
  // implementation assumes None/Relu.
  float output_min = 0;
  float output_max = 0;
  TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues(
      context->tensors[outputs->data[0]], &output_min, &output_max));
  // These denote the bounds fed to Hexagon's Conv mechanism, which will be
  // different from the TFLite tensor bounds if there is a RELU activation.
  float conv_output_min = output_min;
  float conv_output_max = output_max;
  if (activation == kTfLiteActRelu6) {
    conv_output_min = 0;
    conv_output_max = 6;
  } else if (activation == kTfLiteActReluN1To1) {
    conv_output_min = -1;
    conv_output_max = 1;
  } else if (activation == kTfLiteActRelu) {
    conv_output_min = 0;
  }
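  // Note that plain Relu only clamps the lower bound; the upper bound stays
  // at the TFLite tensor's max. If these clamped bounds differ from the
  // tensor's quantized range, a Requantize op is appended below to remap the
  // output.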
  auto* conv_output_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_min),
      sizeof(conv_output_min));
  auto* conv_output_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_max),
      sizeof(conv_output_max));

  // Bias node.
  TF_LITE_ENSURE_STATUS(InitializeBiasNodes(inputs, outputs, context));

  // TODO(b/143759564): Simplify this method if depth_multiplier support needs
  // to be generalized.
  if (channel_multiplier > 1 && input_depth_size == 1) {
    // Depthwise Conv with input_depth == 1 & channel_multiplier > 1 is
    // equivalent to Conv.
    SetOpType(OP_Supernode_8x8p32to8);
  } else if (channel_multiplier > 1) {
    TF_LITE_KERNEL_LOG(
        context, "depth_multiplier > 1 not supported with input_depth > 1");
    return kTfLiteError;
  }
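  // The rewrite above is valid because, with a single input channel, each of
  // the channel_multiplier output channels is an independent filter over that
  // one channel, which is exactly a regular convolution.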

  TensorID output_tensor, output_min_tensor, output_max_tensor;
  if (is_dilated_depthwise_conv) {
    // For dilated Depthwise Conv, we convert this node into SpaceToBatchND, and
    // then chain Supernode & BatchToSpaceND after it.
    int input_batch_size, input_height_size, input_width_size,
        input_depth_size;
    GetDims(&input_batch_size, &input_height_size, &input_width_size,
            &input_depth_size, data_tensor.dims);
    ComputeSpaceToBatchParams(
        input_height_size, input_width_size, weight_shape_[0], weight_shape_[1],
        dilation_factors_h_w_, padding_type, &space_to_batch_paddings_,
        &batch_to_space_crops_);
    auto* dilation_factors_const = graph_builder_->AddConstNodeWithData(
        dilation_factors_shape.data(),
        reinterpret_cast<char*>(dilation_factors_h_w_.data()),
        dilation_factors_h_w_.size() * sizeof(stride_height));
    auto* paddings_const = graph_builder_->AddConstNodeWithData(
        paddings_shape.data(),
        reinterpret_cast<char*>(space_to_batch_paddings_.data()),
        space_to_batch_paddings_.size() * sizeof(stride_height));
    auto* crops_const = graph_builder_->AddConstNodeWithData(
        paddings_shape.data(),
        reinterpret_cast<char*>(batch_to_space_crops_.data()),
        batch_to_space_crops_.size() * sizeof(stride_height));
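    // sizeof(stride_height) is just a stand-in for sizeof(int) here: the
    // dilation, paddings, and crops vectors all hold ints.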

    // 1. SpaceToBatch.
    SetOpType(OP_SpaceToBatchND_8);
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
    AddInput(TensorID(dilation_factors_const->GetID(), 0));
    AddInput(TensorID(paddings_const->GetID(), 0));
    AddInput(TensorID(data_min_const->GetID(), 0));
    AddInput(TensorID(data_max_const->GetID(), 0));
    std::vector<int> space_to_batch_output_dims;
    PopulateSpaceToBatchOutputDims(
        input_batch_size, input_height_size, input_width_size, input_depth_size,
        dilation_factors_h_w_, space_to_batch_paddings_,
        &space_to_batch_output_dims);
    TensorID space_to_batch_op_out =
        AddOutput(sizeof(uint8_t), 4, space_to_batch_output_dims);
    AddOutput(sizeof(float), 4, kScalarShape);
    AddOutput(sizeof(float), 4, kScalarShape);

    // 2. Depthwise Conv.
    auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID());
    conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8);
    conv_op->AddInput(space_to_batch_op_out);
    conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
    conv_op->AddInput(TensorID(data_min_const->GetID(), 0));
    conv_op->AddInput(TensorID(data_max_const->GetID(), 0));
    conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0));
    conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0));
    conv_op->AddInput(TensorID(stride_node->GetID(), 0));
    conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
    conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0));
    conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0));
    conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
    conv_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
    if (per_channel_quant_.channel_scales_node != nullptr) {
      conv_op->AddInput(
          TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
    }
    // The padding is handled by the SpaceToBatch/BatchToSpace ops surrounding
    // this node. Hence, this op's padding is always VALID.
    // tf.nn.with_space_to_batch's docs state the following pattern:
    // """
    // batch_to_space_nd(
    //  op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings),
    //     num_spatial_dims,
    //     "VALID")
    //  adjusted_dilation_rate,
    //  adjusted_crops)
    // """
    conv_op->SetPaddingType(NN_PAD_VALID);
    // These dimensions are probably a little excessive, but they upper-bound
    // the possible output from DepthwiseConv.
    // TODO(b/139955809): Find better bounds?
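    // The bound should be safe: each of the dilation_h * dilation_w batches
    // holds a spatially subsampled slice whose height/width is roughly
    // output/dilation, so using the full output height/width (with the
    // enlarged batch) overestimates the true size.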
    TensorID conv_output = conv_op->AddOutput(
        sizeof(uint8_t), 4,
        {output_batch_size * dilation_factors_h_w_[0] *
             dilation_factors_h_w_[1],
         output_height_size, output_width_size, output_depth_size});
    conv_op->AddOutput(sizeof(float), 4, kScalarShape);
    conv_op->AddOutput(sizeof(float), 4, kScalarShape);

    // 3. BatchToSpace.
    auto* batch_to_space_op = graph_builder_->AddNode(GetTFLiteNodeID());
    batch_to_space_op->SetOpType(OP_BatchToSpaceND_8);
    batch_to_space_op->AddInput(conv_output);
    batch_to_space_op->AddInput(TensorID(dilation_factors_const->GetID(), 0));
    batch_to_space_op->AddInput(TensorID(crops_const->GetID(), 0));
    batch_to_space_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
    batch_to_space_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
    output_tensor =
        batch_to_space_op->AddOutput(sizeof(uint8_t), 4,
                                     {output_batch_size, output_height_size,
                                      output_width_size, output_depth_size});
    output_min_tensor =
        batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
    output_max_tensor =
        batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
  } else {
    // Standard case.
    // Padding type.
    if (padding_type == kTfLitePaddingSame) {
      SetPaddingType(NN_PAD_SAME);
    } else if (padding_type == kTfLitePaddingValid) {
      SetPaddingType(NN_PAD_VALID);
    }
    // Inputs.
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
    AddInput(TensorID(data_min_const->GetID(), 0));
    AddInput(TensorID(data_max_const->GetID(), 0));
    AddInput(TensorID(weights_min_node_->GetID(), 0));
    AddInput(TensorID(weights_max_node_->GetID(), 0));
    AddInput(TensorID(stride_node->GetID(), 0));
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
    AddInput(TensorID(bias_min_node_->GetID(), 0));
    AddInput(TensorID(bias_max_node_->GetID(), 0));
    AddInput(TensorID(conv_output_min_const->GetID(), 0));
    AddInput(TensorID(conv_output_max_const->GetID(), 0));
    if (per_channel_quant_.channel_scales_node != nullptr) {
      AddInput(TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
    }
    // Outputs.
    output_tensor = AddOutput(sizeof(uint8_t), 4,
                              {output_batch_size, output_height_size,
                               output_width_size, output_depth_size});
    output_min_tensor = AddOutput(sizeof(float), 4, kScalarShape);
    output_max_tensor = AddOutput(sizeof(float), 4, kScalarShape);
  }

  // Requantize if activation was not None and the TFLite tensor's min/max
  // differ (diff > 1e-2) from the RELU bounds.
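  // Illustrative example (values assumed for exposition): with Relu6 the conv
  // bounds are [0, 6], but the TFLite output tensor might be quantized over,
  // say, [0, 5.88]; Requantize_8to8 then remaps the uint8 data to the
  // tensor's range so downstream ops see the expected quantization params.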
  const float min_bound_diff = std::abs(conv_output_min - output_min);
  const float max_bound_diff = std::abs(conv_output_max - output_max);
  if (activation != kTfLiteActNone &&
      (min_bound_diff > 0.01 || max_bound_diff > 0.01)) {
    auto* requantized_min_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_min), sizeof(output_min));
    auto* requantized_max_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_max), sizeof(output_max));
    auto* requantize_op = graph_builder_->AddNode(GetTFLiteNodeID());
    requantize_op->SetOpType(OP_Requantize_8to8);
    requantize_op->AddInput(output_tensor);
    requantize_op->AddInput(output_min_tensor);
    requantize_op->AddInput(output_max_tensor);
    requantize_op->AddInput(TensorID(requantized_min_const->GetID(), 0));
    requantize_op->AddInput(TensorID(requantized_max_const->GetID(), 0));
    node_output_ =
        requantize_op->AddOutput(sizeof(uint8_t), 4,
                                 {output_batch_size, output_height_size,
                                  output_width_size, output_depth_size});
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
  } else {
    node_output_ = output_tensor;
  }

  return kTfLiteOk;
}

TfLiteStatus Conv2dOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs,
                                              TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}

Conv2dOpBuilder::~Conv2dOpBuilder() {}

OpBuilder* CreateConv2DBuilder(GraphBuilder* graph_builder, int op_type) {
  return new Conv2dOpBuilder(graph_builder, op_type);
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite