/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h"

#include <stdint.h>

#include <cmath>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {

// Dilated Depthwise Convolution is performed by running SpaceToBatchND before
// the op and BatchToSpaceND after it.
// This helper computes the paddings param for SpaceToBatchND and the crops
// param for BatchToSpaceND.
//
// Inspired by tf.nn.with_space_to_batch & tf.required_space_to_batch_paddings.
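//
// For example (illustrative values only): a 9x9 input with 3x3 weights,
// dilation factors {2, 2} and SAME padding has dilated weights of size 5x5,
// so base_paddings = {2, 2, 2, 2}. The padded input (13x13) must then grow by
// 1 on each axis to become divisible by the dilation factor, giving
// paddings = {2, 3, 2, 3} and crops = {0, 1, 0, 1}.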
void ComputeSpaceToBatchParams(int input_height, int input_width,
                               int weights_height, int weights_width,
                               const std::vector<int>& dilation_factors_h_w,
                               const TfLitePadding padding_type,
                               std::vector<int>* paddings,
                               std::vector<int>* crops) {
  // Base paddings depend on padding applied to the Depthwise Conv op.
  // 4-element array: {top, bottom, left, right}.
  std::vector<int> base_paddings(4, 0);
  if (padding_type == kTfLitePaddingSame) {
    const int dilated_weights_h =
        dilation_factors_h_w[0] * (weights_height - 1) + 1;
    const int dilated_weights_w =
        dilation_factors_h_w[1] * (weights_width - 1) + 1;
    base_paddings[0] = (dilated_weights_h - 1) / 2;
    base_paddings[1] = dilated_weights_h - 1 - (dilated_weights_h - 1) / 2;
    base_paddings[2] = (dilated_weights_w - 1) / 2;
    base_paddings[3] = dilated_weights_w - 1 - (dilated_weights_w - 1) / 2;
  }

  // paddings represents {pad_top, pad_bottom, pad_left, pad_right}.
  paddings->resize(4, 0);
  // crops represents {crop_top, crop_bottom, crop_left, crop_right}.
  crops->resize(4, 0);

  // Logic for computing paddings & crops follows.
  // Taken from tf.required_space_to_batch_paddings, but without array
  // operations since we only deal with 2 dimensions.
  int pad_start_h = base_paddings[0];
  int pad_start_w = base_paddings[2];
  int orig_pad_end_h = base_paddings[1];
  int orig_pad_end_w = base_paddings[3];
  int full_input_h = input_height + pad_start_h + orig_pad_end_h;
  int full_input_w = input_width + pad_start_w + orig_pad_end_w;
  int pad_end_extra_h =
      (dilation_factors_h_w[0] - full_input_h % dilation_factors_h_w[0]) %
      dilation_factors_h_w[0];
  int pad_end_extra_w =
      (dilation_factors_h_w[1] - full_input_w % dilation_factors_h_w[1]) %
      dilation_factors_h_w[1];
  int pad_end_h = orig_pad_end_h + pad_end_extra_h;
  int pad_end_w = orig_pad_end_w + pad_end_extra_w;

  // Assign values.
  (*paddings)[0] = pad_start_h;
  (*paddings)[1] = pad_end_h;
  (*paddings)[2] = pad_start_w;
  (*paddings)[3] = pad_end_w;
  (*crops)[0] = 0;
  (*crops)[1] = pad_end_extra_h;
  (*crops)[2] = 0;
  (*crops)[3] = pad_end_extra_w;
}

// Computes output dimensions for the SpaceToBatchND op used in the dilated
// Depthwise Conv case.
// space_to_batch_paddings should be in the format {top, bottom, left, right}.
// The dimensions follow the documentation of SpaceToBatchND_8's output.
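//
// For example (same illustrative values as above): a 1x9x9x8 input with
// dilation factors {2, 2} and paddings {2, 3, 2, 3} yields output dims
// {1 * 2 * 2, (2 + 9 + 3) / 2, (2 + 9 + 3) / 2, 8} = {4, 7, 7, 8}.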
void PopulateSpaceToBatchOutputDims(
    int input_batch_size, int input_height_size, int input_width_size,
    int input_depth_size, const std::vector<int>& dilation_factors_h_w,
    const std::vector<int>& space_to_batch_paddings,
    std::vector<int>* space_to_batch_output_dims) {
  // Batches.
  space_to_batch_output_dims->push_back(
      input_batch_size * dilation_factors_h_w[0] * dilation_factors_h_w[1]);
  // Height.
  space_to_batch_output_dims->push_back((space_to_batch_paddings[0] +
                                         input_height_size +
                                         space_to_batch_paddings[1]) /
                                        dilation_factors_h_w[0]);
  // Width.
  space_to_batch_output_dims->push_back((space_to_batch_paddings[2] +
                                         input_width_size +
                                         space_to_batch_paddings[3]) /
                                        dilation_factors_h_w[1]);
  // Depth.
  space_to_batch_output_dims->push_back(input_depth_size);
}

}  // namespace

TfLiteStatus Conv2dOpBuilder::PopulateSubGraph(const TfLiteIntArray* inputs,
                                               const TfLiteIntArray* outputs,
                                               TfLiteContext* context) {
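  // Shapes of the const nodes used on the dilated DepthwiseConv path: the two
  // dilation factors are laid out as a 1x1x1x2 array, and the paddings/crops
  // as a 1x1x2x2 array of {before, after} pairs per spatial dimension.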
  static std::vector<int> dilation_factors_shape = {1, 1, 1, 2};
  static std::vector<int> paddings_shape = {1, 1, 2, 2};

  // Input data tensor.
  const auto& data_tensor = context->tensors[inputs->data[0]];
  int input_batch_size, input_height_size, input_width_size, input_depth_size;
  GetDims(&input_batch_size, &input_height_size, &input_width_size,
          &input_depth_size, data_tensor.dims);
  float data_min = 0;
  float data_max = 0;
  TF_LITE_ENSURE_STATUS(
      ComputeMinAndMaxQuantValues(data_tensor, &data_min, &data_max));
  auto* data_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_min), sizeof(data_min));
  auto* data_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&data_max), sizeof(data_max));

  // Gather information about the Convolution operations.
  TfLitePadding padding_type = kTfLitePaddingUnknown;
  TfLiteFusedActivation activation = kTfLiteActNone;
  int stride_height = 0;
  int stride_width = 0;
  bool is_dilated_depthwise_conv = false;
  int channel_multiplier = 1;
  if (op_node_.op_type == OP_Supernode_8x8p32to8) {
    const TfLiteConvParams* conv_params =
        reinterpret_cast<const TfLiteConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
  } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) {
    const TfLiteDepthwiseConvParams* conv_params =
        reinterpret_cast<const TfLiteDepthwiseConvParams*>(builtin_data_);
    stride_height = conv_params->stride_height;
    stride_width = conv_params->stride_width;
    padding_type = conv_params->padding;
    activation = conv_params->activation;
    channel_multiplier = conv_params->depth_multiplier;
    // We only support dilation for DepthwiseConv.
    if (conv_params->dilation_height_factor > 1 ||
        conv_params->dilation_width_factor > 1) {
      is_dilated_depthwise_conv = true;
      dilation_factors_h_w_.push_back(conv_params->dilation_height_factor);
      dilation_factors_h_w_.push_back(conv_params->dilation_width_factor);
    }
  }

  // Weights tensor.
  TF_LITE_ENSURE_STATUS(
      InitializeWeightsNodes(inputs, outputs, context, input_depth_size));

  // Stride node.
  static int dummy = 0;
  stride_shape_ = {1, stride_height, stride_width, 1};
  auto* stride_node = graph_builder_->AddConstNodeWithData(
      stride_shape_.data(), reinterpret_cast<char*>(&dummy), sizeof(dummy));

  // Output dimensions.
  int output_batch_size, output_height_size, output_width_size,
      output_depth_size;
  GetDims(&output_batch_size, &output_height_size, &output_width_size,
          &output_depth_size, context->tensors[outputs->data[0]].dims);
  // Output bounds.
  // TODO(b/129276536): Add support for other activations here. Current
  // implementation assumes None/Relu.
  float output_min = 0;
  float output_max = 0;
  TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues(
      context->tensors[outputs->data[0]], &output_min, &output_max));
  // These denote the bounds fed to Hexagon's Conv mechanism, which will be
  // different from the TFLite tensor bounds if there is a RELU activation.
  float conv_output_min = output_min;
  float conv_output_max = output_max;
  if (activation == kTfLiteActRelu6) {
    conv_output_min = 0;
    conv_output_max = 6;
  } else if (activation == kTfLiteActReluN1To1) {
    conv_output_min = -1;
    conv_output_max = 1;
  } else if (activation == kTfLiteActRelu) {
    conv_output_min = 0;
  }
  auto* conv_output_min_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_min),
      sizeof(conv_output_min));
  auto* conv_output_max_const = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&conv_output_max),
      sizeof(conv_output_max));

  // Bias node.
  TF_LITE_ENSURE_STATUS(InitializeBiasNodes(inputs, outputs, context));

  // TODO(b/143759564): Simplify this method when depth_multiplier support needs
  // generalizing.
  if (channel_multiplier > 1 && input_depth_size == 1) {
    // Depthwise Conv with input_depth == 1 & channel_multiplier > 1 is
    // equivalent to Conv.
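    // (e.g. a 1-channel input with depth_multiplier 32 matches a standard
    // Conv producing 32 output channels.)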
    SetOpType(OP_Supernode_8x8p32to8);
  } else if (channel_multiplier > 1) {
    TF_LITE_KERNEL_LOG(
        context, "depth_multiplier > 1 not supported with input_depth > 1");
    return kTfLiteError;
  }

  TensorID output_tensor, output_min_tensor, output_max_tensor;
  if (is_dilated_depthwise_conv) {
    // For dilated Depthwise Conv, we convert this node into SpaceToBatchND, and
    // then chain Supernode & BatchToSpaceND after it.
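    // The emitted subgraph is:
    //   input -> SpaceToBatchND_8 -> DepthwiseSupernode_8x8p32to8
    //         -> BatchToSpaceND_8 -> output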
    int input_batch_size, input_height_size, input_width_size, input_depth_size;
    GetDims(&input_batch_size, &input_height_size, &input_width_size,
            &input_depth_size, data_tensor.dims);
    ComputeSpaceToBatchParams(
        input_height_size, input_width_size, weight_shape_[0], weight_shape_[1],
        dilation_factors_h_w_, padding_type, &space_to_batch_paddings_,
        &batch_to_space_crops_);
    auto* dilation_factors_const = graph_builder_->AddConstNodeWithData(
        dilation_factors_shape.data(),
        reinterpret_cast<char*>(dilation_factors_h_w_.data()),
        dilation_factors_h_w_.size() * sizeof(stride_height));
    auto* paddings_const = graph_builder_->AddConstNodeWithData(
        paddings_shape.data(),
        reinterpret_cast<char*>(space_to_batch_paddings_.data()),
        space_to_batch_paddings_.size() * sizeof(stride_height));
    auto* crops_const = graph_builder_->AddConstNodeWithData(
        paddings_shape.data(),
        reinterpret_cast<char*>(batch_to_space_crops_.data()),
        batch_to_space_crops_.size() * sizeof(stride_height));

    // 1. SpaceToBatch.
    SetOpType(OP_SpaceToBatchND_8);
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
    AddInput(TensorID(dilation_factors_const->GetID(), 0));
    AddInput(TensorID(paddings_const->GetID(), 0));
    AddInput(TensorID(data_min_const->GetID(), 0));
    AddInput(TensorID(data_max_const->GetID(), 0));
    std::vector<int> space_to_batch_output_dims;
    PopulateSpaceToBatchOutputDims(
        input_batch_size, input_height_size, input_width_size, input_depth_size,
        dilation_factors_h_w_, space_to_batch_paddings_,
        &space_to_batch_output_dims);
    TensorID space_to_batch_op_out =
        AddOutput(sizeof(uint8_t), 4, space_to_batch_output_dims);
    AddOutput(sizeof(float), 4, kScalarShape);
    AddOutput(sizeof(float), 4, kScalarShape);

    // 2. Depthwise Conv.
    auto* conv_op = graph_builder_->AddNode(GetTFLiteNodeID());
    conv_op->SetOpType(OP_DepthwiseSupernode_8x8p32to8);
    conv_op->AddInput(space_to_batch_op_out);
    conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
    conv_op->AddInput(TensorID(data_min_const->GetID(), 0));
    conv_op->AddInput(TensorID(data_max_const->GetID(), 0));
    conv_op->AddInput(TensorID(weights_min_node_->GetID(), 0));
    conv_op->AddInput(TensorID(weights_max_node_->GetID(), 0));
    conv_op->AddInput(TensorID(stride_node->GetID(), 0));
    conv_op->AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
    conv_op->AddInput(TensorID(bias_min_node_->GetID(), 0));
    conv_op->AddInput(TensorID(bias_max_node_->GetID(), 0));
    conv_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
    conv_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
    if (per_channel_quant_.channel_scales_node != nullptr) {
      conv_op->AddInput(
          TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
    }
    // The padding is handled by the SpaceToBatch/BatchToSpace ops surrounding
    // this node, so this op itself always uses VALID padding.
    // tf.nn.with_space_to_batch's docs state the following pattern:
    // """
    // batch_to_space_nd(
    //   op(space_to_batch_nd(input, adjusted_dilation_rate, adjusted_paddings),
    //      num_spatial_dims,
    //      "VALID")
    //   adjusted_dilation_rate,
    //   adjusted_crops)
    // """
    conv_op->SetPaddingType(NN_PAD_VALID);
    // These dimensions are probably a little excessive, but they upper-bound
    // the possible output from DepthwiseConv.
    // TODO(b/139955809): Find better bounds?
    TensorID conv_output = conv_op->AddOutput(
        sizeof(uint8_t), 4,
        {output_batch_size * dilation_factors_h_w_[0] *
             dilation_factors_h_w_[1],
         output_height_size, output_width_size, output_depth_size});
    conv_op->AddOutput(sizeof(float), 4, kScalarShape);
    conv_op->AddOutput(sizeof(float), 4, kScalarShape);

    // 3. BatchToSpace.
    auto* batch_to_space_op = graph_builder_->AddNode(GetTFLiteNodeID());
    batch_to_space_op->SetOpType(OP_BatchToSpaceND_8);
    batch_to_space_op->AddInput(conv_output);
    batch_to_space_op->AddInput(TensorID(dilation_factors_const->GetID(), 0));
    batch_to_space_op->AddInput(TensorID(crops_const->GetID(), 0));
    batch_to_space_op->AddInput(TensorID(conv_output_min_const->GetID(), 0));
    batch_to_space_op->AddInput(TensorID(conv_output_max_const->GetID(), 0));
    output_tensor =
        batch_to_space_op->AddOutput(sizeof(uint8_t), 4,
                                     {output_batch_size, output_height_size,
                                      output_width_size, output_depth_size});
    output_min_tensor =
        batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
    output_max_tensor =
        batch_to_space_op->AddOutput(sizeof(float), 4, kScalarShape);
  } else {
    // Standard case.
    // Padding type.
    if (padding_type == kTfLitePaddingSame) {
      SetPaddingType(NN_PAD_SAME);
    } else if (padding_type == kTfLitePaddingValid) {
      SetPaddingType(NN_PAD_VALID);
    }
    // Inputs
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[0]));
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[1]));
    AddInput(TensorID(data_min_const->GetID(), 0));
    AddInput(TensorID(data_max_const->GetID(), 0));
    AddInput(TensorID(weights_min_node_->GetID(), 0));
    AddInput(TensorID(weights_max_node_->GetID(), 0));
    AddInput(TensorID(stride_node->GetID(), 0));
    AddInput(graph_builder_->GetHexagonTensorId(inputs->data[2]));
    AddInput(TensorID(bias_min_node_->GetID(), 0));
    AddInput(TensorID(bias_max_node_->GetID(), 0));
    AddInput(TensorID(conv_output_min_const->GetID(), 0));
    AddInput(TensorID(conv_output_max_const->GetID(), 0));
    if (per_channel_quant_.channel_scales_node != nullptr) {
      AddInput(TensorID(per_channel_quant_.channel_scales_node->GetID(), 0));
    }
    // Outputs
    output_tensor = AddOutput(sizeof(uint8_t), 4,
                              {output_batch_size, output_height_size,
                               output_width_size, output_depth_size});
    output_min_tensor = AddOutput(sizeof(float), 4, kScalarShape);
    output_max_tensor = AddOutput(sizeof(float), 4, kScalarShape);
  }

  // Requantize if activation was not None & the TFLite tensor's min/max is
  // different (diff > 1e-2) from the RELU bounds.
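  // For example, with Relu6 the conv ran with bounds [0, 6]; if the TFLite
  // output tensor's quantized min/max differ from these, a Requantize op
  // rescales the result into the tensor's own range.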
  const float min_bound_diff = std::abs(conv_output_min - output_min);
  const float max_bound_diff = std::abs(conv_output_max - output_max);
  if (activation != kTfLiteActNone &&
      (min_bound_diff > 0.01 || max_bound_diff > 0.01)) {
    auto* requantized_min_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_min), sizeof(output_min));
    auto* requantized_max_const = graph_builder_->AddConstNodeWithData(
        kScalarShape, reinterpret_cast<char*>(&output_max), sizeof(output_max));
    auto* requantize_op = graph_builder_->AddNode(GetTFLiteNodeID());
    requantize_op->SetOpType(OP_Requantize_8to8);
    requantize_op->AddInput(output_tensor);
    requantize_op->AddInput(output_min_tensor);
    requantize_op->AddInput(output_max_tensor);
    requantize_op->AddInput(TensorID(requantized_min_const->GetID(), 0));
    requantize_op->AddInput(TensorID(requantized_max_const->GetID(), 0));
    node_output_ =
        requantize_op->AddOutput(sizeof(uint8_t), 4,
                                 {output_batch_size, output_height_size,
                                  output_width_size, output_depth_size});
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
    requantize_op->AddOutput(sizeof(float), 4, kScalarShape);
  } else {
    node_output_ = output_tensor;
  }

  return kTfLiteOk;
}

TfLiteStatus Conv2dOpBuilder::RegisterOutputs(const TfLiteIntArray* outputs,
                                              TfLiteContext* context) {
  // Should be only 1 output.
  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
                                  node_output_.second);
  return kTfLiteOk;
}

Conv2dOpBuilder::~Conv2dOpBuilder() {}

OpBuilder* CreateConv2DBuilder(GraphBuilder* graph_builder, int op_type) {
  return new Conv2dOpBuilder(graph_builder, op_type);
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite