/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stdint.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h"
#include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {

constexpr uint8_t k8BitSignFlipConstant = 0x80;
// 1/1024 ~ 0.0009766 is a restriction set by Hexagon's kernels.
// TODO(b/151103818): Figure out a way to retrieve this constant reliably.
constexpr float kHexagonMinRelativeScale = 0.0009766f;
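// Scales are normalized relative to the maximum channel scale (see
// ProcessPerChannelQuantizedWeights below) and clamped to this floor,
// 1/1024 = 0.0009765625.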

}  // namespace

TfLiteStatus ProcessPerChannelQuantizedWeights(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context, float* weights_min, float* weights_max,
    GraphBuilder* graph_builder, PerChannelQuantData* per_channel_quant) {
  if (!per_channel_quant) return kTfLiteError;
  const auto& weights_tensor = context->tensors[inputs->data[1]];
  TfLiteAffineQuantization* weights_quant_params =
      reinterpret_cast<TfLiteAffineQuantization*>(
          weights_tensor.quantization.params);

  // Retrieve channel scales.
  per_channel_quant->num_scale_values = weights_quant_params->scale->size;
  // Normalize the scales as expected by Hexagon.
  per_channel_quant->scales_data = weights_quant_params->scale->data;
  std::vector<float> normalized_scales;
  normalized_scales.reserve(per_channel_quant->num_scale_values);
  float scale_max = 0.0;
  for (int i = 0; i < per_channel_quant->num_scale_values; ++i) {
    normalized_scales.push_back(per_channel_quant->scales_data[i]);
    if (per_channel_quant->scales_data[i] > scale_max) {
      scale_max = per_channel_quant->scales_data[i];
    }
  }
  if (scale_max == 0.0) {
    TF_LITE_KERNEL_LOG(context, "Scale max is zero for: %s",
                       weights_tensor.name);
    return kTfLiteError;
  }
  for (int i = 0; i < per_channel_quant->num_scale_values; ++i) {
    normalized_scales[i] =
        std::max(normalized_scales[i] / scale_max, kHexagonMinRelativeScale);
  }
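  // For example, channel scales {0.5, 0.25, 1e-6} give scale_max = 0.5 and
  // normalize to {1.0, 0.5, kHexagonMinRelativeScale}; the last value is
  // clamped because 1e-6 / 0.5 falls below the 1/1024 floor.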
  // Add node for channel scales data.
  const std::vector<int> scales_shape = {1, 1, 1,
                                         per_channel_quant->num_scale_values};
  per_channel_quant->channel_scales_node = graph_builder->AddConstNodeWithData(
      scales_shape.data(), reinterpret_cast<char*>(normalized_scales.data()),
      normalized_scales.size() * sizeof(normalized_scales[0]));
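  // The reported min/max span the full symmetric int8 range scaled by the
  // largest channel scale; per-channel differences are reintroduced by the
  // channel-scales node added above.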
  *weights_min = -128 * scale_max;
  *weights_max = 127 * scale_max;
  return kTfLiteOk;
}

TfLiteStatus ProcessPerChannelQuantizedBias(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context, float* bias_min, float* bias_max,
    GraphBuilder* graph_builder, PerChannelQuantData* per_channel_quant,
    OpBuilder** bias_const_node) {
  const auto& bias_tensor = context->tensors[inputs->data[2]];

  const TfLiteAffineQuantization* input_quant_params =
      static_cast<const TfLiteAffineQuantization*>(
          context->tensors[inputs->data[0]].quantization.params);
  const float input_scale = input_quant_params->scale->data[0];
  // Dequantize the bias values to float first, to account for the
  // normalization of the channel scales.
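  // For symmetric per-channel quantization, the real value is
  // bias_real[i] = bias_data[i] * input_scale * scales_data[i].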
  auto* bias_data = bias_tensor.data.i32;
  const int bias_size = NumElements(&bias_tensor);
  if (bias_size != per_channel_quant->num_scale_values) {
    TF_LITE_KERNEL_LOG(
        context, "Bias/channel scales number mismatch for bias tensor: %s",
        bias_tensor.name);
    return kTfLiteError;
  }
  std::vector<float> dequantized_bias;
  dequantized_bias.reserve(bias_size);
  for (int i = 0; i < bias_size; ++i) {
    const float dequantized_value =
        bias_data[i] * input_scale * per_channel_quant->scales_data[i];
    const float abs_dequantized_value = std::abs(dequantized_value);
    if (abs_dequantized_value > *bias_max) {
      *bias_max = abs_dequantized_value;
    }
    dequantized_bias.push_back(dequantized_value);
  }
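  // Widen the range to 8x the largest observed magnitude, presumably to
  // leave accumulation headroom in Hexagon's kernels.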
  *bias_max = *bias_max * 8;
  *bias_min = -1 * *bias_max;
  // Now requantize the bias values to the new min/max values.
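  // Each value maps to round(2^31 * v / bias_max), a Q0.31 fixed-point
  // fraction of bias_max; since |v| <= bias_max / 8, the result magnitude
  // stays below 2^28 and fits comfortably in int32.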
  std::vector<int> preprocessed_bias_data;
  preprocessed_bias_data.reserve(per_channel_quant->num_scale_values);
  for (int i = 0; i < bias_size; ++i) {
    preprocessed_bias_data.push_back(static_cast<int>(
        std::round(std::pow(2, 31) * (dequantized_bias[i] / *bias_max))));
  }
  // Add nodes for bias.
  const std::vector<int> bias_shape = {1, 1, 1, bias_size};
  auto* bias_data_node = graph_builder->AddConstNodeWithData(
      bias_shape.data(), reinterpret_cast<char*>(preprocessed_bias_data.data()),
      preprocessed_bias_data.size() * sizeof(preprocessed_bias_data[0]));
  if (bias_const_node) {
    *bias_const_node = bias_data_node;
  }
  graph_builder->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0,
                                 /*overwrite=*/true);
  return kTfLiteOk;
}

TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context, const int input_depth) {
  const std::vector<int> quant_bound_shape = {1, 1, 1, 1};

  const auto& weights_tensor = context->tensors[inputs->data[1]];
  if (weights_tensor.allocation_type != kTfLiteMmapRo) {
    TF_LITE_KERNEL_LOG(
        context, "Weights tensor doesn't have correct allocation type: %s",
        weights_tensor.name);
    return kTfLiteError;
  }
  int weights_batch_size, weights_height_size, weights_width_size,
      weights_depth_size;
  // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC.
  // Transpose NHWC -> HWCN
  GetDims(&weights_batch_size, &weights_height_size, &weights_width_size,
          &weights_depth_size, weights_tensor.dims);

  // The weights tensor could be int8 even for per-tensor quantization, so we
  // look at the number of scale values to determine whether it is
  // per-channel quantized.
  TfLiteAffineQuantization* weights_quant_params =
      reinterpret_cast<TfLiteAffineQuantization*>(
          weights_tensor.quantization.params);
  const bool is_per_channel_quant = weights_quant_params->scale->size > 1;

  // WEIGHTS DATA.
  OpBuilder* weights_data_node = nullptr;
  if (op_node_.op_type == OP_Supernode_8x8p32to8) {
    // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC.
    // Transpose NHWC -> HWCN
    weight_shape_ = {weights_height_size, weights_width_size,
                     weights_depth_size, weights_batch_size};
    RuntimeShape nhwc_shape({weights_batch_size, weights_height_size,
                             weights_width_size, weights_depth_size});
    RuntimeShape hwcn_shape({weights_height_size, weights_width_size,
                             weights_depth_size, weights_batch_size});
    std::vector<uint8_t> hwcn(NumElements(&weights_tensor));
    TransposeParams transpose_params;
    transpose_params.perm_count = 4;
    transpose_params.perm[0] = 1;
    transpose_params.perm[1] = 2;
    transpose_params.perm[2] = 3;
    transpose_params.perm[3] = 0;
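    // With perm {1, 2, 3, 0}, output dimension i takes input dimension
    // perm[i], so output element (h, w, c, n) reads input element
    // (n, h, w, c): NHWC -> HWCN.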
    // TODO(b/151103818): Try merging Transpose & bit flip.
    if (weights_tensor.type == kTfLiteInt8) {
      optimized_ops::Transpose<int8_t>(transpose_params, nhwc_shape,
                                       weights_tensor.data.int8, hwcn_shape,
                                       reinterpret_cast<int8_t*>(hwcn.data()));
      // Flip bits on the weight values so that the int8 values are treated
      // as uint8.
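      // XOR with k8BitSignFlipConstant (0x80) adds 128 mod 256: int8 -128
      // (0x80) maps to uint8 0, int8 0 to 128, and int8 127 (0x7F) to 255,
      // preserving the ordering of the values.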
      for (int i = 0; i < hwcn.size(); ++i) {
        hwcn[i] = hwcn[i] ^ k8BitSignFlipConstant;
      }
    } else {
      optimized_ops::Transpose<uint8_t>(transpose_params, nhwc_shape,
                                        weights_tensor.data.uint8, hwcn_shape,
                                        hwcn.data());
    }
    weights_data_node = graph_builder_->AddConstNodeWithData(
        weight_shape_.data(), reinterpret_cast<char*>(hwcn.data()),
        hwcn.size() * sizeof(hwcn[0]));
  } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) {
    // Hexagon treats depthwise conv like tf.nn.depthwise_conv2d, where the
    // expected filter shape is [fh,fw,din,dmul].
    // The data itself will remain the same, since TFLite's representation is
    // just a 'flattening' of Hexagon's version.
    const int channel_multiplier = weights_depth_size / input_depth;
    weight_shape_ = {weights_height_size, weights_width_size, input_depth,
                     channel_multiplier};
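    // For example, input_depth = 8 with weights_depth_size = 16 yields
    // channel_multiplier = 2, i.e. two filters per input channel.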

    if (weights_tensor.type == kTfLiteInt8) {
      // Flip bits on the weight values so that the int8 values are treated
      // as uint8.
      std::vector<uint8_t> converted_data(NumElements(&weights_tensor));
      for (int i = 0; i < converted_data.size(); ++i) {
        converted_data[i] = weights_tensor.data.int8[i] ^ k8BitSignFlipConstant;
      }
      weights_data_node = graph_builder_->AddConstNodeWithData(
          weight_shape_.data(), reinterpret_cast<char*>(converted_data.data()),
          converted_data.size() * sizeof(converted_data[0]));
    } else {
      weights_data_node = graph_builder_->AddConstNodeWithData(
          weight_shape_.data(), weights_tensor.data.raw,
          NumElements(&weights_tensor) * sizeof(weights_tensor.data.uint8[0]));
    }
  }
  graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node->GetID(),
                                  0, /*overwrite=*/true);

  // WEIGHTS QUANTIZATION.
  float weights_min = 0;
  float weights_max = 0;
  if (is_per_channel_quant) {
    ProcessPerChannelQuantizedWeights(inputs, outputs, context, &weights_min,
                                      &weights_max, graph_builder_,
                                      &per_channel_quant_);
  } else {
    TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues(
        weights_tensor, &weights_min, &weights_max));
  }
  weights_min_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&weights_min),
      sizeof(weights_min));
  weights_max_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&weights_max),
      sizeof(weights_max));

  return kTfLiteOk;
}

TfLiteStatus Conv2dOpBuilder::InitializeBiasNodes(const TfLiteIntArray* inputs,
                                                  const TfLiteIntArray* outputs,
                                                  TfLiteContext* context) {
  const std::vector<int> quant_bound_shape = {1, 1, 1, 1};

  const auto& bias_tensor = context->tensors[inputs->data[2]];

  float bias_min = 0;
  float bias_max = 0;
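  // A non-null channel_scales_node means ProcessPerChannelQuantizedWeights
  // ran for this op, i.e. the weights are per-channel quantized, so the bias
  // must be preprocessed to match the normalized scales.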
  if (per_channel_quant_.channel_scales_node != nullptr) {
    ProcessPerChannelQuantizedBias(inputs, outputs, context, &bias_min,
                                   &bias_max, graph_builder_,
                                   &per_channel_quant_);
  } else {
    auto* bias_data_node =
        graph_builder_->AddConstNodeWithData(inputs->data[2], bias_tensor);
    graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0,
                                    /*overwrite=*/true);
    TF_LITE_ENSURE_STATUS(
        ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max));
  }

  bias_min_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&bias_min),
      sizeof(bias_min));
  bias_max_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&bias_max),
      sizeof(bias_max));

  return kTfLiteOk;
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite