/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <stdint.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/hexagon/builders/conv_2d_builder.h"
#include "tensorflow/lite/delegates/hexagon/hexagon_nn/hexagon_nn.h"
#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/lite/kernels/kernel_util.h"

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {

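// XOR-ing with 0x80 flips the sign bit, mapping a two's-complement int8
// value v in [-128, 127] to the uint8 value (v + 128) in [0, 255] while
// preserving order, so the data can be fed to Hexagon's uint8 kernels.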
constexpr uint8_t k8BitSignFlipConstant = 0x80;
// 1/1024 ~ 0.0009766 is the minimum channel scale, relative to the largest
// channel scale, that Hexagon's kernels accept.
// TODO(b/151103818): Figure out a way to retrieve this constant reliably.
constexpr float kHexagonMinRelativeScale = 0.0009766f;

}  // namespace

TfLiteStatus ProcessPerChannelQuantizedWeights(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context, float* weights_min, float* weights_max,
    GraphBuilder* graph_builder, PerChannelQuantData* per_channel_quant) {
  if (!per_channel_quant) return kTfLiteError;
  const auto& weights_tensor = context->tensors[inputs->data[1]];
  TfLiteAffineQuantization* weights_quant_params =
      reinterpret_cast<TfLiteAffineQuantization*>(
          weights_tensor.quantization.params);

  // Retrieve channel scales.
  per_channel_quant->num_scale_values = weights_quant_params->scale->size;
  // Normalize the scales as expected by Hexagon.
  per_channel_quant->scales_data = weights_quant_params->scale->data;
  std::vector<float> normalized_scales;
  normalized_scales.reserve(per_channel_quant->num_scale_values);
  float scale_max = 0.0;
  for (int i = 0; i < per_channel_quant->num_scale_values; ++i) {
    normalized_scales.push_back(per_channel_quant->scales_data[i]);
    if (per_channel_quant->scales_data[i] > scale_max) {
      scale_max = per_channel_quant->scales_data[i];
    }
  }
  if (scale_max == 0.0) {
    TF_LITE_KERNEL_LOG(context, "Scale max is zero for: %s",
                       weights_tensor.name);
    return kTfLiteError;
  }
  for (int i = 0; i < per_channel_quant->num_scale_values; ++i) {
    normalized_scales[i] =
        std::max(normalized_scales[i] / scale_max, kHexagonMinRelativeScale);
  }
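  // Illustrative example (hypothetical values): channel scales
  // {0.5, 0.25, 1e-6} have scale_max = 0.5 and normalize to
  // {1.0, 0.5, kHexagonMinRelativeScale}; every normalized scale ends up in
  // [kHexagonMinRelativeScale, 1.0].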
  // Add node for channel scales data.
  const std::vector<int> scales_shape = {1, 1, 1,
                                         per_channel_quant->num_scale_values};
  per_channel_quant->channel_scales_node = graph_builder->AddConstNodeWithData(
      scales_shape.data(), reinterpret_cast<char*>(normalized_scales.data()),
      normalized_scales.size() * sizeof(normalized_scales[0]));
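  // With the scales normalized, scale_max acts as the effective per-tensor
  // scale, so the int8 weight range [-128, 127] maps to the float bounds
  // below.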
  *weights_min = -128 * scale_max;
  *weights_max = 127 * scale_max;
  return kTfLiteOk;
}

TfLiteStatus ProcessPerChannelQuantizedBias(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context, float* bias_min, float* bias_max,
    GraphBuilder* graph_builder, PerChannelQuantData* per_channel_quant,
    OpBuilder** bias_const_node) {
  const auto& bias_tensor = context->tensors[inputs->data[2]];

  const TfLiteAffineQuantization* input_quant_params =
      static_cast<const TfLiteAffineQuantization*>(
          context->tensors[inputs->data[0]].quantization.params);
  const float input_scale = input_quant_params->scale->data[0];
  // Dequantize the bias values to float first, to account for the
  // normalization of channel scales.
  auto* bias_data = bias_tensor.data.i32;
  const int bias_size = NumElements(&bias_tensor);
  if (bias_size != per_channel_quant->num_scale_values) {
    TF_LITE_KERNEL_LOG(
        context, "Bias/channel scales number mismatch for bias tensor: %s",
        bias_tensor.name);
    return kTfLiteError;
  }
  std::vector<float> dequantized_bias;
  dequantized_bias.reserve(bias_size);
  for (int i = 0; i < bias_size; ++i) {
    const float dequantized_value =
        bias_data[i] * input_scale * per_channel_quant->scales_data[i];
    const float abs_dequantized_value = std::abs(dequantized_value);
    if (abs_dequantized_value > *bias_max) {
      *bias_max = abs_dequantized_value;
    }
    dequantized_bias.push_back(dequantized_value);
  }
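  // Widen the symmetric range by a factor of 8, presumably to leave headroom
  // above the largest dequantized bias magnitude.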
  *bias_max = *bias_max * 8;
  *bias_min = -1 * *bias_max;
  // Now requantize the bias values to the new min/max values.
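  // Each bias value becomes a signed 32-bit fixed-point number in which
  // +/-*bias_max corresponds to +/-2^31. With the 8x headroom above,
  // |dequantized_bias[i] / *bias_max| <= 1/8 (when *bias_max > 0), so the
  // cast below stays well within int32 range.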
  std::vector<int> preprocessed_bias_data;
  preprocessed_bias_data.reserve(per_channel_quant->num_scale_values);
  for (int i = 0; i < bias_size; ++i) {
    preprocessed_bias_data.push_back(static_cast<int>(
        std::round(std::pow(2, 31) * (dequantized_bias[i] / *bias_max))));
  }
  // Add nodes for bias.
  const std::vector<int> bias_shape = {1, 1, 1, bias_size};
  auto* bias_data_node = graph_builder->AddConstNodeWithData(
      bias_shape.data(), reinterpret_cast<char*>(preprocessed_bias_data.data()),
      preprocessed_bias_data.size() * sizeof(preprocessed_bias_data[0]));
  if (bias_const_node) {
    *bias_const_node = bias_data_node;
  }
  graph_builder->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0,
                                 /*overwrite=*/true);
  return kTfLiteOk;
}

TfLiteStatus Conv2dOpBuilder::InitializeWeightsNodes(
    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
    TfLiteContext* context, const int input_depth) {
  const std::vector<int> quant_bound_shape = {1, 1, 1, 1};

  const auto& weights_tensor = context->tensors[inputs->data[1]];
  if (weights_tensor.allocation_type != kTfLiteMmapRo) {
    TF_LITE_KERNEL_LOG(
        context, "Weights tensor doesn't have correct allocation type: %s",
        weights_tensor.name);
    return kTfLiteError;
  }
  int weights_batch_size, weights_height_size, weights_width_size,
      weights_depth_size;
  // TFLite stores weights in NHWC order; read the dimensions accordingly.
  // The NHWC -> HWCN transpose Hexagon expects happens below, in the
  // regular-convolution branch.
  GetDims(&weights_batch_size, &weights_height_size, &weights_width_size,
          &weights_depth_size, weights_tensor.dims);

  // Weights tensor could be int8 even for per-tensor quantization.
  // Therefore, we look at the number of scale values to check if it is
  // per-channel quantized.
  TfLiteAffineQuantization* weights_quant_params =
      reinterpret_cast<TfLiteAffineQuantization*>(
          weights_tensor.quantization.params);
  const bool is_per_channel_quant = weights_quant_params->scale->size > 1;

  // WEIGHTS DATA.
  OpBuilder* weights_data_node = nullptr;
  if (op_node_.op_type == OP_Supernode_8x8p32to8) {
    // Hexagon lib expects the weight tensor in HWCN, TFLite uses NHWC.
    // Transpose NHWC -> HWCN.
    weight_shape_ = {weights_height_size, weights_width_size,
                     weights_depth_size, weights_batch_size};
    RuntimeShape nhwc_shape({weights_batch_size, weights_height_size,
                             weights_width_size, weights_depth_size});
    RuntimeShape hwcn_shape({weights_height_size, weights_width_size,
                             weights_depth_size, weights_batch_size});
    std::vector<uint8_t> hwcn(NumElements(&weights_tensor));
    TransposeParams transpose_params;
    transpose_params.perm_count = 4;
    transpose_params.perm[0] = 1;
    transpose_params.perm[1] = 2;
    transpose_params.perm[2] = 3;
    transpose_params.perm[3] = 0;
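    // perm[i] names the input axis that becomes output axis i: output axes
    // {H, W, C, N} are drawn from input (NHWC) axes {1, 2, 3, 0}.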
    // TODO(b/151103818): Try merging Transpose & bit flip.
    if (weights_tensor.type == kTfLiteInt8) {
      optimized_ops::Transpose<int8_t>(transpose_params, nhwc_shape,
                                       weights_tensor.data.int8, hwcn_shape,
                                       reinterpret_cast<int8_t*>(hwcn.data()));
      // Flip bits on the weight values so that the int8 values are treated
      // as uint8.
      for (int i = 0; i < hwcn.size(); ++i) {
        hwcn[i] = hwcn[i] ^ k8BitSignFlipConstant;
      }
    } else {
      optimized_ops::Transpose<uint8_t>(transpose_params, nhwc_shape,
                                        weights_tensor.data.uint8, hwcn_shape,
                                        hwcn.data());
    }
    weights_data_node = graph_builder_->AddConstNodeWithData(
        weight_shape_.data(), reinterpret_cast<char*>(hwcn.data()),
        hwcn.size() * sizeof(hwcn[0]));
  } else if (op_node_.op_type == OP_DepthwiseSupernode_8x8p32to8) {
    // Hexagon treats depthwise conv like tf.nn.depthwise_conv2d, where the
    // expected filter shape is [fh,fw,din,dmul].
    // The data itself will remain the same, since TFLite's representation is
    // just a 'flattening' of Hexagon's version.
    const int channel_multiplier = weights_depth_size / input_depth;
    weight_shape_ = {weights_height_size, weights_width_size, input_depth,
                     channel_multiplier};
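    // Illustrative example (hypothetical shapes): a TFLite depthwise filter
    // of shape [1, 3, 3, 32] over a 16-channel input has
    // channel_multiplier = 2 and is reinterpreted here as [3, 3, 16, 2].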
    if (weights_tensor.type == kTfLiteInt8) {
      // Flip bits on the weight values so that the int8 values are treated
      // as uint8.
      std::vector<uint8_t> converted_data(NumElements(&weights_tensor));
      for (int i = 0; i < converted_data.size(); ++i) {
        converted_data[i] = weights_tensor.data.int8[i] ^ k8BitSignFlipConstant;
      }
      weights_data_node = graph_builder_->AddConstNodeWithData(
          weight_shape_.data(), reinterpret_cast<char*>(converted_data.data()),
          converted_data.size() * sizeof(converted_data[0]));
    } else {
      weights_data_node = graph_builder_->AddConstNodeWithData(
          weight_shape_.data(), weights_tensor.data.raw,
          NumElements(&weights_tensor) * sizeof(weights_tensor.data.uint8[0]));
    }
  }
  graph_builder_->AddTensorWithID(inputs->data[1], weights_data_node->GetID(),
                                  0, /*overwrite=*/true);

  // WEIGHTS QUANTIZATION.
  float weights_min = 0;
  float weights_max = 0;
  if (is_per_channel_quant) {
    TF_LITE_ENSURE_STATUS(ProcessPerChannelQuantizedWeights(
        inputs, outputs, context, &weights_min, &weights_max, graph_builder_,
        &per_channel_quant_));
  } else {
    TF_LITE_ENSURE_STATUS(ComputeMinAndMaxQuantValues(
        weights_tensor, &weights_min, &weights_max));
  }
  weights_min_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&weights_min),
      sizeof(weights_min));
  weights_max_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&weights_max),
      sizeof(weights_max));

  return kTfLiteOk;
}

TfLiteStatus Conv2dOpBuilder::InitializeBiasNodes(const TfLiteIntArray* inputs,
                                                  const TfLiteIntArray* outputs,
                                                  TfLiteContext* context) {
  const std::vector<int> quant_bound_shape = {1, 1, 1, 1};

  const auto& bias_tensor = context->tensors[inputs->data[2]];

  float bias_min = 0;
  float bias_max = 0;
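  // A non-null channel_scales_node means InitializeWeightsNodes detected
  // per-channel quantized weights, so the bias must be requantized against
  // the normalized channel scales.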
  if (per_channel_quant_.channel_scales_node != nullptr) {
    TF_LITE_ENSURE_STATUS(ProcessPerChannelQuantizedBias(
        inputs, outputs, context, &bias_min, &bias_max, graph_builder_,
        &per_channel_quant_));
  } else {
    auto* bias_data_node =
        graph_builder_->AddConstNodeWithData(inputs->data[2], bias_tensor);
    graph_builder_->AddTensorWithID(inputs->data[2], bias_data_node->GetID(), 0,
                                    /*overwrite=*/true);
    TF_LITE_ENSURE_STATUS(
        ComputeMinAndMaxQuantValues(bias_tensor, &bias_min, &bias_max));
  }

  bias_min_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&bias_min),
      sizeof(bias_min));
  bias_max_node_ = graph_builder_->AddConstNodeWithData(
      quant_bound_shape.data(), reinterpret_cast<char*>(&bias_max),
      sizeof(bias_max));

  return kTfLiteOk;
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite