/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/delegates/hexagon/builders/op_builder.h"

#include <farmhash.h>

#include "hexagon/hexagon_nn_ops.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/hexagon/builders/op_factory.h"

namespace tflite {
namespace delegates {
namespace hexagon {
namespace {
// Farmhash Fingerprint
inline uint64_t CombineFingerprints(uint64_t l, uint64_t h) {
  // Murmur-inspired hashing.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (l ^ h) * kMul;
  a ^= (a >> 47);
  uint64_t b = (h ^ a) * kMul;
  b ^= (b >> 44);
  b *= kMul;
  b ^= (b >> 41);
  b *= kMul;
  return b;
}

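// Hashes both the raw bytes of a const buffer and its 4-D shape, so two
// constants with identical bytes but different shapes do not collide in the
// const-node cache below.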
inline uint64_t ComputeHash(const int shape[], const char* data,
                            const int data_len) {
  return CombineFingerprints(
      ::util::Fingerprint64(data, data_len),
      ::util::Fingerprint64(reinterpret_cast<const char*>(shape),
                            sizeof(shape[0]) * 4));
}

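// Tensor overload: also mixes in the int8_to_uint8 flag, so the original and
// the uint8-converted copies of the same tensor are cached as distinct nodes.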
inline uint64_t ComputeHash(const TfLiteTensor& tensor, const int shape[],
                            int int8_to_uint8) {
  auto data_hash = ComputeHash(shape, tensor.data.raw_const, tensor.bytes);
  auto int8_to_uint8_hash = ::util::Fingerprint64(
      reinterpret_cast<char*>(&int8_to_uint8), sizeof(int8_to_uint8));
  return CombineFingerprints(data_hash, int8_to_uint8_hash);
}

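// Returns the element size in bytes for the given TFLite type. Types not
// listed below are treated as single-byte elements.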
int GetElementSize(TfLiteType type) {
  switch (type) {
    case kTfLiteFloat32:
      return sizeof(float);
    case kTfLiteBool:
      return sizeof(bool);
    case kTfLiteInt32:
      return sizeof(int32_t);
    case kTfLiteInt8:
      return sizeof(int8_t);
    case kTfLiteUInt8:
      return sizeof(uint8_t);
    default:
      return sizeof(int8_t);
  }
}
}  // namespace

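// Maps a TFLite builtin operator to the OpBuilder that emits the equivalent
// Hexagon NN op(s). Returns nullptr and reports an error for builtins that
// are not supported here.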
OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type,
                                                     TfLiteNode* node) {
  switch (op_type) {
    case kTfLiteBuiltinAdd:
      return CreateArithmeticBuilder(this, OP_QuantizedAdd_8p8to8);
    case kTfLiteBuiltinArgMax:
      return CreateArgMinMaxOpBuilder(this, OP_ArgMax_8toInt32);
    case kTfLiteBuiltinArgMin:
      return CreateArgMinMaxOpBuilder(this, OP_ArgMin_8);
    case kTfLiteBuiltinMul:
      // The 32-bit version of Mul is more accurate, and robust to disparities
      // in input/output ranges.
      return CreateArithmeticBuilder(this, OP_QuantizedMul_8x8to32);
    case kTfLiteBuiltinSub:
      return CreateArithmeticBuilder(this, OP_QuantizedSub_8p8to8);
    case kTfLiteBuiltinMean:
      return CreateReduceBuilder(this, OP_QuantizedMean_8);
    case kTfLiteBuiltinSum:
      return CreateReduceBuilder(this, OP_QuantizedSum_8to32);
    case kTfLiteBuiltinPad:
      return CreatePadBuilder(this, OP_QuantizedPad_8);
    case kTfLiteBuiltinMirrorPad:
      return CreateMirrorPadBuilder(this, OP_MirrorPad_8);
    case kTfLiteBuiltinFullyConnected: {
      const auto& weights_tensor = context_->tensors[node->inputs->data[1]];
      if (weights_tensor.allocation_type == kTfLiteMmapRo)
        return CreateMatMulWithConstWeightsOpBuilder(
            this, OP_QuantizedMatMul_8x8to32);
      else
        return CreateMatMulOpBuilder(this, OP_Transpose_8);
    }
    case kTfLiteBuiltinAveragePool2d:
      return CreatePool2DBuilder(this, OP_QuantizedAvgPool_8);
    case kTfLiteBuiltinMaxPool2d:
      return CreatePool2DBuilder(this, OP_QuantizedMaxPool_8);
    case kTfLiteBuiltinConcatenation:
      return CreateConcatBuilder(this, OP_QuantizedConcat_8);
    case kTfLiteBuiltinConv2d:
      return CreateConv2DBuilder(this, OP_Supernode_8x8p32to8);
    case kTfLiteBuiltinTransposeConv:
      return CreateTransposeConv2DBuilder(
          this, OP_QuantizedTransposeConv2d_8x8p32to8);
    case kTfLiteBuiltinDepthwiseConv2d:
      return CreateConv2DBuilder(this, OP_DepthwiseSupernode_8x8p32to8);
    case kTfLiteBuiltinReshape:
      return CreateReshapeBuilder(this, OP_Reshape);
    case kTfLiteBuiltinSoftmax:
      return CreateSoftmaxBuilder(this, OP_QuantizedSoftmax_8);
    case kTfLiteBuiltinResizeNearestNeighbor:
      return CreateResizeNearestNeighborBuilder(this,
                                                OP_ResizeNearestNeighbor_8);
    case kTfLiteBuiltinL2Normalization:
      return CreateL2NormalizationBuilder(this, OP_L2Normalize_8);
    case kTfLiteBuiltinRelu:
      return CreateActivationBuilder(this, OP_QuantizedRelu_8);
    case kTfLiteBuiltinRelu6:
      return CreateActivationBuilder(this, OP_QuantizedReluX_8);
    case kTfLiteBuiltinTanh:
      return CreateActivationBuilder(this, OP_QuantizedTanh_8);
    case kTfLiteBuiltinLogistic:
      return CreateActivationBuilder(this, OP_QuantizedSigmoid_8);
    case kTfLiteBuiltinSplit:
      return CreateSplitBuilder(this, OP_QuantizedSplit_8);
    case kTfLiteBuiltinResizeBilinear:
      return CreateResizeBilinearOpBuilder(this, OP_QuantizedResizeBilinear_8);
    case kTfLiteBuiltinNeg:
      return CreateNegOpBuilder(this, OP_QuantizedNeg_8);
    case kTfLiteBuiltinTranspose:
      return CreateTransposeBuilder(this, OP_Transpose_8);
    case kTfLiteBuiltinSpaceToDepth:
      return CreateSpaceToDepthBuilder(this, OP_SpaceToDepth_8);
    case kTfLiteBuiltinDepthToSpace:
      return CreateSpaceToDepthBuilder(this, OP_DepthToSpace_8);
    case kTfLiteBuiltinQuantize:
      return CreateQuantizeBuilder(this, OP_Requantize_8to8);
    case kTfLiteBuiltinHardSwish:
      return CreateHardSwishBuilder(this, OP_QuantizedHardSwish_8);
    case kTfLiteBuiltinMinimum:
      return CreateMinMaxBuilder(this, OP_QuantizedMinimum_8);
    case kTfLiteBuiltinMaximum:
      return CreateMinMaxBuilder(this, OP_QuantizedMaximum_8);
    case kTfLiteBuiltinSlice:
      return CreateSliceOpBuilder(this, OP_QuantizedSlice_8);
    case kTfLiteBuiltinPack:
      return CreatePackBuilder(this, OP_QuantizedPack_8);
    case kTfLiteBuiltinStridedSlice:
      return CreateStridedSliceBuilder(this, OP_QuantizedStridedSlice_8);
    case kTfLiteBuiltinSquaredDifference:
      return CreateSquaredDifferenceOpBuilder(this, OP_QuantizedSub_8p8to8);
    case kTfLiteBuiltinRsqrt:
      return CreateRSqrtOpBuilder(this, OP_QuantizedSqrt_8);
    default:
      context_->ReportError(context_, "Op not supported: %d", op_type);
      return nullptr;
  }
}

OpBuilder* GraphBuilder::LookupConstData(uint64_t cache_key) {
  auto lookup_result = cache_.find(cache_key);
  if (lookup_result != cache_.end()) return lookup_result->second;
  return nullptr;
}

void GraphBuilder::AddToCache(uint64_t cache_key, OpBuilder* value) {
  cache_[cache_key] = value;
}

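// Appends a Hexagon const node wrapping `data` with the given 4-D `shape`.
// Nodes are de-duplicated through the fingerprint cache above, so repeated
// constants (e.g. the same scalar min/max value) map to one const node.
// A minimal usage sketch, mirroring ComputeAndAddMinAndMax below:
//   float min_value = 0.0f;
//   auto* min_node = graph_builder_->AddConstNodeWithData(
//       OpBuilder::kScalarShape, reinterpret_cast<char*>(&min_value),
//       sizeof(min_value));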
OpBuilder* GraphBuilder::AddConstNodeWithData(const int shape[], char* data,
                                              int data_size) {
  auto cache_key = ComputeHash(shape, data, data_size);
  if (auto lookup_result = LookupConstData(cache_key)) return lookup_result;
  builders_.emplace_back(new OpBuilder(this, OP_Const));
  builders_.back()->SetConstNode();
  builders_.back()->SetNodeId(builders_.size());
  int error = hexagon_nn_->hexagon_nn_append_const_node(
      graph_id_, builders_.size(), shape[0], shape[1], shape[2], shape[3],
      reinterpret_cast<const uint8_t*>(data), data_size);
  if (error != 0) {
    TF_LITE_KERNEL_LOG(context_, "Error adding const node with id: %d",
                       static_cast<int>(builders_.size()));
    return nullptr;
  }
  AddToCache(cache_key, builders_.back().get());
  return builders_.back().get();
}

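// Tensor variant: pads the tensor's dims to 4-D, appends (or reuses) a const
// node for its data, registers the tensor id in the tensor map, and, when
// requested, casts int8 data to uint8 so it matches Hexagon's uint8 ops.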
OpBuilder* GraphBuilder::AddConstNodeWithData(int tensor_id,
                                              const TfLiteTensor& tensor,
                                              bool int8_to_uint8) {
  // Fetch shape of tensor and pad 1's so it is always 4D.
  int batch_size, height_size, width_size, depth_size;
  GetDims(&batch_size, &height_size, &width_size, &depth_size, tensor.dims);
  const int shape[] = {batch_size, height_size, width_size, depth_size};

  auto cache_key = ComputeHash(tensor, shape, int8_to_uint8 ? 1 : 0);
  if (auto lookup_result = LookupConstData(cache_key)) {
    // The tensor can be cached but without an id when the same data was added
    // from a constant value (not a tensor). In that case we reuse the cached
    // data and assign this tensor to the cached const node before returning.
    if (!HasTensor(tensor_id))
      AddTensorWithID(tensor_id, lookup_result->GetID(), 0);
    return lookup_result;
  }
  builders_.emplace_back(new OpBuilder(this, OP_Const));
  const int node_id = builders_.size();
  builders_.back()->SetConstNode();
  builders_.back()->SetNodeId(node_id);
  int error = hexagon_nn_->hexagon_nn_append_const_node(
      graph_id_, node_id, batch_size, height_size, width_size, depth_size,
      reinterpret_cast<const uint8_t*>(tensor.data.raw), tensor.bytes);
  if (error != 0) {
    context_->ReportError(
        context_, "Failed to add const node for tensor with id: %d", tensor_id);
    return nullptr;
  }
  AddTensorWithID(tensor_id, node_id, 0);
  // We need to return the builder for the result, so we can't rely on
  // builders_.back(): it can change while adding the cast op below. Hold the
  // pointer here and update it with the cast builder if one is added.
  OpBuilder* result_builder = builders_.back().get();
  // Cast int8 to uint8 if requested.
  // This adds a cast op to uint8 and updates the tensor map to point
  // to the casted tensor.
  if (int8_to_uint8 && tensor.type == kTfLiteInt8) {
    AddCastOp(context_, OP_Quantized_CastInt8ToUInt8, tensor_id,
              &result_builder);
  }
  AddToCache(cache_key, result_builder);
  return result_builder;
}

// TODO(b/154604279): Support these casting ops in Hexagon op profiling (which
// seems to key tensors on a single op, which may not be the case now).
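// Adds a Hexagon cast op for `tensor_id`. The cast is wired "in place": the
// same tensor id is used as both input and output, and the tensor map is
// updated to the cast's output so downstream ops consume the converted data.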
TfLiteStatus GraphBuilder::AddCastOp(TfLiteContext* context, int op_type,
                                     int tensor_id,
                                     OpBuilder** cast_op_builder) {
  // Create a new OpBuilder for casting the tensor.
  OpBuilder* cast_builder = CreateCastBuilder(this, op_type);
  builders_.emplace_back(cast_builder);
  cast_builder->SetNodeId(builders_.size());
  // We cast the tensor in-place, so there is only 1 input & output, which is
  // the same tensor.
  auto* tensor_data = TfLiteIntArrayCreate(1);
  tensor_data->data[0] = tensor_id;

  TF_LITE_ENSURE_STATUS(
      cast_builder->PopulateSubGraph(tensor_data, tensor_data, context));
  TF_LITE_ENSURE_STATUS(cast_builder->RegisterOutputs(tensor_data, context));

  TfLiteIntArrayFree(tensor_data);
  if (cast_op_builder != nullptr) *cast_op_builder = cast_builder;
  return kTfLiteOk;
}

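// Adds the graph's OP_INPUT node. Only non-constant tensors become Hexagon
// inputs; int8 inputs additionally get a cast-to-uint8 node so the rest of
// the graph can run on uint8 data.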
TfLiteStatus GraphBuilder::AddInputTensors(const TfLiteIntArray* input_tensors,
                                           TfLiteContext* context) {
  auto* input_op = AddNode();
  input_op->SetOpType(OP_INPUT);

  // We need to track num_inputs since not all input_tensors are actual input
  // data. Some are constants.
  int num_inputs = 0;
  for (int i = 0; i < input_tensors->size; ++i) {
    const int tensor_id = input_tensors->data[i];
    const auto& tensor = context->tensors[tensor_id];
    if (tensor.allocation_type == kTfLiteMmapRo) continue;
    input_op->AddOutput(tensor.dims, GetElementSize(tensor.type));
    AddTensorWithID(tensor_id, input_op->GetID(), num_inputs);
    // If the tensor is of type int8, add an op to cast it to uint8.
    if (tensor.type == kTfLiteInt8) {
      TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastInt8ToUInt8,
                                      tensor_id, /*cast_op_builder=*/nullptr));
    }
    ++num_inputs;
  }

  return kTfLiteOk;
}

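// Adds the graph's OP_OUTPUT node. int8 outputs are first cast back from
// uint8 to int8, then every output tensor's Hexagon TensorID is wired as an
// input of the OUTPUT op.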
TfLiteStatus GraphBuilder::AddOutputTensors(
    const TfLiteIntArray* output_tensors, TfLiteContext* context) {
  std::vector<OpBuilder::TensorID> hexagon_output_ids;
  hexagon_output_ids.reserve(output_tensors->size);

  for (int i = 0; i < output_tensors->size; ++i) {
    const int tensor_id = output_tensors->data[i];
    const auto& tensor = context->tensors[tensor_id];
    // If the tensor is of type int8, add an op to cast the uint8 result back
    // to int8.
    if (tensor.type == kTfLiteInt8) {
      TF_LITE_ENSURE_STATUS(AddCastOp(context, OP_Quantized_CastUInt8ToInt8,
                                      tensor_id, /*cast_op_builder=*/nullptr));
    }
    hexagon_output_ids.push_back(GetHexagonTensorId(tensor_id));
  }

  // Add Hexagon OUTPUT op.
  auto* output_op = AddNode();
  output_op->SetOpType(OP_OUTPUT);
  for (auto hexagon_output : hexagon_output_ids) {
    output_op->AddInput(hexagon_output);
  }

  return kTfLiteOk;
}

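// Registers an output for this op with the given TFLite dims, padded to a
// rank-4 max-size estimate. With dynamic batch enabled, the batch dimension
// is bounded by the graph's configured maximum batch size instead.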
OpBuilder::TensorID OpBuilder::AddOutput(const TfLiteIntArray* dims,
                                         int element_size) {
  op_node_.outputs.push_back(hexagon_nn_output());
  op_node_.outputs.back().elementsize = element_size;
  op_node_.outputs.back().rank = 4;
  // TODO(karimnosseir): What is a good way to estimate the max size?
  int batch_size, height_size, width_size, depth_size;
  GetDims(&batch_size, &height_size, &width_size, &depth_size, dims);
  auto& max_sizes = op_node_.outputs.back().max_sizes;
  if (graph_builder_->GraphHasDynamicBatch()) {
    max_sizes[0] = graph_builder_->GetMaxBatchSize();
  } else {
    max_sizes[0] = batch_size;
  }
  max_sizes[1] = height_size;
  max_sizes[2] = width_size;
  max_sizes[3] = depth_size;
  return TensorID(GetID(), op_node_.outputs.size() - 1);
}

OpBuilder::TensorID OpBuilder::AddOutput(int elementsize, int rank,
                                         const int* max_sizes_vect) {
  op_node_.outputs.push_back(hexagon_nn_output());
  op_node_.outputs.back().elementsize = elementsize;
  op_node_.outputs.back().rank = rank;
  auto& max_sizes = op_node_.outputs.back().max_sizes;
  for (int i = 0; i < rank; ++i) {
    max_sizes[i] = max_sizes_vect[i];
  }
  if (graph_builder_->GraphHasDynamicBatch()) {
    max_sizes[0] = graph_builder_->GetMaxBatchSize();
  }
  return TensorID(GetID(), op_node_.outputs.size() - 1);
}

OpBuilder::TensorID OpBuilder::AddOutput(
    int elementsize, int rank, const std::vector<int>& max_sizes_vect) {
  return AddOutput(elementsize, rank, max_sizes_vect.data());
}

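// Finalizes this op: converts the recorded (src_id, output_idx) pairs into
// hexagon_nn_input entries and returns the completed OpNode description.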
const OpNode* OpBuilder::Build() {
  for (const auto& id : input_ids_) {
    op_node_.inputs.push_back(hexagon_nn_input());
    op_node_.inputs.back().src_id = id.first;
    op_node_.inputs.back().output_idx = id.second;
  }
  return &op_node_;
}

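// Derives the float min/max range from the tensor's quantization parameters
// and appends them as two scalar const inputs to this op.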
TfLiteStatus OpBuilder::ComputeAndAddMinAndMax(TfLiteContext* context,
                                               const TfLiteTensor& tensor) {
  float tensor_min, tensor_max;
  TF_LITE_ENSURE_STATUS(
      ComputeMinAndMaxQuantValues(tensor, &tensor_min, &tensor_max));
  auto* min_const_node = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&tensor_min), sizeof(tensor_min));
  auto* max_const_node = graph_builder_->AddConstNodeWithData(
      kScalarShape, reinterpret_cast<char*>(&tensor_max), sizeof(tensor_max));
  AddInput(TensorID(min_const_node->GetID(), 0));
  AddInput(TensorID(max_const_node->GetID(), 0));

  return kTfLiteOk;
}

// Static
constexpr int OpBuilder::kScalarShape[];

OpBuilder* GraphBuilder::AddNode(int tflite_node_index) {
  OpBuilder* op = new OpBuilder(this, OP_Nop);
  builders_.emplace_back(op);
  op->SetNodeId(builders_.size());
  op->SetTFLiteNodeId(tflite_node_index);
  return op;
}

OpBuilder* GraphBuilder::AddNodeFromTfLiteOp(int op_type, TfLiteNode* node,
                                             int tflite_node_index) {
  OpBuilder* op = CreateOpBuilderFromTfLiteOp(op_type, node);
  builders_.emplace_back(op);
  op->SetNodeId(builders_.size());
  op->SetTFLiteNodeId(tflite_node_index);
  op->SetBuiltinData(node->builtin_data);
  op->SetTfLiteNode(node);
  return op;
}

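// Configures dynamic batching: appends an OP_BatchSeqConfig node carrying the
// maximum batch size and the input/output batch dimensions, and records the
// max batch size that the dynamic-batch output sizing above consults.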
void GraphBuilder::AddBatchSeqConfig(int max_size_for_batch,
                                     TfLiteIntArray* input_batch_dimensions,
                                     TfLiteIntArray* output_batch_dimensions) {
  OpBuilder* batch_seq_node =
      CreateBatchSeqBuilder(this, OP_BatchSeqConfig, max_size_for_batch,
                            input_batch_dimensions, output_batch_dimensions);
  builders_.emplace_back(batch_seq_node);
  batch_seq_node->SetNodeId(builders_.size());
  batch_seq_node->PopulateSubGraph(nullptr, nullptr, nullptr);
  max_size_for_batch_ = max_size_for_batch;
}

}  // namespace hexagon
}  // namespace delegates
}  // namespace tflite