1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
16
17 #include <algorithm>
18 #include <cstdarg>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstdio>
22 #include <cstring>
23 #include <functional>
24 #include <initializer_list>
25 #include <iostream>
26 #include <iterator>
27 #include <limits>
28 #include <map>
29 #include <memory>
30 #include <string>
31 #include <tuple>
32 #include <utility>
33 #include <vector>
34
35 #include "tensorflow/lite/c/c_api_types.h"
36 #include "tensorflow/lite/delegates/serialization.h"
37 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
38 #include "tensorflow/lite/nnapi/sl/public/NeuralNetworksSupportLibraryImpl.h"
39
40 #ifdef __ANDROID__
41 #include <sys/system_properties.h>
42 #endif
43
44 #if defined __ANDROID__ || defined __unix__
45 #define TFLITE_NNAPI_ALLOW_MMAP_SHARING
46 #include <sys/mman.h>
47 #include <unistd.h>
48 #endif
49
50 #include "fp16.h"
51 #include "tensorflow/lite/allocation.h"
52 #include "tensorflow/lite/builtin_op_data.h"
53 #include "tensorflow/lite/builtin_ops.h"
54 #include "tensorflow/lite/c/builtin_op_data.h"
55 #include "tensorflow/lite/c/common.h"
56 #include "tensorflow/lite/context_util.h"
57 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
58 #include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
59 #include "tensorflow/lite/delegates/utils.h"
60 #include "tensorflow/lite/kernels/kernel_util.h"
61 #include "tensorflow/lite/minimal_logging.h"
62 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
63 #include "tensorflow/lite/nnapi/nnapi_util.h"
64 #include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
65 #include "tensorflow/lite/util.h"
66 #ifdef NNAPI_VERBOSE_VALIDATION
67 #include "tensorflow/lite/schema/schema_generated.h"
68 #endif
69 #include "utils/hash/farmhash.h"
70
71 namespace tflite {
72 namespace {
73
74 static const char kNnapiId[] = "nnapi_";
75
76 // Returns a string ID unique to what accelerator is run by NNAPI, based on
77 // user params. Assumes that the default accelerator is same across runs.
78 // Used for caching nodes to be delegated for a model.
NnApiBackendId(const StatefulNnApiDelegate::Options & delegate_options)79 std::string NnApiBackendId(
80 const StatefulNnApiDelegate::Options& delegate_options) {
81 std::string delegate_id = kNnapiId;
82 if (delegate_options.accelerator_name) {
83 delegate_id += delegate_options.accelerator_name;
84 }
85 return delegate_id;
86 }
87
88 // Returns the enum name corresponding to the given error code if the given
89 // value corresponds to an of the error codes in the enumeration above or
90 // an message with the unknown code.
91 // LINT.IfChange(NnApiErrorDescription)
NnApiErrorDescription(int error_code)92 std::string NnApiErrorDescription(int error_code) {
93 switch (error_code) {
94 case ANEURALNETWORKS_NO_ERROR:
95 return "ANEURALNETWORKS_NO_ERROR";
96 case ANEURALNETWORKS_OUT_OF_MEMORY:
97 return "ANEURALNETWORKS_OUT_OF_MEMORY";
98 case ANEURALNETWORKS_INCOMPLETE:
99 return "ANEURALNETWORKS_INCOMPLETE";
100 case ANEURALNETWORKS_UNEXPECTED_NULL:
101 return "ANEURALNETWORKS_UNEXPECTED_NULL";
102 case ANEURALNETWORKS_BAD_DATA:
103 return "ANEURALNETWORKS_BAD_DATA";
104 case ANEURALNETWORKS_OP_FAILED:
105 return "ANEURALNETWORKS_OP_FAILED";
106 case ANEURALNETWORKS_BAD_STATE:
107 return "ANEURALNETWORKS_BAD_STATE";
108 case ANEURALNETWORKS_UNMAPPABLE:
109 return "ANEURALNETWORKS_UNMAPPABLE";
110 case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
111 return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
112 case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
113 return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
114 case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
115 return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
116 case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
117 return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
118 case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
119 return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
120 case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
121 return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
122 case ANEURALNETWORKS_DEAD_OBJECT:
123 return "ANEURALNETWORKS_DEAD_OBJECT";
124 default:
125 return "Unknown NNAPI error code: " + std::to_string(error_code);
126 }
127 }
128 // LINT.ThenChange()
129
130 #define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno) \
131 do { \
132 const auto _code = (code); \
133 const auto _call_desc = (call_desc); \
134 if (_code != ANEURALNETWORKS_NO_ERROR) { \
135 const auto error_desc = NnApiErrorDescription(_code); \
136 TF_LITE_KERNEL_LOG(context, \
137 "NN API returned error %s at line %d while %s.\n", \
138 error_desc.c_str(), __LINE__, _call_desc); \
139 *p_errno = _code; \
140 return kTfLiteError; \
141 } \
142 } while (0)
143
144 #define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
145 p_tensor, p_errno) \
146 do { \
147 const auto _code = (code); \
148 const auto _call_desc = (call_desc); \
149 if (_code != ANEURALNETWORKS_NO_ERROR) { \
150 const auto error_desc = NnApiErrorDescription(_code); \
151 TF_LITE_KERNEL_LOG(context, \
152 "NN API returned error %s at line %d while %s " \
153 "for tensor '%s'.\n", \
154 error_desc.c_str(), __LINE__, _call_desc, \
155 (p_tensor)->name ? (p_tensor)->name : "no-name"); \
156 *p_errno = _code; \
157 return kTfLiteError; \
158 } \
159 } while (0)
160
IsFloat(TfLiteType type)161 bool IsFloat(TfLiteType type) {
162 switch (type) {
163 case kTfLiteFloat32:
164 return true;
165 default:
166 return false;
167 }
168 }
169
IsFloatOrUInt8(TfLiteType type)170 bool IsFloatOrUInt8(TfLiteType type) {
171 switch (type) {
172 case kTfLiteFloat32:
173 case kTfLiteUInt8:
174 return true;
175 default:
176 return false;
177 }
178 }
179
IsQuantized(TfLiteType type)180 bool IsQuantized(TfLiteType type) {
181 switch (type) {
182 case kTfLiteUInt8:
183 case kTfLiteInt8:
184 return true;
185 default:
186 // kTfLiteInt16 isn't supported as quantized type yet.
187 return false;
188 }
189 }
190
IsInt32(TfLiteType type)191 bool IsInt32(TfLiteType type) {
192 switch (type) {
193 case kTfLiteInt32:
194 return true;
195 default:
196 return false;
197 }
198 }
199
IsFloatOrQuantized(TfLiteType type)200 bool IsFloatOrQuantized(TfLiteType type) {
201 switch (type) {
202 case kTfLiteFloat32:
203 case kTfLiteUInt8:
204 case kTfLiteInt8:
205 return true;
206 default:
207 return false;
208 }
209 }
210
IsFloatOrInt32(TfLiteType type)211 bool IsFloatOrInt32(TfLiteType type) {
212 switch (type) {
213 case kTfLiteFloat32:
214 case kTfLiteInt32:
215 return true;
216 default:
217 return false;
218 }
219 }
220
IsFloatQuantizedOrInt32(TfLiteType type)221 bool IsFloatQuantizedOrInt32(TfLiteType type) {
222 switch (type) {
223 case kTfLiteFloat32:
224 case kTfLiteUInt8:
225 case kTfLiteInt8:
226 case kTfLiteInt32:
227 return true;
228 default:
229 return false;
230 }
231 }
232
IsScalarInputSupported(int builtin_code)233 bool IsScalarInputSupported(int builtin_code) {
234 switch (builtin_code) {
235 case kTfLiteBuiltinAdd:
236 case kTfLiteBuiltinMul:
237 case kTfLiteBuiltinSub:
238 case kTfLiteBuiltinDiv:
239 case kTfLiteBuiltinEqual:
240 case kTfLiteBuiltinNotEqual:
241 case kTfLiteBuiltinGreater:
242 case kTfLiteBuiltinGreaterEqual:
243 case kTfLiteBuiltinLess:
244 case kTfLiteBuiltinLessEqual:
245 case kTfLiteBuiltinPow:
246 case kTfLiteBuiltinMaximum:
247 case kTfLiteBuiltinMinimum:
248 case kTfLiteBuiltinPrelu:
249 case kTfLiteBuiltinLeakyRelu:
250 return true;
251 default:
252 return false;
253 }
254 }
255
256 // Check if the operation requires explicit conversion from int8 to uint8
257 // values.
NeedInt8Conversion(const TfLiteContext * context,int builtin_code,const TfLiteNode * node)258 bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
259 const TfLiteNode* node) {
260 const int input_id = node->inputs->data[0];
261 const TfLiteType input_type = context->tensors[input_id].type;
262 switch (builtin_code) {
263 case kTfLiteBuiltinConv2d:
264 case kTfLiteBuiltinDepthwiseConv2d:
265 case kTfLiteBuiltinFullyConnected: {
266 if (input_type == kTfLiteInt8) {
267 const int weights_id = node->inputs->data[1];
268 const auto& weights_tensor = context->tensors[weights_id];
269 if ((weights_tensor.type == kTfLiteInt8 ||
270 weights_tensor.type == kTfLiteUInt8) &&
271 weights_tensor.quantization.type == kTfLiteAffineQuantization) {
272 return true;
273 }
274 }
275 return false;
276 }
277 case kTfLiteBuiltinTransposeConv: {
278 // Transpose convolution has a different order of inputs:
279 // 0: output_shape, 1: filter, 2: input, 3: bias.
280 const int input_id = 2;
281 const TfLiteType input_type = context->tensors[input_id].type;
282 if (input_type == kTfLiteInt8) {
283 return true;
284 }
285 return false;
286 }
287 case kTfLiteBuiltinSelect: {
288 const auto value_type = context->tensors[node->inputs->data[1]].type;
289 return value_type == kTfLiteInt8;
290 }
291 case kTfLiteBuiltinAdd:
292 case kTfLiteBuiltinArgMax:
293 case kTfLiteBuiltinArgMin:
294 case kTfLiteBuiltinAveragePool2d:
295 case kTfLiteBuiltinBatchToSpaceNd:
296 case kTfLiteBuiltinConcatenation:
297 case kTfLiteBuiltinEqual:
298 case kTfLiteBuiltinExpandDims:
299 case kTfLiteBuiltinGather:
300 case kTfLiteBuiltinGreater:
301 case kTfLiteBuiltinGreaterEqual:
302 case kTfLiteBuiltinHardSwish:
303 case kTfLiteBuiltinL2Normalization:
304 case kTfLiteBuiltinLeakyRelu:
305 case kTfLiteBuiltinLess:
306 case kTfLiteBuiltinLessEqual:
307 case kTfLiteBuiltinLogistic:
308 case kTfLiteBuiltinMaximum:
309 case kTfLiteBuiltinMaxPool2d:
310 case kTfLiteBuiltinMean:
311 case kTfLiteBuiltinMinimum:
312 case kTfLiteBuiltinMul:
313 case kTfLiteBuiltinNotEqual:
314 case kTfLiteBuiltinPad:
315 case kTfLiteBuiltinPadv2:
316 case kTfLiteBuiltinPrelu:
317 case kTfLiteBuiltinReduceMax:
318 case kTfLiteBuiltinReduceMin:
319 case kTfLiteBuiltinRelu:
320 case kTfLiteBuiltinReluN1To1:
321 case kTfLiteBuiltinRelu6:
322 case kTfLiteBuiltinResizeBilinear:
323 case kTfLiteBuiltinResizeNearestNeighbor:
324 case kTfLiteBuiltinReshape:
325 case kTfLiteBuiltinSlice:
326 case kTfLiteBuiltinSoftmax:
327 case kTfLiteBuiltinSpaceToBatchNd:
328 case kTfLiteBuiltinSpaceToDepth:
329 case kTfLiteBuiltinDepthToSpace:
330 case kTfLiteBuiltinStridedSlice:
331 case kTfLiteBuiltinSub:
332 case kTfLiteBuiltinTanh:
333 case kTfLiteBuiltinTile:
334 case kTfLiteBuiltinTopkV2:
335 case kTfLiteBuiltinTranspose: {
336 return input_type == kTfLiteInt8;
337 }
338 default:
339 return false;
340 }
341 }
342
343 constexpr int kLstmFullKernelInputSize = 24;
344 // The 20 input version is deprecated and kept only to
345 // support old model. The latest version of the LSTM Full Kernel
346 // is the one with 24 inputs
347 constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
348 constexpr int kLstmBasicKernelInputSize = 5;
349
isLstmBasicKernel(const TfLiteNode * node)350 inline bool isLstmBasicKernel(const TfLiteNode* node) {
351 return node->inputs->size == kLstmBasicKernelInputSize;
352 }
353
isLstmFullKernel(const TfLiteNode * node)354 inline bool isLstmFullKernel(const TfLiteNode* node) {
355 return node->inputs->size == kLstmFullKernelInputSize ||
356 node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
357 }
358
IsHybridOperator(const TfLiteContext * context,int builtin_code,const TfLiteNode * node)359 bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
360 const TfLiteNode* node) {
361 switch (builtin_code) {
362 case kTfLiteBuiltinConv2d:
363 case kTfLiteBuiltinFullyConnected: {
364 const int input_id = node->inputs->data[0];
365 const int filter_id = node->inputs->data[1];
366 const TfLiteType input_type = context->tensors[input_id].type;
367 const TfLiteType filter_type = context->tensors[filter_id].type;
368 return IsFloat(input_type) && IsQuantized(filter_type);
369 }
370 case kTfLiteBuiltinLstm: {
371 const int input_id = node->inputs->data[0];
372 // Input #1 is optional so use #2 to determine if hybrid.
373 const int weights_id = node->inputs->data[2];
374 const TfLiteType input_type = context->tensors[input_id].type;
375 const TfLiteType weights_type = context->tensors[weights_id].type;
376 return isLstmFullKernel(node) && IsFloat(input_type) &&
377 IsQuantized(weights_type);
378 }
379 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
380 const int input_id = node->inputs->data[0];
381 // Input #1 is optional so use #2 to determine if hybrid.
382 const int weights_id = node->inputs->data[2];
383 const TfLiteType input_type = context->tensors[input_id].type;
384 const TfLiteType weights_type = context->tensors[weights_id].type;
385 return IsFloat(input_type) && IsQuantized(weights_type);
386 }
387 case kTfLiteBuiltinBidirectionalSequenceLstm: {
388 const int input_id = node->inputs->data[0];
389 // Input #1 is optional so use #2 to determine if hybrid.
390 const int weights_id = node->inputs->data[2];
391 const TfLiteType input_type = context->tensors[input_id].type;
392 const TfLiteType weights_type = context->tensors[weights_id].type;
393 return IsFloat(input_type) && IsQuantized(weights_type);
394 }
395 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
396 const int input_id = node->inputs->data[0];
397 const int weights_id = node->inputs->data[1];
398 const TfLiteType input_type = context->tensors[input_id].type;
399 const TfLiteType weights_type = context->tensors[weights_id].type;
400 return IsFloat(input_type) && IsQuantized(weights_type);
401 }
402 default:
403 return false;
404 }
405 }
406
IsDequantizeConstFloat16(TfLiteContext * context,const TfLiteNode * node,const TfLiteRegistration * registration)407 bool IsDequantizeConstFloat16(TfLiteContext* context, const TfLiteNode* node,
408 const TfLiteRegistration* registration) {
409 return registration->builtin_code == kTfLiteBuiltinDequantize &&
410 context->tensors[node->inputs->data[0]].type ==
411 TfLiteType::kTfLiteFloat16 &&
412 IsConstantTensor(&context->tensors[node->inputs->data[0]]);
413 }
414
IsDequantizeNonConstFloat16(TfLiteContext * context,const TfLiteNode * node,const TfLiteRegistration * registration)415 bool IsDequantizeNonConstFloat16(TfLiteContext* context, const TfLiteNode* node,
416 const TfLiteRegistration* registration) {
417 return registration->builtin_code == kTfLiteBuiltinDequantize &&
418 context->tensors[node->inputs->data[0]].type ==
419 TfLiteType::kTfLiteFloat16 &&
420 !IsConstantTensor(&context->tensors[node->inputs->data[0]]);
421 }
422
IsDensifyConstTensor(TfLiteContext * context,const TfLiteNode * node,const TfLiteRegistration * registration)423 bool IsDensifyConstTensor(TfLiteContext* context, const TfLiteNode* node,
424 const TfLiteRegistration* registration) {
425 return registration->builtin_code == kTfLiteBuiltinDensify &&
426 IsConstantTensor(&context->tensors[node->inputs->data[0]]);
427 }
428
HasUnspecifiedDimension(const TfLiteTensor * tensor)429 bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
430 if (tensor->dims_signature) {
431 for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
432 if (i == -1) return true;
433 }
434 }
435 return false;
436 }
437
ConvertTensorTypeToNNType(const TfLiteTensor * tensor,TfLiteType ann_type_equivalent)438 ANeuralNetworksOperandType ConvertTensorTypeToNNType(
439 const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) {
440 int32_t nn_type = 0;
441 float scale = 0.0f;
442 int32_t zero_point = 0;
443 switch (tensor->type) {
444 case kTfLiteFloat32:
445 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
446 break;
447 case kTfLiteUInt8:
448 nn_type = ann_type_equivalent == kTfLiteInt32
449 ? ANEURALNETWORKS_TENSOR_INT32
450 : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
451 scale = tensor->params.scale;
452 zero_point = tensor->params.zero_point;
453 if (scale == 0) {
454 // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
455 // with zero scale are not valid in NNAPI.
456 scale = 1;
457 }
458 break;
459 case kTfLiteInt8:
460 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
461 scale = tensor->params.scale;
462 zero_point = tensor->params.zero_point;
463 if (ann_type_equivalent == kTfLiteUInt8) {
464 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
465 zero_point += 128;
466 } else if (ann_type_equivalent == kTfLiteInt32) {
467 nn_type = ANEURALNETWORKS_TENSOR_INT32;
468 zero_point += 128;
469 }
470 if (scale == 0) {
471 // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
472 // with zero scale are not valid in NNAPI.
473 scale = 1;
474 }
475 break;
476 case kTfLiteInt32:
477 nn_type = ANEURALNETWORKS_TENSOR_INT32;
478 scale = tensor->params.scale;
479 zero_point = tensor->params.zero_point;
480 break;
481 case kTfLiteBool:
482 nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
483 break;
484 case kTfLiteInt16:
485 nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
486 scale = tensor->params.scale;
487 zero_point = tensor->params.zero_point;
488 break;
489 default:
490 break;
491 }
492 uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
493 uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
494 static uint32_t scalar_rank = 1;
495 // treat scalar input as single cell tensor in NNAPI.
496 if (tensor_rank == 0) {
497 tensor_rank = scalar_rank;
498 tensor_dims = &scalar_rank;
499 }
500 ANeuralNetworksOperandType nn_operand_type{
501 .type = nn_type,
502 .dimensionCount = tensor_rank,
503 .dimensions = tensor_dims,
504 .scale = scale,
505 .zeroPoint = zero_point,
506 };
507 return nn_operand_type;
508 }
509
510 // NNAPI in API 31 hard-code the preferred alignment/padding with 64 bytes.
511 constexpr size_t kDefaultByteAlignmentForNNAPI = 64;
512
GetNumPaddingBytes(size_t byte_size)513 static size_t GetNumPaddingBytes(size_t byte_size) {
514 size_t num_padding_bytes = 0;
515 if (byte_size % kDefaultByteAlignmentForNNAPI) {
516 num_padding_bytes = kDefaultByteAlignmentForNNAPI -
517 (byte_size % kDefaultByteAlignmentForNNAPI);
518 }
519 return num_padding_bytes;
520 }
521
GetNNTensorSize(size_t tensor_size,bool allow_padding)522 static size_t GetNNTensorSize(size_t tensor_size, bool allow_padding) {
523 size_t padding_bytes = GetNumPaddingBytes(tensor_size);
524 size_t nn_tensor_size = tensor_size;
525 if (allow_padding) {
526 nn_tensor_size += padding_bytes;
527 }
528 return nn_tensor_size;
529 }
530
531 // Return NNAPI device handle with the provided null-terminated device name.
532 // Returns kTfLiteError in case of any NNAPI error and if no device with the
533 // given name can be found.
GetDeviceHandle(const NnApi * nnapi,TfLiteContext * context,const char * device_name_ptr,ANeuralNetworksDevice ** result,int * nnapi_errno)534 TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
535 const char* device_name_ptr,
536 ANeuralNetworksDevice** result, int* nnapi_errno) {
537 if (!device_name_ptr) return kTfLiteError;
538 *result = nullptr;
539 std::string device_name(device_name_ptr);
540 uint32_t num_devices = 0;
541 nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
542
543 for (uint32_t i = 0; i < num_devices; i++) {
544 ANeuralNetworksDevice* device = nullptr;
545 const char* buffer = nullptr;
546 RETURN_TFLITE_ERROR_IF_NN_ERROR(
547 context, nnapi->ANeuralNetworks_getDevice(i, &device),
548 "Searching for target device", nnapi_errno);
549
550 RETURN_TFLITE_ERROR_IF_NN_ERROR(
551 context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
552 "Searching for target device", nnapi_errno);
553
554 if (device_name == buffer) {
555 *result = device;
556 return kTfLiteOk;
557 }
558 }
559
560 context->ReportError(context,
561 "Could not find the specified NNAPI accelerator: %s. "
562 "Must be one of: {%s}.",
563 device_name_ptr,
564 nnapi::GetStringDeviceNamesList(nnapi).c_str());
565 return kTfLiteError;
566 }
567
568 // Compute the hash of a TfLiteIntArray.
GetHash(const TfLiteIntArray * int_array,uint64_t combine_with=0)569 uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
570 constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
571 uint64_t result = combine_with;
572 for (auto i : TfLiteIntArrayView(int_array)) {
573 result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
574 }
575 return result;
576 }
577
HasZeroes(TfLiteIntArrayView array)578 bool HasZeroes(TfLiteIntArrayView array) {
579 for (auto value : array) {
580 if (value == 0) {
581 return true;
582 }
583 }
584 return false;
585 }
586
587 // Bit mask for tensor flags.
588 enum {
589 NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
590 NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
591 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
592 NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
593 NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION = 1U << 4,
594 };
595
596 // Returns the feature level to target when delegating to the given devices.
597 // The feature level is the max of the ones supported by the devices or
598 // the current NNAPI runtime feature level if no device is present.
GetTargetFeatureLevel(TfLiteContext * context,const NnApi * nnapi,const std::vector<ANeuralNetworksDevice * > & device_handles,int * target_feature_level,int * nnapi_errno)599 TfLiteStatus GetTargetFeatureLevel(
600 TfLiteContext* context, const NnApi* nnapi,
601 const std::vector<ANeuralNetworksDevice*>& device_handles,
602 int* target_feature_level, int* nnapi_errno) {
603 *target_feature_level = nnapi->nnapi_runtime_feature_level;
604 int64_t devices_feature_level = -1;
605 for (const auto* device_handle : device_handles) {
606 int64_t curr_device_feature_level;
607 RETURN_TFLITE_ERROR_IF_NN_ERROR(
608 context,
609 nnapi->ANeuralNetworksDevice_getFeatureLevel(
610 device_handle, &curr_device_feature_level),
611 "Searching for target device", nnapi_errno);
612
613 devices_feature_level =
614 std::max(curr_device_feature_level, devices_feature_level);
615 }
616
617 if ((devices_feature_level > 0) &&
618 // This second check is necessary since if the nnapi-reference device is
619 // in the list of target devices the devices_feature_level value will be
620 // 1000.
621 (devices_feature_level < nnapi->nnapi_runtime_feature_level)) {
622 TFLITE_LOG(TFLITE_LOG_INFO,
623 "Changing NNAPI Feature Level %lld to "
624 "supported by target devices: %lld",
625 nnapi->android_sdk_version, devices_feature_level);
626
627 *target_feature_level = devices_feature_level;
628 }
629
630 return kTfLiteOk;
631 }
632
633 // Returns true if this delegate is configured to use a specific set of devices.
634 // This will happen either if:
635 // - accelerator_name option has been specified
636 // - NNAPI CPU implementation has been explicitly disabled.
637 // If exclude_nnapi_reference is true this method will return false if the
638 // accelerator_name in the delegate options is equal to "nnapi-reference"
ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,const NnApi * nnapi,bool exclude_nnapi_reference=false)639 bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
640 const NnApi* nnapi,
641 bool exclude_nnapi_reference = false) {
642 const char* device_name_ptr = delegate_options.accelerator_name;
643 std::string nnapi_cpu("nnapi-reference");
644 bool has_selected_accelerator = device_name_ptr != nullptr;
645 if (exclude_nnapi_reference && has_selected_accelerator) {
646 if (nnapi_cpu == device_name_ptr) return false;
647 }
648 return (delegate_options.disallow_nnapi_cpu &&
649 nnapi->android_sdk_version >=
650 delegate::nnapi::kMinSdkVersionForNNAPI12) ||
651 has_selected_accelerator;
652 }
653
654 // Fills the given result vector with the list of devices the given delegate
655 // is referring to.
656 // There are three possible results:
657 // - an empty array (not the full list of available accelerators,
658 // for efficiency reasons) if no accelerator is chosen and the
659 // disallow_nnapi_cpu delegate option is false.
660 // - A single element array with the target processor, if an accelerator name
661 // is specified in the delegate options.
662 // - The full list of devices available on device less the nnapi reference
663 // implementation if the delegate option disallow_nnapi_cpu has been
664 // specified.
GetTargetDevices(TfLiteContext * context,TfLiteDelegate * delegate,const NnApi * nnapi,int * nnapi_errno,std::vector<ANeuralNetworksDevice * > * result)665 TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
666 const NnApi* nnapi, int* nnapi_errno,
667 std::vector<ANeuralNetworksDevice*>* result) {
668 if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
669 return kTfLiteError;
670 }
671
672 const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
673 const char* device_name_ptr = delegate_options.accelerator_name;
674
675 if (device_name_ptr != nullptr) {
676 // User specified an accelerator to use.
677 ANeuralNetworksDevice* nnapi_device = nullptr;
678 TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
679 &nnapi_device, nnapi_errno));
680 result->push_back(nnapi_device);
681 } else if (delegate_options.disallow_nnapi_cpu) {
682 std::string nnapi_cpu("nnapi-reference");
683 uint32_t num_devices = 0;
684 nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
685
686 for (uint32_t i = 0; i < num_devices; i++) {
687 ANeuralNetworksDevice* device = nullptr;
688 const char* buffer = nullptr;
689 RETURN_TFLITE_ERROR_IF_NN_ERROR(
690 context, nnapi->ANeuralNetworks_getDevice(i, &device),
691 "Getting list of available devices", nnapi_errno);
692 RETURN_TFLITE_ERROR_IF_NN_ERROR(
693 context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
694 "Getting list of available devices", nnapi_errno);
695 if (nnapi_cpu != buffer) {
696 result->push_back(device);
697 }
698 }
699 }
700
701 return kTfLiteOk;
702 }
703
704 } // namespace
705
706 namespace delegate {
707 namespace nnapi {
708
709 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
NNMemory(const NnApi * nnapi,const char * name,size_t size)710 NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
711 if (name && size > 0) {
712 nnapi_ = nnapi;
713 byte_size_ = size;
714 #ifdef __ANDROID__
715 fd_ = nnapi_->ASharedMemory_create(name, size);
716 #else
717 // For non-Android platforms ASharedMemory_create needs unique name to
718 // create a shared memory object (see nnapi_implementation.cc).
719 char shm_name_buffer[L_tmpnam];
720 if (tmpnam(shm_name_buffer) == nullptr) {
721 shm_name_buffer[0] = '\0';
722 }
723 // tmpnam will produce a string containing with slashes, but shm_open
724 // won't like that.
725 shm_region_name_ = std::string(name) + std::string(shm_name_buffer);
726 std::replace(shm_region_name_.begin(), shm_region_name_.end(), '/', '-');
727 fd_ = nnapi_->ASharedMemory_create(shm_region_name_.c_str(), size);
728 #endif
729
730 data_ptr_ = reinterpret_cast<uint8_t*>(
731 mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
732 nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
733 fd_, 0, &nn_memory_handle_);
734 }
735 }
736 #else
737 NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
738 size_t /*size*/)
739 : nnapi_(nullptr) {}
740 #endif
741
~NNMemory()742 NNMemory::~NNMemory() {
743 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
744 if (data_ptr_) {
745 munmap(data_ptr_, byte_size_);
746 }
747 if (nn_memory_handle_) {
748 nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
749 }
750 #ifdef __ANDROID__
751 if (fd_ >= 0) close(fd_);
752 #else
753 if (!shm_region_name_.empty()) shm_unlink(shm_region_name_.c_str());
754 #endif
755 #endif
756 }
757
758 class DequantizeMapping {
759 public:
DequantizedAnnIndex(int ann_index,TfLiteType type) const760 int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
761 for (const auto& element : mapping_) {
762 if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
763 return std::get<2>(element);
764 }
765 }
766 return -1;
767 }
768
Add(int ann_index,TfLiteType type,int dequantized_ann_index)769 void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
770 // This assumes it is not already mapped.
771 mapping_.emplace_back(ann_index, type, dequantized_ann_index);
772 }
773
774 private:
775 // Each tuple specifies the ANN (quantized) tensor index, the desired
776 // floating-point type and the matching ANN (dequantized) tensor index. This
777 // could use a map but instead std::vector is used to keep code size lower.
778 std::vector<std::tuple<int, TfLiteType, int>> mapping_;
779 };
780
781 // Abstract builder for building an op in the NN API graph. This handles
782 // the disparity between TFLite and NN API operand types. NN API has singular
783 // operands for both tensors and parameters, and TFLite separates the two.
784 class NNAPIOpBuilder {
785 public:
NNAPIOpBuilder(const NnApi * nnapi,TfLiteContext * context,OperandMapping * tensor_mapping,DequantizeMapping * dequantize_mapping,std::map<const MMAPAllocation *,ANeuralNetworksMemory * > * allocation_mapping,std::vector<int> * nnapi_to_tflite_op_mapping,ANeuralNetworksModel * nn_model,int * nnapi_errno,bool allow_dynamic_dimensions)786 NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
787 OperandMapping* tensor_mapping,
788 DequantizeMapping* dequantize_mapping,
789 std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
790 allocation_mapping,
791 std::vector<int>* nnapi_to_tflite_op_mapping,
792 ANeuralNetworksModel* nn_model, int* nnapi_errno,
793 bool allow_dynamic_dimensions)
794 : nnapi_(nnapi),
795 context_(context),
796 operand_mapping_(tensor_mapping),
797 dequantize_mapping_(dequantize_mapping),
798 allocation_memory_mapping_(allocation_mapping),
799 nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping),
800 nn_model_(nn_model),
801 nnapi_errno_(nnapi_errno),
802 allow_dynamic_dimensions_(allow_dynamic_dimensions) {}
803
AddScalarBoolOperand(bool value)804 TfLiteStatus AddScalarBoolOperand(bool value) {
805 return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
806 }
807
AddScalarInt32Operand(int32_t value)808 TfLiteStatus AddScalarInt32Operand(int32_t value) {
809 return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
810 }
811
AddScalarFloat32Operand(float value)812 TfLiteStatus AddScalarFloat32Operand(float value) {
813 return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
814 }
815
AddVectorInt32Operand(const int32_t * values,uint32_t num_values)816 TfLiteStatus AddVectorInt32Operand(const int32_t* values,
817 uint32_t num_values) {
818 return AddVectorOperand<int32_t>(values, num_values,
819 ANEURALNETWORKS_TENSOR_INT32,
820 /*scale=*/0.f, /*zero_point=*/0);
821 }
822
AddVectorInt32Operand(const int32_t * values,uint32_t num_values,float scale,int32_t zero_point)823 TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
824 float scale, int32_t zero_point) {
825 return AddVectorOperand<int32_t>(
826 values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
827 }
828
AddVectorInt16Operand(const int16_t * values,uint32_t num_values)829 TfLiteStatus AddVectorInt16Operand(const int16_t* values,
830 uint32_t num_values) {
831 return AddVectorOperand<int16_t>(values, num_values,
832 ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
833 /*scale=*/1.f, /*zero_point=*/0);
834 }
835
AddVectorInt8Operand(const int8_t * values,uint32_t num_values)836 TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
837 return AddVectorOperand<int8_t>(values, num_values,
838 ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
839 /*scale=*/1.f, /*zero_point=*/0);
840 }
841
AddVectorFloat32Operand(const float * values,uint32_t num_values)842 TfLiteStatus AddVectorFloat32Operand(const float* values,
843 uint32_t num_values) {
844 return AddVectorOperand<float>(values, num_values,
845 ANEURALNETWORKS_TENSOR_FLOAT32);
846 }
847
AddPoolingParams(void * data)848 TfLiteStatus AddPoolingParams(void* data) {
849 auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
850 AddScalarInt32Operand(builtin->padding);
851 AddScalarInt32Operand(builtin->stride_width);
852 AddScalarInt32Operand(builtin->stride_height);
853 AddScalarInt32Operand(builtin->filter_width);
854 AddScalarInt32Operand(builtin->filter_height);
855 AddScalarInt32Operand(builtin->activation);
856 return kTfLiteOk;
857 }
858
AddTensorInput(int tensor_index,bool hybrid_op,int tensor_flags=0)859 TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
860 int tensor_flags = 0) {
861 return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
862 }
863
AddTensorOutput(int tensor_index,int tensor_flags=0)864 TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
865 return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
866 tensor_flags);
867 }
868
AddAdditionalFloat32OutputTensor(uint32_t dimension_count)869 TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
870 std::vector<uint32_t> dims(dimension_count, 0);
871 return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
872 }
873
AddStateFloat32Tensor(int tensor_index,int * ann_tensor_index_out)874 TfLiteStatus AddStateFloat32Tensor(int tensor_index,
875 int* ann_tensor_index_out) {
876 TfLiteTensor* tensor = &context_->tensors[tensor_index];
877 return AddFloat32OutputTensor(
878 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
879 ann_tensor_index_out);
880 }
881
AddStateInt16Tensor(int tensor_index,int * ann_tensor_index_out)882 TfLiteStatus AddStateInt16Tensor(int tensor_index,
883 int* ann_tensor_index_out) {
884 TfLiteTensor* tensor = &context_->tensors[tensor_index];
885 return AddAdditionalOutputTensor(
886 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
887 ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
888 tensor->params.zero_point, ann_tensor_index_out);
889 }
890
AddStateInt8AsymTensor(int tensor_index,int * ann_tensor_index_out)891 TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
892 int* ann_tensor_index_out) {
893 TfLiteTensor* tensor = &context_->tensors[tensor_index];
894 return AddAdditionalOutputTensor(
895 tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
896 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
897 tensor->params.zero_point, ann_tensor_index_out);
898 }
899
900 // Add a constant tensor with a single element, intended for broadcast capable
901 // ops.
AddSingleValueConstantTensor(float value,bool is_quantized)902 TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
903 if (!is_quantized) {
904 return AddVectorFloat32Operand(&value, 1);
905 } else {
906 // in the case that we need to add a quantized tensor, set the value to
907 // 64, zero_point to be 0 and adjust scale accordingly.
908 const uint8_t quant8_value = 64;
909 return AddVectorOperand<uint8_t>(&quant8_value, 1,
910 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
911 value / quant8_value, 0);
912 }
913 }
914
915 // Calculate the scale and zero_point for 8-bit unsigned tensor, given float
916 // min and max. zero_point is clamped to [0, 255].
CalculateQuantizationParams(float min,float max,float * scale,int * zero_point)917 TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
918 int* zero_point) {
919 if (max < min) return kTfLiteError;
920 *scale = (max - min) / 255.f;
921 if (min > 0.f) {
922 *zero_point = 0;
923 } else if (max < 0.f) {
924 *zero_point = 255;
925 } else {
926 *zero_point = (0.f - min) / (*scale);
927 }
928 return kTfLiteOk;
929 }
930
931 // Lower hardswish according to the following equation:
932 // hard_swish[x] = x (ReLU6(x + 3)) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
933 // = 0.5x * Relu_N1_to_1(x/3) + 0.5x
TransformHardSwishIntoSupportedOps(int lite_input_index,int lite_output_index,bool need_int8_conversion,int lite_node_index)934 TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
935 int lite_output_index,
936 bool need_int8_conversion,
937 int lite_node_index) {
938 const TfLiteTensor& tensor = context_->tensors[lite_input_index];
939 float input_scale = tensor.params.scale;
940 int input_zero_point = tensor.params.zero_point;
941 float input_min = 0.f;
942 float input_max = 0.f;
943 int tensor_flags = 0;
944 if (need_int8_conversion) {
945 tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
946 input_zero_point += 128;
947 }
948 bool is_quantized = false;
949 int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
950 if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
951 is_quantized = true;
952 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
953 input_min = (0 - input_zero_point) * input_scale;
954 input_max = (255 - input_zero_point) * input_scale;
955 }
956
957 // Stage1 : s1 = Relu1(x * 1/3)
958 float s1_output_min = 0.f;
959 float s1_output_max = 0.f;
960 int s1_out_ann_index = 0;
961 {
962 float s1_output_scale = 0.f;
963 int s1_output_zero_point = 0;
964 if (is_quantized) {
965 // clamp the output range to [-1, 1] if needed.
966 s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
967 s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
968 CalculateQuantizationParams(s1_output_min, s1_output_max,
969 &s1_output_scale, &s1_output_zero_point);
970 }
971 TF_LITE_ENSURE_OK(context_,
972 AddTensorInput(lite_input_index, false, tensor_flags));
973 const float value3f = 1.f / 3.f;
974 TF_LITE_ENSURE_OK(context_,
975 AddSingleValueConstantTensor(value3f, is_quantized));
976 TF_LITE_ENSURE_OK(context_,
977 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
978 TF_LITE_ENSURE_OK(
979 context_,
980 AddAdditionalOutputTensor(
981 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
982 nn_type, s1_output_scale, s1_output_zero_point,
983 &s1_out_ann_index));
984 TF_LITE_ENSURE_OK(
985 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
986 }
987
988 // Stage2 : s2 = x / 2
989 float s2_output_min = input_min / 2.f;
990 float s2_output_max = input_max / 2.f;
991 int s2_out_ann_index = 0;
992 {
993 float s2_output_scale = input_scale / 2.0f;
994 int s2_output_zero_point = input_zero_point;
995 TF_LITE_ENSURE_OK(context_,
996 AddTensorInput(lite_input_index, false, tensor_flags));
997 const float value2f = 0.5f;
998 TF_LITE_ENSURE_OK(context_,
999 AddSingleValueConstantTensor(value2f, is_quantized));
1000 TF_LITE_ENSURE_OK(context_,
1001 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1002 TF_LITE_ENSURE_OK(
1003 context_,
1004 AddAdditionalOutputTensor(
1005 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1006 nn_type, s2_output_scale, s2_output_zero_point,
1007 &s2_out_ann_index));
1008 TF_LITE_ENSURE_OK(
1009 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1010 }
1011
1012 // Stage 3 : s3 = s1 * s2
1013 int s3_out_ann_index = 0;
1014 {
1015 augmented_inputs_.push_back(s1_out_ann_index);
1016 augmented_inputs_.push_back(s2_out_ann_index);
1017 TF_LITE_ENSURE_OK(context_,
1018 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1019 float s3_output_scale = 0.f;
1020 int s3_output_zero_point = 0;
1021 if (is_quantized) {
1022 // the min for stage 3 is always 0.0f.
1023 float s3_output_min = 0.f;
1024 // the max for stage 3 is max(s1_min * s2_min, s1_max * s3_max).
1025 float s3_output_max =
1026 s1_output_max * s2_output_max > s1_output_min * s2_output_min
1027 ? s1_output_max * s2_output_max
1028 : s1_output_min * s2_output_min;
1029 CalculateQuantizationParams(s3_output_min, s3_output_max,
1030 &s3_output_scale, &s3_output_zero_point);
1031 }
1032 TF_LITE_ENSURE_OK(
1033 context_,
1034 AddAdditionalOutputTensor(
1035 tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1036 nn_type, s3_output_scale, s3_output_zero_point,
1037 &s3_out_ann_index));
1038 TF_LITE_ENSURE_OK(
1039 context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1040 }
1041
1042 // Stage 4: y = s3 + s2
1043 {
1044 augmented_inputs_.push_back(s2_out_ann_index);
1045 augmented_inputs_.push_back(s3_out_ann_index);
1046 TF_LITE_ENSURE_OK(context_,
1047 AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1048 TF_LITE_ENSURE_OK(context_,
1049 AddTensorOutput(lite_output_index, tensor_flags));
1050 TF_LITE_ENSURE_OK(
1051 context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
1052 }
1053
1054 return kTfLiteOk;
1055 }
1056
1057 // Adds the operation to the model and maps the operation to the originating
1058 // TFLite one.
AddOperationToModel(ANeuralNetworksOperationType type,uint32_t input_count,const uint32_t * inputs,uint32_t output_count,const uint32_t * outputs,int lite_node_index)1059 TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
1060 uint32_t input_count, const uint32_t* inputs,
1061 uint32_t output_count,
1062 const uint32_t* outputs,
1063 int lite_node_index) {
1064 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1065 context_,
1066 nnapi_->ANeuralNetworksModel_addOperation(
1067 nn_model_, type, input_count, inputs, output_count, outputs),
1068 "adding operation", nnapi_errno_);
1069 nnapi_to_tflite_op_mapping_->push_back(lite_node_index);
1070 return kTfLiteOk;
1071 }
1072
1073 // Adds a Dequantize operator and replaces the input tensor index with the
1074 // dequantized version. If the dequantized version of the operator already
1075 // exists then it is not added again.
AddDequantize(int nn_input_index,int lite_tensor_index,TfLiteType dequantized_type,int lite_node_index)1076 TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
1077 TfLiteType dequantized_type, int lite_node_index) {
1078 const int ann_index =
1079 operand_mapping_->lite_index_to_ann(lite_tensor_index);
1080 int dequantized_ann_index =
1081 dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);
1082
1083 if (dequantized_ann_index == -1) {
1084 // The dequantized version does not exist yet, it has to be added: a new
1085 // Dequantize operation is added, yielding a new tensor.
1086 const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
1087 ANeuralNetworksOperandType operand_type{
1088 ANEURALNETWORKS_TENSOR_FLOAT32,
1089 static_cast<uint32_t>(tensor.dims->size),
1090 reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
1091 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1092 context_,
1093 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1094 "adding operand", nnapi_errno_);
1095 dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();
1096
1097 // Add Dequantize operation.
1098 const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
1099 const uint32_t dequantize_output[1] = {
1100 static_cast<uint32_t>(dequantized_ann_index)};
1101 TF_LITE_ENSURE_OK(
1102 context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
1103 /*input_count=*/1, dequantize_input,
1104 /*output_count=*/1, dequantize_output,
1105 lite_node_index));
1106 dequantize_mapping_->Add(ann_index, dequantized_type,
1107 dequantized_ann_index);
1108 }
1109
1110 // The input for the original operation is modified so that the operation
1111 // now uses the dequantized tensor as input.
1112 augmented_inputs_[nn_input_index] = dequantized_ann_index;
1113
1114 return kTfLiteOk;
1115 }
1116
1117 // Add a RESHAPE op which reshapes an NNAPI intermediate output to the
1118 // dimensions of the TFLite output tensor.
AppendReshape(int nn_input_index,int lite_out_tensor_index,int lite_node_index)1119 TfLiteStatus AppendReshape(int nn_input_index, int lite_out_tensor_index,
1120 int lite_node_index) {
1121 augmented_inputs_.push_back(nn_input_index);
1122 auto& output_tensor = context_->tensors[lite_out_tensor_index];
1123 TF_LITE_ENSURE_STATUS(
1124 AddVectorInt32Operand(output_tensor.dims->data,
1125 static_cast<uint32_t>(output_tensor.dims->size)));
1126 TF_LITE_ENSURE_OK(context_,
1127 AddTensorOutput(lite_out_tensor_index,
1128 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1129 TF_LITE_ENSURE_STATUS(
1130 FinalizeAddOperation(ANEURALNETWORKS_RESHAPE, lite_node_index));
1131 return kTfLiteOk;
1132 }
1133
1134 // Lower PACK into CONCAT + RESHAPE when possible
TransformPackIntoSupportedOps(int lite_node_index,TfLiteNode * node,TfLiteRegistration * reg)1135 TfLiteStatus TransformPackIntoSupportedOps(int lite_node_index,
1136 TfLiteNode* node,
1137 TfLiteRegistration* reg) {
1138 // Add input tensors for CONCAT, and calculate the dimensions for the
1139 // output.
1140 int concat_output_ann_index = -1;
1141 TfLitePackParams* builtin =
1142 reinterpret_cast<TfLitePackParams*>(node->builtin_data);
1143 auto& input_tensor = context_->tensors[node->inputs->data[0]];
1144 int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
1145 : builtin->axis;
1146 TF_LITE_ENSURE(context_, axis < input_tensor.dims->size);
1147 uint32_t concat_dim_size = 0;
1148 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
1149 const auto input_index = node->inputs->data[input_pos];
1150 concat_dim_size +=
1151 context_->tensors[node->inputs->data[input_pos]].dims->data[axis];
1152 TF_LITE_ENSURE_STATUS(
1153 AddTensorInput(input_index, /*hybrid_op=*/false,
1154 NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1155 }
1156 TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
1157 std::vector<uint32_t> concat_output_shape(input_tensor.dims->size, 0);
1158 for (int i = 0; i < concat_output_shape.size(); i++) {
1159 if (i == axis) {
1160 concat_output_shape[i] = concat_dim_size;
1161 } else {
1162 concat_output_shape[i] = input_tensor.dims->data[i];
1163 }
1164 }
1165 TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
1166 input_tensor.type, concat_output_shape.size(),
1167 concat_output_shape.data(), input_tensor.params.scale,
1168 input_tensor.params.zero_point, &concat_output_ann_index));
1169 TF_LITE_ENSURE_STATUS(
1170 FinalizeAddOperation(ANEURALNETWORKS_CONCATENATION, lite_node_index));
1171
1172 // Reshape the output tensor
1173 TF_LITE_ENSURE_STATUS(AppendReshape(
1174 concat_output_ann_index, node->outputs->data[0], lite_node_index));
1175 return kTfLiteOk;
1176 }
1177
1178 // Finish emitting the op (of type `type`) into the NN API.
FinalizeAddOperation(ANeuralNetworksOperationType type,int lite_node_index)1179 TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
1180 int lite_node_index) {
1181 // Actually add a NN API operation
1182 TF_LITE_ENSURE_OK(context_,
1183 AddOperationToModel(
1184 type, static_cast<uint32_t>(augmented_inputs_.size()),
1185 augmented_inputs_.data(),
1186 static_cast<uint32_t>(augmented_outputs_.size()),
1187 augmented_outputs_.data(), lite_node_index));
1188 augmented_inputs_.clear();
1189 augmented_outputs_.clear();
1190 return kTfLiteOk;
1191 }
1192
AddSingleValueTensorAsScalarOperand(int tensor_index,int nn_type)1193 TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
1194 int nn_type) {
1195 const TfLiteTensor* tensor = &context_->tensors[tensor_index];
1196 TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);
1197
1198 ANeuralNetworksOperandType operand_type{.type = nn_type};
1199 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1200 context_,
1201 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1202 "adding operand", tensor, nnapi_errno_);
1203 int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1204 if (ann_tensor_index != -1) {
1205 augmented_inputs_.push_back(ann_tensor_index);
1206 return kTfLiteOk;
1207 }
1208 // Allocate a new tensor index
1209 ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1210 augmented_inputs_.push_back(ann_tensor_index);
1211
1212 const TfLiteType tensor_type = tensor->type;
1213 TfLiteType nn_type_equivalent;
1214 TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
1215 &nn_type_equivalent));
1216 if (tensor_type != nn_type_equivalent) {
1217 operand_mapping_->add_type_conversion(tensor_index, nn_type_equivalent);
1218 }
1219 return kTfLiteOk;
1220 }
1221
1222 template <typename T>
AddNewInputConstantTensor(int32_t nn_type,TfLiteType type,const TfLiteIntArray * dims,const std::vector<T> & tensor_value,const TfLiteQuantizationParams & quant_params,int * tensor_index)1223 TfLiteStatus AddNewInputConstantTensor(
1224 int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
1225 const std::vector<T>& tensor_value,
1226 const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1227 TF_LITE_ENSURE_OK(context_,
1228 context_->AddTensors(context_, 1, tensor_index));
1229
1230 TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
1231 new_tensor->type = type;
1232 new_tensor->allocation_type = kTfLiteDynamic;
1233 new_tensor->params = quant_params;
1234
1235 // Not removing the new tensor in case of resizing errors since it will
1236 // be cleared by the context
1237 TF_LITE_ENSURE_OK(
1238 context_,
1239 context_->ResizeTensor(
1240 context_, new_tensor,
1241 // Resize Tensor takes ownership of the dims array passed as param
1242 TfLiteIntArrayCopy(dims)));
1243
1244 memcpy(new_tensor->data.raw,
1245 reinterpret_cast<const char*>(tensor_value.data()),
1246 tensor_value.size() * sizeof(T));
1247
1248 const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
1249 const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
1250 ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1251 quant_params.scale,
1252 quant_params.zero_point};
1253
1254 const int ann_tensor_index =
1255 operand_mapping_->add_delegate_generated_input_ann_tensors_operand();
1256
1257 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1258 context_,
1259 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1260 "adding operand", nnapi_errno_);
1261
1262 augmented_inputs_.push_back(ann_tensor_index);
1263
1264 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1265 context_,
1266 nnapi_->ANeuralNetworksModel_setOperandValue(
1267 nn_model_, ann_tensor_index, new_tensor->data.raw,
1268 new_tensor->bytes),
1269 "setting new operand value", nnapi_errno_);
1270
1271 return kTfLiteOk;
1272 }
1273
1274 template <typename T>
AddNewInputConstantTensor(int32_t nn_type,TfLiteType type,std::initializer_list<int> dims,const std::vector<T> & tensor_value,const TfLiteQuantizationParams & quant_params,int * tensor_index)1275 TfLiteStatus AddNewInputConstantTensor(
1276 int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
1277 const std::vector<T>& tensor_value,
1278 const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1279 TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
1280 dim_array->size = dims.size();
1281 std::copy(dims.begin(), dims.end(), dim_array->data);
1282
1283 const auto result = AddNewInputConstantTensor(
1284 nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
1285 TfLiteIntArrayFree(dim_array);
1286 return result;
1287 }
1288
AddIntermediateOutputTensor(TfLiteType tfl_type,uint32_t dimension_count,const uint32_t * dimension_data,float scale,int32_t zero_point,int * ann_index_out)1289 TfLiteStatus AddIntermediateOutputTensor(TfLiteType tfl_type,
1290 uint32_t dimension_count,
1291 const uint32_t* dimension_data,
1292 float scale, int32_t zero_point,
1293 int* ann_index_out) {
1294 int32_t nn_type;
1295 switch (tfl_type) {
1296 case kTfLiteFloat32:
1297 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1298 break;
1299 case kTfLiteInt8:
1300 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1301 break;
1302 case kTfLiteUInt8:
1303 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1304 break;
1305 default:
1306 return kTfLiteError;
1307 }
1308 TF_LITE_ENSURE_STATUS(
1309 AddAdditionalOutputTensor(dimension_count, dimension_data, nn_type,
1310 scale, zero_point, ann_index_out));
1311 return kTfLiteOk;
1312 }
1313
ClearInputOuputLists()1314 void ClearInputOuputLists() {
1315 augmented_inputs_.clear();
1316 augmented_outputs_.clear();
1317 }
1318
1319 private:
1320 // Returns a TF Lite type which has the same memory representation as a
1321 // provided NN API type.
GetEquivalentToANNType(TfLiteContext * context,int nn_type,TfLiteType * type)1322 TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
1323 TfLiteType* type) {
1324 switch (nn_type) {
1325 case ANEURALNETWORKS_INT32:
1326 *type = kTfLiteInt32;
1327 return kTfLiteOk;
1328 case ANEURALNETWORKS_FLOAT32:
1329 *type = kTfLiteFloat32;
1330 return kTfLiteOk;
1331 default:
1332 context->ReportError(context,
1333 "NN API Delegate: Can't get an equivalent TF Lite "
1334 "type for provided NN API type: %d.\n",
1335 nn_type);
1336 return kTfLiteError;
1337 }
1338 }
1339
1340 template <typename T>
AddScalarOperand(T value,int32_t nn_type)1341 TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
1342 ANeuralNetworksOperandType operand_type{.type = nn_type};
1343 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1344 context_,
1345 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1346 "adding operand", nnapi_errno_);
1347 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1348 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1349 context_,
1350 nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
1351 &value, sizeof(T)),
1352 "setting new operand value", nnapi_errno_);
1353 augmented_inputs_.push_back(ann_index);
1354 return kTfLiteOk;
1355 }
1356
1357 template <typename T>
AddVectorOperand(const T * values,uint32_t num_values,int32_t nn_type,float scale,int32_t zero_point)1358 TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1359 int32_t nn_type, float scale,
1360 int32_t zero_point) {
1361 ANeuralNetworksOperandType operand_type{.type = nn_type,
1362 .dimensionCount = 1,
1363 .dimensions = &num_values,
1364 .scale = scale,
1365 .zeroPoint = zero_point};
1366
1367 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1368 context_,
1369 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1370 "adding operand", nnapi_errno_);
1371
1372 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1373 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1374 context_,
1375 nnapi_->ANeuralNetworksModel_setOperandValue(
1376 nn_model_, ann_index, values, sizeof(T) * num_values),
1377 "settings new operand value", nnapi_errno_);
1378 augmented_inputs_.push_back(ann_index);
1379 return kTfLiteOk;
1380 }
1381
1382 template <typename T>
AddVectorOperand(const T * values,uint32_t num_values,int32_t nn_type)1383 TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1384 int32_t nn_type) {
1385 return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
1386 /*zero_point=*/0);
1387 }
1388
AddFloat32OutputTensor(uint32_t dimension_count,const uint32_t * dimension_data,int * ann_index_out)1389 TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
1390 const uint32_t* dimension_data,
1391 int* ann_index_out) {
1392 return AddAdditionalOutputTensor(
1393 dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
1394 /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
1395 }
1396
AddAdditionalOutputTensor(uint32_t dimension_count,const uint32_t * dimension_data,int32_t nn_type,float scale,int32_t zero_point,int * ann_index_out)1397 TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
1398 const uint32_t* dimension_data,
1399 int32_t nn_type, float scale,
1400 int32_t zero_point,
1401 int* ann_index_out) {
1402 ANeuralNetworksOperandType operand_type{
1403 .type = nn_type,
1404 .dimensionCount = dimension_count,
1405 .dimensions = dimension_data,
1406 .scale = scale,
1407 .zeroPoint = zero_point,
1408 };
1409 RETURN_TFLITE_ERROR_IF_NN_ERROR(
1410 context_,
1411 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1412 "adding operand", nnapi_errno_);
1413 const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1414 augmented_outputs_.push_back(ann_index);
1415 if (ann_index_out) *ann_index_out = ann_index;
1416 return kTfLiteOk;
1417 }
1418
1419 // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
1420 // This returns the NN API tensor index corresponding to the created tensor.
1421 // If another caller previously created a NN API tensor for `tensor_index`
1422 // then the existing one is returned.
AddTensor(int tensor_index,bool hybrid_op,std::vector<uint32_t> * indices,int tensor_flags=0)1423 TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
1424 std::vector<uint32_t>* indices, int tensor_flags = 0) {
1425 const bool scalar_as_tensor =
1426 tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
1427 const bool need_int8_conversion =
1428 tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1429 const bool use_int8_asymm_signed =
1430 tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
1431 const bool force_per_channel =
1432 tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
1433 const bool need_half2float_conversion =
1434 tensor_flags & NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
1435
1436 int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1437 if (ann_tensor_index != -1) {
1438 indices->push_back(ann_tensor_index);
1439 return kTfLiteOk;
1440 }
1441 // Allocate a new tensor index
1442 ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1443
1444 // Parameters needed for new type.
1445 int32_t nn_type = 0;
1446 float scale = 0.0f;
1447 int32_t zeroPoint = 0;
1448 ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
1449 TfLiteTensor* tensor = &context_->tensors[tensor_index];
1450 TfLiteType tensor_type = tensor->type;
1451 if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
1452       // For legacy reasons, UINT8 weights in hybrid operators are actually INT8
1453 // values and should be interpreted as such.
1454 tensor_type = kTfLiteInt8;
1455 }
1456 switch (tensor_type) {
1457 case kTfLiteNoType:
1458 // Tensors added during initialization of Ops don't have a type yet and
1459 // should not be registered with the NNAPI.
1460 indices->push_back(-1);
1461 return kTfLiteOk;
1462 case kTfLiteFloat32:
1463 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1464 break;
1465 case kTfLiteFloat16:
1466 nn_type = ANEURALNETWORKS_TENSOR_FLOAT16;
1467 if (need_half2float_conversion) {
1468 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1469 operand_mapping_->add_type_conversion(tensor_index, kTfLiteFloat32);
1470 }
1471 break;
1472 case kTfLiteUInt8:
1473 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1474 scale = tensor->params.scale;
1475 zeroPoint = tensor->params.zero_point;
1476 if (scale == 0) {
1477 // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
1478 // NNAPI.
1479 scale = 1;
1480 }
1481 break;
1482 case kTfLiteInt8:
1483 // If explicit int8 conversion is needed, we still need
1484 // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
1485 if (use_int8_asymm_signed) {
1486 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1487 } else if (need_int8_conversion) {
1488 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1489 } else {
1490 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
1491 }
1492 scale = tensor->params.scale;
1493 zeroPoint = tensor->params.zero_point;
1494 if (tensor->quantization.type == kTfLiteAffineQuantization) {
1495 TfLiteAffineQuantization* quantization_params =
1496 static_cast<TfLiteAffineQuantization*>(
1497 tensor->quantization.params);
1498 if (quantization_params->scale->size > 1 || force_per_channel) {
1499 // Set up per-channel quantization.
1500 ann_perchannel_params = {
1501 .channelDim = static_cast<uint32_t>(
1502 quantization_params->quantized_dimension),
1503 .scaleCount =
1504 static_cast<uint32_t>(quantization_params->scale->size),
1505 .scales = quantization_params->scale->data,
1506 };
1507 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
1508 scale = 0.0f;
1509 zeroPoint = 0;
1510 } else if (quantization_params->scale->size == 1) {
1511 scale = quantization_params->scale->data[0];
1512 zeroPoint = quantization_params->zero_point->data[0];
1513 }
1514 }
1515 if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1516 if (need_int8_conversion) {
1517 zeroPoint += 128;
1518 operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
1519 }
1520 if (scale == 0) {
1521 // QUANT8 tensors with zero scale are not valid in NNAPI.
1522 scale = 1;
1523 }
1524 }
1525 break;
1526 case kTfLiteInt32:
1527 nn_type = ANEURALNETWORKS_TENSOR_INT32;
1528 scale = tensor->params.scale;
1529 zeroPoint = tensor->params.zero_point;
1530 break;
1531 case kTfLiteBool:
1532 nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
1533 break;
1534 case kTfLiteInt16:
1535 nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
1536 scale = tensor->params.scale;
1537 zeroPoint = tensor->params.zero_point;
1538 break;
1539 default:
1540 context_->ReportError(
1541 context_, "Failed to add NN API tensor: type %s is not supported.",
1542 TfLiteTypeGetName(tensor_type));
1543 return kTfLiteError;
1544 }
1545 bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor);
1546 uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
1547 std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
1548 if (has_unspecified_dimensions) {
1549 for (int i = 0; i < tensor->dims_signature->size; i++) {
1550 dims_unspecified[i] = tensor->dims_signature->data[i] == -1
1551 ? 0
1552 : tensor->dims_signature->data[i];
1553 }
1554 }
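    // For example (illustrative), a tensor with dims_signature {-1, 224, 224, 3}
    // is registered with NNAPI dimensions {0, 224, 224, 3}; NNAPI treats a zero
    // dimension as unspecified.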
1555 uint32_t* tensor_dims =
1556 has_unspecified_dimensions && allow_dynamic_dimensions_
1557 ? dims_unspecified.data()
1558 : reinterpret_cast<uint32_t*>(tensor->dims->data);
1559 if (scalar_as_tensor && tensor_rank == 0) {
1560 // Use rank 1, shape {1} operand for TFLite scalar tensors.
1561 tensor_rank = 1;
1562 tensor_dims = &tensor_rank;
1563 }
1564 if (tensor_rank == 0) {
1565       // If the tensor rank is 0, the dimensions pointer must be nullptr.
1566 tensor_dims = nullptr;
1567 }
1568
1569 ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1570 scale, zeroPoint};
1571 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1572 context_,
1573 nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1574 "adding operand", tensor, nnapi_errno_);
1575
1576 if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1577 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1578 context_,
1579 nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
1580 nn_model_, ann_tensor_index, &ann_perchannel_params),
1581 "setting new operand per channel quantization params", tensor,
1582 nnapi_errno_);
1583 }
1584 if (tensor->allocation_type == kTfLiteMmapRo) {
1585 if (IsQuantized(tensor_type) && need_int8_conversion &&
1586 nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1587 // We need to add a tensor and convert the weights into uint8.
1588 // Currently this is only needed for fully_connected. The new_tensor is
1589 // needed for lifetime management for the converted weights.
1590 int new_tensor_index = -1;
1591 TF_LITE_ENSURE_OK(context_,
1592 context_->AddTensors(context_, 1, &new_tensor_index));
1593 TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1594 new_tensor->type = kTfLiteUInt8;
1595 new_tensor->allocation_type = kTfLiteDynamic;
1596 new_tensor->params.scale = scale;
1597 new_tensor->params.zero_point = zeroPoint;
1598 // Not removing the new tensor in case of resizing errors since it will
1599 // be cleared by the context
1600 TF_LITE_ENSURE_OK(
1601 context_, context_->ResizeTensor(context_, new_tensor,
1602 // Resize Tensor takes ownership of
1603 // the dims array passed as param
1604 TfLiteIntArrayCopy(tensor->dims)));
1605         // Convert the int8 values into the corresponding uint8 values.
1606 const auto num_elements = NumElements(tensor);
1607 for (int i = 0; i < num_elements; ++i) {
1608           new_tensor->data.uint8[i] = static_cast<uint8_t>(
1609 static_cast<int32_t>(tensor->data.int8[i]) + 128);
1610 }
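        // For example, the int8 weight -128 becomes the uint8 value 0 and the
        // int8 weight 127 becomes 255; the +128 shift of the zero point above
        // keeps the represented real values unchanged.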
1611 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1612 context_,
1613 nnapi_->ANeuralNetworksModel_setOperandValue(
1614 nn_model_, ann_tensor_index, new_tensor->data.raw,
1615 new_tensor->bytes),
1616 "setting new operand value", tensor, nnapi_errno_);
1617 } else if (tensor_type == kTfLiteFloat16 && need_half2float_conversion) {
1618 // We need to convert the constant fp16 weights to fp32. The new_tensor
1619 // is needed for lifetime management for the converted weights.
1620 int new_tensor_index = -1;
1621 TF_LITE_ENSURE_OK(context_,
1622 context_->AddTensors(context_, 1, &new_tensor_index));
1623 TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1624 new_tensor->type = kTfLiteFloat32;
1625 new_tensor->allocation_type = kTfLiteDynamic;
1626 // Not removing the new tensor in case of resizing errors since it will
1627 // be cleared by the context
1628 TF_LITE_ENSURE_OK(
1629 context_, context_->ResizeTensor(context_, new_tensor,
1630 // Resize Tensor takes ownership of
1631 // the dims array passed as param
1632 TfLiteIntArrayCopy(tensor->dims)));
1633         // Convert the fp16 values into the corresponding fp32 values.
1634 const auto num_elements = NumElements(tensor);
1635 for (int i = 0; i < num_elements; ++i) {
1636 new_tensor->data.f[i] = fp16_ieee_to_fp32_value(
1637 reinterpret_cast<uint16_t*>(tensor->data.data)[i]);
1638 }
1639 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1640 context_,
1641 nnapi_->ANeuralNetworksModel_setOperandValue(
1642 nn_model_, ann_tensor_index, new_tensor->data.data,
1643 new_tensor->bytes),
1644 "setting new operand value", tensor, nnapi_errno_);
1645 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
1646 } else if (tensor->allocation &&
1647 static_cast<const Allocation*>(tensor->allocation)->type() ==
1648 Allocation::Type::kMMap) {
1649 const MMAPAllocation* mmap_alloc =
1650 static_cast<const MMAPAllocation*>(tensor->allocation);
1651 if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
1652 ANeuralNetworksMemory* ann_memory_handle = nullptr;
1653 nnapi_->ANeuralNetworksMemory_createFromFd(
1654 mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
1655 &ann_memory_handle);
1656 allocation_memory_mapping_->insert(
1657 std::make_pair(mmap_alloc, ann_memory_handle));
1658 }
1659 ANeuralNetworksMemory* ann_memory_handle =
1660 allocation_memory_mapping_->at(mmap_alloc);
1661 // Compute the offset to the base pointer of the MMAPAllocation.
1662 auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
1663 reinterpret_cast<const uint8_t*>(mmap_alloc->base());
1664 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1665 context_,
1666 nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
1667 nn_model_, ann_tensor_index, ann_memory_handle, offset,
1668 tensor->bytes),
1669 "setting new operand value from memory", tensor, nnapi_errno_);
1670 #endif
1671 } else {
1672 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1673 context_,
1674 nnapi_->ANeuralNetworksModel_setOperandValue(
1675 nn_model_, ann_tensor_index, tensor->data.data, tensor->bytes),
1676 "setting new operand value", tensor, nnapi_errno_);
1677 }
1678 }
1679 indices->push_back(ann_tensor_index);
1680 return kTfLiteOk;
1681 }
1682
1683 // Access to NNAPI.
1684 const NnApi* const nnapi_;
1685
1686 // TfLiteContext for error handling.
1687 TfLiteContext* const context_;
1688
1689 // Tracks relationship between indices.
1690 OperandMapping* const operand_mapping_;
1691
1692 // Keeps mapping of ANN quantized tensor and float data type to equivalent
1693 // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
1694 // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
1695 // tensor #4 to a FLOAT32 tensor.
1696 DequantizeMapping* const dequantize_mapping_;
1697
  // Caches the NNAPI memory handles created for memory-mapped (mmap) TFLite
  // allocations, so each allocation is registered with NNAPI only once.
1698   std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
1699       allocation_memory_mapping_;
1700
1701 // Tracks for every operation in the NNAPI model the source TfLite model
1702 // node index.
1703 std::vector<int>* const nnapi_to_tflite_op_mapping_;
1704
1705 // The NNAPI model.
1706 ANeuralNetworksModel* const nn_model_;
1707
1708 // Inputs and outputs for the current op. These are augmented in the sense
1709 // that NN API uses operands for all arguments, not just tensors, unlike
1710 // TensorFlow Lite.
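  // For example (illustrative), for an ANEURALNETWORKS_CONV_2D operation the
  // augmented inputs hold the operand indices of the input, filter and bias
  // tensors followed by scalar operands for padding, strides and the fused
  // activation, whereas the TFLite node only lists the three tensor inputs.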
1711 std::vector<uint32_t> augmented_inputs_;
1712 std::vector<uint32_t> augmented_outputs_;
1713
1714 // Return status code of the latest NNAPI call.
1715 int* nnapi_errno_;
1716
1717   // Whether to allow dynamic dimensions without re-compilation.
1718 bool allow_dynamic_dimensions_;
1719 };  // class NNAPIOpBuilder
1720
1721 namespace {
1722 struct OpValidationContext {
1723 bool is_valid;
1724 std::vector<NNAPIValidationFailure>* validation_failures;
1725 };
1726
1727 #define EXPECT_INPUT_TYPE_IN(actual_type, ...) \
1728 ExpectTypeIn(actual_type, {__VA_ARGS__}, \
1729 NNAPIValidationFailureType::kUnsupportedInputType, \
1730 "Input type not in expected list " #__VA_ARGS__, &val_ctx)
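
// For example (illustrative), inside a validation case the call
//
//   EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8);
//
// expands to a call equivalent to
//
//   ExpectTypeIn(input_type, {kTfLiteFloat32, kTfLiteUInt8},
//                NNAPIValidationFailureType::kUnsupportedInputType,
//                "Input type not in expected list kTfLiteFloat32, kTfLiteUInt8",
//                &val_ctx);
//
// and therefore requires a local OpValidationContext named `val_ctx`.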
1731
1732 inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
1733 const char* message,
1734 OpValidationContext* val_ctx) {
1735 val_ctx->is_valid = false;
1736
1737 #ifdef NNAPI_VERBOSE_VALIDATION
1738 if (val_ctx->validation_failures) {
1739 val_ctx->validation_failures->push_back({failure_type, message});
1740 }
1741 #endif
1742 }
1743
1744 template <typename... Args>
1745 inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
1746 NNAPIValidationFailureType failure_type,
1747 const char* message_fmt, Args... args) {
1748 val_ctx->is_valid = false;
1749 #ifdef NNAPI_VERBOSE_VALIDATION
1750 if (val_ctx->validation_failures) {
1751 size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
1752 std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
1753 snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);
1754
1755 val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
1756 }
1757 #endif
1758 }
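
// A usage sketch (illustrative): the formatted variant is convenient when the
// failure message depends on runtime values (`input_rank` below stands for
// some local variable):
//
//   AddValidationFailureFmt(&val_ctx,
//                           NNAPIValidationFailureType::kUnsupportedOperandRank,
//                           "Unsupported input rank %d", input_rank);
//
// The message is only materialized when NNAPI_VERBOSE_VALIDATION is defined;
// otherwise only `is_valid` is cleared.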
1759
1760 inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
1761 const char* message, OpValidationContext* val_ctx) {
1762 if (!condition) {
1763 AddValidationFailure(failure_type, message, val_ctx);
1764 return false;
1765 }
1766 return true;
1767 }
1768
1769 template <typename... Args>
1770 inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
1771 NNAPIValidationFailureType failure_type,
1772 const char* message_fmt, Args... args) {
1773 if (!condition) {
1774 AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
1775 return false;
1776 }
1777 return true;
1778 }
1779
1780 inline bool ExpectTypeIn(TfLiteType actual_type,
1781 std::initializer_list<TfLiteType> allowed_types,
1782 NNAPIValidationFailureType failure_type,
1783 const char* msg, OpValidationContext* val_ctx) {
1784 return Expect(std::find(allowed_types.begin(), allowed_types.end(),
1785 actual_type) != allowed_types.end(),
1786 failure_type, msg, val_ctx);
1787 }
1788
1789 inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
1790 OpValidationContext* val_ctx) {
1791 return ExpectFmt(curr_version >= min_version, val_ctx,
1792 NNAPIValidationFailureType::kUnsupportedAndroidVersion,
1793 "Android sdk version less than %d", min_version);
1794 }
1795
1796 inline bool ExpectMaxOpVersion(int curr_version, int max_version,
1797 OpValidationContext* val_ctx) {
1798 return ExpectFmt(curr_version <= max_version, val_ctx,
1799 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1800 "OP Version higher than %d", max_version);
1801 }
1802
1803 inline bool ExpectOpVersion(int curr_version, int max_version,
1804 OpValidationContext* val_ctx) {
1805 return ExpectFmt(curr_version <= max_version, val_ctx,
1806 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1807 "OP Version different from %d", max_version);
1808 }
1809
1810 inline bool ExpectIsFloatOperator(const TfLiteContext* context,
1811 const TfLiteNode* node,
1812 OpValidationContext* val_ctx) {
1813 const auto input_type = context->tensors[node->inputs->data[0]].type;
1814 return Expect(IsFloat(input_type),
1815 NNAPIValidationFailureType::kUnsupportedInputType,
1816 "Input should be Float", val_ctx);
1817 }
1818
1819 bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
1820 const TfLiteNode* node,
1821 OpValidationContext* val_ctx) {
1822 const auto input_type = context->tensors[node->inputs->data[0]].type;
1823 return Expect(IsFloatOrUInt8(input_type),
1824 NNAPIValidationFailureType::kUnsupportedInputType,
1825 "Input should be Float or UINT8", val_ctx);
1826 }
1827
1828 bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
1829 const TfLiteNode* node,
1830 OpValidationContext* val_ctx) {
1831 const auto input_type = context->tensors[node->inputs->data[0]].type;
1832 return Expect(IsFloatOrQuantized(input_type),
1833 NNAPIValidationFailureType::kUnsupportedInputType,
1834 "Input should be Float or Quant8", val_ctx);
1835 }
1836
1837 bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
1838 const TfLiteNode* node,
1839 OpValidationContext* val_ctx) {
1840 const auto input_type = context->tensors[node->inputs->data[0]].type;
1841 return Expect(IsFloatOrInt32(input_type),
1842 NNAPIValidationFailureType::kUnsupportedInputType,
1843 "Input should be Float or Int32", val_ctx);
1844 }
1845
1846 bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
1847 const TfLiteNode* node,
1848 OpValidationContext* val_ctx) {
1849 const auto input_type = context->tensors[node->inputs->data[0]].type;
1850 return Expect(IsFloatQuantizedOrInt32(input_type),
1851 NNAPIValidationFailureType::kUnsupportedInputType,
1852 "Input should be Float, Quant8, or Int32", val_ctx);
1853 }
1854
1855 // When using NN API version 1.0 or 1.1, the condition below must be true for
1856 // quantized versions of the following ops:
1857 // * CONV_2D
1858 // * DEPTHWISE_CONV_2D
1859 // * FULLY_CONNECTED (where filter actually stands for weights)
1860 // The condition is relaxed and no longer required since version 1.2.
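// For example (illustrative): with input_scale = 0.5f and filter_scale = 0.25f
// the product is 0.125f, so an output_scale of 0.2f passes this check while an
// output_scale of 0.1f causes the node to be rejected on NNAPI 1.0/1.1.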
1861 bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
1862 const TfLiteNode* node,
1863 OpValidationContext* val_ctx) {
1864 const int input_id = node->inputs->data[0];
1865 const int filter_id = node->inputs->data[1];
1866 const int output_id = node->outputs->data[0];
1867 const float input_scale = context->tensors[input_id].params.scale;
1868 const float filter_scale = context->tensors[filter_id].params.scale;
1869 const float output_scale = context->tensors[output_id].params.scale;
1870 return Expect(input_scale * filter_scale < output_scale,
1871 NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
1872 "When using NN API version 1.0 or 1.1, input_scale * "
1873 "filter_scale < output_scale.",
1874 val_ctx);
1875 }
1876
1877 } // namespace
1878
1879 // Returns true if the given node can be delegated to NNAPI for the given
1880 // builtin code, operator version and Android SDK version. Validation
1881 // failures, if any, are appended to `map_failures`.
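//
// A hedged usage sketch (illustrative; the surrounding partitioning logic is
// not shown):
//
//   std::vector<NNAPIValidationFailure> failures;
//   const bool supported = NNAPIDelegateKernel::Validate(
//       context, registration->builtin_code, registration->version,
//       android_sdk_version, node, /*is_accelerator_specified=*/true,
//       &failures);
//   if (!supported) {
//     // The node is left to the default TFLite kernels.
//   }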
1882 bool NNAPIDelegateKernel::Validate(
1883 const TfLiteContext* context, int builtin_code, int version,
1884 int android_sdk_version, const TfLiteNode* node,
1885 bool is_accelerator_specified,
1886 std::vector<NNAPIValidationFailure>* map_failures) {
1887 OpValidationContext val_ctx{true, map_failures};
1888 switch (builtin_code) {
1889 case kTfLiteBuiltinAdd: {
1890 ExpectMaxOpVersion(version, 2, &val_ctx);
1891 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1892 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1893 if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1894 Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
1895 ->activation == kTfLiteActNone,
1896 NNAPIValidationFailureType::kNoActivationExpected,
1897 "No activation function supported", &val_ctx);
1898 }
1899 } else {
1900 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1901 }
1902 } break;
1903 case kTfLiteBuiltinArgMax:
1904 case kTfLiteBuiltinArgMin: {
1905 ExpectMaxOpVersion(version, 2, &val_ctx);
1906 // Those operators were introduced in NNAPI 1.2.
1907 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
1908 &val_ctx);
1909 const TfLiteType input_type =
1910           context->tensors[node->inputs->data[0]].type;
1911 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
1912 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
1913
1914 const auto& axis_tensor = context->tensors[node->inputs->data[1]];
1915 if (axis_tensor.type == kTfLiteInt64) {
1916 Expect(
1917 axis_tensor.allocation_type == kTfLiteMmapRo &&
1918 *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
1919 *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
1920 NNAPIValidationFailureType::kUnsupportedInputType,
1921 "NNAPI only supports axis as int32. If the axis type is int64 and "
1922 "constant we can convert it to int32 if the value isn't too "
1923 "large.",
1924 &val_ctx);
1925 } else {
1926 Expect(axis_tensor.type == kTfLiteInt32,
1927 NNAPIValidationFailureType::kUnsupportedInputType,
1928 "Axis should be Int32", &val_ctx);
1929 }
1930 if (builtin_code == kTfLiteBuiltinArgMax) {
1931 auto builtin =
1932 reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
1933 Expect(builtin->output_type == kTfLiteInt32,
1934 NNAPIValidationFailureType::kUnsupportedOutputType,
1935 "NNAPI only supports int32 output.", &val_ctx);
1936 } else {
1937 auto builtin =
1938 reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
1939 Expect(builtin->output_type == kTfLiteInt32,
1940 NNAPIValidationFailureType::kUnsupportedOutputType,
1941 "NNAPI only supports int32 output.", &val_ctx);
1942 }
1943 } break;
1944 case kTfLiteBuiltinMul: {
1945 if (is_accelerator_specified) {
1946 ExpectMaxOpVersion(version, 3, &val_ctx);
1947 } else {
1948 ExpectMaxOpVersion(version, 2, &val_ctx);
1949 }
1950 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1951 ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1952 if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1953 Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
1954 ->activation == kTfLiteActNone,
1955 NNAPIValidationFailureType::kNoActivationExpected,
1956 "No activation function supported", &val_ctx);
1957 }
1958 } else {
1959 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1960 }
1961 } break;
1962 case kTfLiteBuiltinAveragePool2d: {
1963 ExpectMaxOpVersion(version, 2, &val_ctx);
1964 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1965 auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1966 // TODO(b/138756912): Large filter window would overflow on the
1967 // quantized reference CPU path.
1968 if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
1969 Expect(is_accelerator_specified ||
1970 (builtin->filter_width * builtin->filter_height <= 256),
1971 NNAPIValidationFailureType::kUnsupportedOperandSize,
1972 "Large filter window would overflow on the reference CPU path",
1973 &val_ctx);
1974 }
1975 } break;
1976 case kTfLiteBuiltinMaxPool2d: {
1977 ExpectMaxOpVersion(version, 2, &val_ctx);
1978 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1979 } break;
1980 case kTfLiteBuiltinL2Pool2d: {
1981 ExpectOpVersion(version, 1, &val_ctx);
1982 ExpectIsFloatOperator(context, node, &val_ctx);
1983
1984 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1985 auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1986 Expect(builtin->activation == kTfLiteActNone,
1987 NNAPIValidationFailureType::kUnsupportedOperandValue,
1988 "Before NNAPI 1.2 fused activation for l2_pool may not be "
1989 "supported.",
1990 &val_ctx);
1991 }
1992 } break;
1993 case kTfLiteBuiltinConv2d: {
1994 ExpectMaxOpVersion(version, 3, &val_ctx);
1995 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1996 Expect(!IsHybridOperator(context, builtin_code, node),
1997 NNAPIValidationFailureType::kUnsupportedHybridOperator,
1998 "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
1999 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2000
2001 const auto& filter_tensor = context->tensors[node->inputs->data[1]];
2002 if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
2003 TfLiteAffineQuantization* quantization_params =
2004 static_cast<TfLiteAffineQuantization*>(
2005 filter_tensor.quantization.params);
2006 Expect(quantization_params->scale->size <= 1,
2007 NNAPIValidationFailureType::kUnsupportedQuantizationType,
2008 "Per-channel quantized convolution not supported before NNAPI "
2009 "1.2.",
2010 &val_ctx);
2011 }
2012 }
2013 const auto input_type = context->tensors[node->inputs->data[0]].type;
2014 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2015 input_type == kTfLiteUInt8) {
2016 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2017 }
2018 auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
2019 // TODO(b/132950584): Add support for Conv2D with omitted bias.
2020 Expect(node->inputs->size == 3,
2021 NNAPIValidationFailureType::kMissingRequiredOperand,
2022 "Conv2D with omitted bias not supported", &val_ctx);
2023 if (builtin->dilation_width_factor != 1 ||
2024 builtin->dilation_height_factor != 1) {
2025 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2026 NNAPIValidationFailureType::kUnsupportedOperandValue,
2027 "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
2028 }
2029 } break;
2030 case kTfLiteBuiltinDepthwiseConv2d: {
2031 ExpectMaxOpVersion(version, 3, &val_ctx);
2032
2033 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2034 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2035
2036 const auto input_type = context->tensors[node->inputs->data[0]].type;
2037 if (input_type == kTfLiteUInt8) {
2038 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2039 }
2040
2041 auto builtin =
2042 reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
2043 Expect(builtin->dilation_width_factor == 1 &&
2044 builtin->dilation_height_factor == 1,
2045 NNAPIValidationFailureType::kUnsupportedOperandValue,
2046 "dilation_width_factor and dilation_height_factor expected to "
2047 "be equal to 1",
2048 &val_ctx);
2049 }
2050 } break;
2051 case kTfLiteBuiltinFullyConnected: {
2052 ExpectMaxOpVersion(version, 5, &val_ctx);
2053 const auto output_type = context->tensors[node->outputs->data[0]].type;
2054 Expect(output_type != kTfLiteInt16,
2055 NNAPIValidationFailureType::kUnsupportedOutputType,
2056 "Unsupported output of type kTfLiteInt16", &val_ctx);
2057 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2058 Expect(!IsHybridOperator(context, builtin_code, node),
2059 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2060 "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
2061 ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2062 }
2063 const auto input_type = context->tensors[node->inputs->data[0]].type;
2064 if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2065 input_type == kTfLiteUInt8) {
2066 ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2067 }
2068 auto builtin =
2069 reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
2070 if (builtin->keep_num_dims) {
2071 ExpectMinAndroidSdkVersion(android_sdk_version,
2072 kMinSdkVersionForNNAPI13, &val_ctx);
2073 }
2074 } break;
2075 case kTfLiteBuiltinHardSwish: {
2076       // Hardswish is supported; on pre-Q devices it is decomposed into basic
2077       // ops. Note that for some NNAPI accelerators the optimized TFLite
2078       // kernels may even be faster.
2079 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2080 } break;
2081 case kTfLiteBuiltinSoftmax: {
2082 ExpectOpVersion(version, 2, &val_ctx);
2083 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2084 const auto& output = context->tensors[node->outputs->data[0]];
2085 ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8},
2086 NNAPIValidationFailureType::kUnsupportedOutputType,
2087 "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, "
2088 "kTfLiteInt8.",
2089 &val_ctx);
2090 const auto& input = context->tensors[node->inputs->data[0]];
2091 const int input_rank = input.dims->size;
2092 Expect(input_rank <= 4,
2093 NNAPIValidationFailureType::kUnsupportedOperandRank,
2094 "Input rank should be <= 4", &val_ctx);
2095 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2096 Expect(
2097 input_rank == 2 || input_rank == 4,
2098 NNAPIValidationFailureType::kUnsupportedOperandRank,
2099 "Before API level 29 only 2D and 4D input tensors were supported.",
2100 &val_ctx);
2101 }
2102 } break;
2103 case kTfLiteBuiltinReshape: {
2104 ExpectOpVersion(version, 1, &val_ctx);
2105 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2106 if (node->inputs->size >= 2) {
2107 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2108 kTfLiteMmapRo,
2109 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2110 "The shape input tensor must be constant.", &val_ctx);
2111 }
2112 if (node->inputs->size == 1) {
2113 // reject scalar reshaping
2114 auto* params =
2115 reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data);
2116 int num_dimensions = params->num_dimensions;
2117 if (num_dimensions == 1 && params->shape[0] == 0) {
2118 // Legacy tflite models use a shape parameter of [0] to indicate
2119 // scalars.
2120 num_dimensions = 0;
2121 }
2122 Expect(num_dimensions > 0,
2123 NNAPIValidationFailureType::kUnsupportedOperandRank,
2124 "New shape rank should be > 0", &val_ctx);
2125 }
2126 } break;
2127 case kTfLiteBuiltinResizeBilinear: {
2128 ExpectMaxOpVersion(version, 3, &val_ctx);
2129 const auto& input = context->tensors[node->inputs->data[0]];
2130 const auto output_dims = context->tensors[node->outputs->data[0]].dims;
2131 Expect(input.dims->size == 4,
2132 NNAPIValidationFailureType::kUnsupportedOperandRank,
2133 "Input should have rank 4", &val_ctx);
2134 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2135 Expect(node->inputs->size >= 2,
2136 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2137 "Expected at least 2 inputs", &val_ctx);
2138 if (node->inputs->size >= 2) {
2139 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2140 kTfLiteMmapRo,
2141 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2142 "The size input tensor must be constant.", &val_ctx);
2143 }
2144 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2145 Expect(output_dims->data[1] == output_dims->data[2],
2146 NNAPIValidationFailureType::kUnsupportedOperandValue,
2147 "Require width == height due to driver differences in NNAPI "
2148 "< 1.2",
2149 &val_ctx);
2150 }
2151 auto builtin =
2152 reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
2153 if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2154 Expect(!builtin->align_corners,
2155 NNAPIValidationFailureType::kUnsupportedOperandValue,
2156 "NNAPI does not support align_corners == true.", &val_ctx);
2157 Expect(!builtin->half_pixel_centers,
2158 NNAPIValidationFailureType::kUnsupportedOperandValue,
2159 "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2160 }
2161 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2162 Expect(input.type == kTfLiteFloat32,
2163 NNAPIValidationFailureType::kUnsupportedInputType,
2164 "NNAPI 1.0 & 1.1 only supports float input.", &val_ctx);
2165 }
2166 } break;
2167 case kTfLiteBuiltinResizeNearestNeighbor: {
2168 ExpectMaxOpVersion(version, 3, &val_ctx);
2169 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2170 &val_ctx);
2171 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2172 Expect(node->inputs->size >= 2,
2173 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2174 "Expected at least 2 inputs", &val_ctx);
2175 if (node->inputs->size >= 2) {
2176 Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2177 kTfLiteMmapRo,
2178 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2179 "The size input tensor must be constant.", &val_ctx);
2180 }
2181 auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
2182 node->builtin_data);
2183 if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2184 Expect(!builtin->align_corners,
2185 NNAPIValidationFailureType::kUnsupportedOperandValue,
2186 "NNAPI does not support align_corners == true.", &val_ctx);
2187 Expect(!builtin->half_pixel_centers,
2188 NNAPIValidationFailureType::kUnsupportedOperandValue,
2189 "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2190 }
2191 } break;
2192 case kTfLiteBuiltinSqueeze: {
2193 ExpectOpVersion(version, 1, &val_ctx);
2194 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2195 &val_ctx);
2196 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
2197 if (android_sdk_version == kMinSdkVersionForNNAPI11) {
2198 Expect(builtin->num_squeeze_dims != 0,
2199 NNAPIValidationFailureType::kUnsupportedOperandValue,
2200 "NNAPI 1.1 does not support null squeeze_dims properly.",
2201 &val_ctx);
2202 }
2203 } break;
2204 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2205 ExpectMaxOpVersion(version, 2, &val_ctx);
2206 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2207 &val_ctx);
2208
2209 Expect(!IsHybridOperator(context, builtin_code, node),
2210 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2211 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2212
2213 Expect(node->inputs->size == 20 || node->inputs->size == 24,
2214 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2215              "Supporting only operations with 20 or 24 inputs", &val_ctx);
2216 } break;
2217 case kTfLiteBuiltinL2Normalization: {
2218 ExpectMaxOpVersion(version, 2, &val_ctx);
2219
2220 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2221 ExpectIsFloatOperator(context, node, &val_ctx);
2222
2223 const auto& input = context->tensors[node->inputs->data[0]];
2224 Expect(input.dims->size == 4,
2225 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2226                "Expected input of rank 4", &val_ctx);
2227 }
2228 auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
2229 Expect(builtin->activation == kTfLiteActNone,
2230 NNAPIValidationFailureType::kNoActivationExpected,
2231 "Expected no activation", &val_ctx);
2232 } break;
2233 case kTfLiteBuiltinLocalResponseNormalization: {
2234 ExpectOpVersion(version, 1, &val_ctx);
2235 } break;
2236 case kTfLiteBuiltinLshProjection: {
2237 ExpectOpVersion(version, 1, &val_ctx);
2238
2239 if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
2240 ->type == kTfLiteLshProjectionSparse) {
2241 // NNAPI does not support sparse projection correctly pre-Q
2242 // (b/111751836).
2243 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2244 NNAPIValidationFailureType::kUnsupportedInputType,
2245 "NNAPI does not support sparse projection correctly pre-Q",
2246 &val_ctx);
2247 Expect(node->inputs->size == 2,
2248 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2249                "NNAPI does not support weights for sparse projections.",
2250 &val_ctx);
2251 }
2252 } break;
2253 case kTfLiteBuiltinConcatenation: {
2254 ExpectMaxOpVersion(version, 2, &val_ctx);
2255 Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
2256 ->activation == kTfLiteActNone,
2257 NNAPIValidationFailureType::kNoActivationExpected,
2258 "No activation function supported", &val_ctx);
2259 Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
2260 NNAPIValidationFailureType::kUnsupportedOperandRank,
2261              "Input rank should be less than or equal to 4", &val_ctx);
2262
2263 const auto& input_type = context->tensors[node->inputs->data[0]].type;
2264 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2265 kTfLiteUInt8, kTfLiteInt8);
2266
2267 if (input_type == kTfLiteUInt8 &&
2268 android_sdk_version < kMinSdkVersionForNNAPI12) {
2269 auto first_param = context->tensors[node->inputs->data[0]].params;
2270 for (int i = 1; i < node->inputs->size; i++) {
2271 auto curr_param = context->tensors[node->inputs->data[i]].params;
2272 if (!Expect(curr_param.scale == first_param.scale &&
2273 curr_param.zero_point == first_param.zero_point,
2274 NNAPIValidationFailureType::kUnsupportedOperandValue,
2275                       "NNAPI 1.0 and 1.1 only support concatenating quantized "
2276                       "tensors with the same scale and offset.",
2277 &val_ctx)) {
2278 break;
2279 }
2280 }
2281 }
2282 } break;
2283 case kTfLiteBuiltinDequantize: {
2284 // Allow dequantizing fp16->fp32.
2285 if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2286 context->tensors[node->inputs->data[0]].type == kTfLiteFloat16 &&
2287 context->tensors[node->inputs->data[0]].allocation_type !=
2288 kTfLiteMmapRo) {
2289 return true;
2290 }
2291 Expect(version == 1 || version == 2,
2292 NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2293 "Supported op versions are 1 and 2 only", &val_ctx);
2294
2295 const auto& input = context->tensors[node->inputs->data[0]];
2296 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2297 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
2298 } else {
2299 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
2300
2301 if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
2302 input.type == kTfLiteInt8) {
2303 const auto zero_point = input.params.zero_point;
2304 Expect(zero_point == 0,
2305 NNAPIValidationFailureType::kUnsupportedInputType,
2306 "NN API supports int8 type since version 1.2 but only for "
2307 "symmetric quantization.",
2308 &val_ctx);
2309 }
2310 }
2311 } break;
2312 case kTfLiteBuiltinDensify: {
2313 // Allow densifying sparse weights.
2314 if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2315 context->tensors[node->inputs->data[0]].allocation_type ==
2316 kTfLiteMmapRo) {
2317 return true;
2318 }
2319 return false;
2320 } break;
2321 case kTfLiteBuiltinFloor: {
2322 ExpectOpVersion(version, 1, &val_ctx);
2323 } break;
2324 case kTfLiteBuiltinRelu:
2325 case kTfLiteBuiltinReluN1To1:
2326 case kTfLiteBuiltinRelu6:
2327 case kTfLiteBuiltinLogistic: {
2328 ExpectMaxOpVersion(version, 2, &val_ctx);
2329 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2330 } break;
2331 case kTfLiteBuiltinTanh: {
2332 ExpectMaxOpVersion(version, 2, &val_ctx);
2333 const TfLiteType input_type =
2334 context->tensors[node->inputs->data[0]].type;
2335 Expect(IsFloat(input_type) ||
2336 (IsQuantized(input_type) &&
2337 android_sdk_version >= kMinSdkVersionForNNAPI12),
2338 NNAPIValidationFailureType::kUnsupportedInputType,
2339              "NNAPI only supports float tanh before NNAPI 1.2.", &val_ctx);
2340 } break;
2341 case kTfLiteBuiltinSub: {
2342 ExpectMaxOpVersion(version, 3, &val_ctx);
2343 const TfLiteType input_type =
2344 context->tensors[node->inputs->data[0]].type;
2345 Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
2346 IsFloat(input_type)) ||
2347 (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
2348 IsQuantized(input_type)) ||
2349 (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2350 IsInt32(input_type)),
2351 NNAPIValidationFailureType::kUnsupportedInputType,
2352              "Unsupported input type for SUB on this NNAPI version.", &val_ctx);
2353 if (IsInt32(input_type)) {
2354 Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
2355 ->activation == kTfLiteActNone,
2356 NNAPIValidationFailureType::kNoActivationExpected,
2357 "No activation function supported", &val_ctx);
2358 }
2359 const int input0_rank =
2360 context->tensors[node->inputs->data[0]].dims->size;
2361 const int input1_rank =
2362 context->tensors[node->inputs->data[1]].dims->size;
2363 Expect(input0_rank <= 4 && input1_rank <= 4,
2364 NNAPIValidationFailureType::kUnsupportedOperandRank,
2365 "Input rank must be <= 4", &val_ctx);
2366 } break;
2367 case kTfLiteBuiltinDiv: {
2368 ExpectOpVersion(version, 1, &val_ctx);
2369 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2370 &val_ctx);
2371 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2372 NNAPIValidationFailureType::kUnsupportedInputType,
2373              "NNAPI only supports float div.", &val_ctx);
2374 } break;
2375 case kTfLiteBuiltinPad:
2376 case kTfLiteBuiltinPadv2: {
2377 ExpectMaxOpVersion(version, 2, &val_ctx);
2378 ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2379 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2380 &val_ctx);
2381
2382 const TfLiteIntArrayView input_shape(
2383 context->tensors[node->inputs->data[0]].dims);
2384 Expect(!HasZeroes(input_shape),
2385 NNAPIValidationFailureType::kUnsupportedOperandValue,
2386 "NN API pad ops do not support input tensors with no elements",
2387 &val_ctx);
2388
2389 Expect(node->inputs->size >= 2,
2390 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2391 "Expecting at least 2 inputs", &val_ctx);
2392
2393 if (node->inputs->size == 3) {
2394 // This is going to be mapped with a PadV2
2395 Expect(
2396 android_sdk_version >= kMinSdkVersionForNNAPI12,
2397 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2398 "Specification of the padding value is supported from NNAPI 1.2.",
2399 &val_ctx);
2400 } else { // this is going to be mapped as Pad
2401 if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2402 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2403 NNAPIValidationFailureType::kUnsupportedInputType,
2404 "Only Float32 inputs are supported before NNAPI 1.2",
2405 &val_ctx);
2406 }
2407 }
2408 } break;
2409 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
2410 ExpectOpVersion(version, 1, &val_ctx);
2411 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2412 &val_ctx);
2413 Expect(!IsHybridOperator(context, builtin_code, node),
2414 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2415 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2416 } break;
2417 case kTfLiteBuiltinSpaceToBatchNd: {
2418 ExpectMaxOpVersion(version, 2, &val_ctx);
2419 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2420 &val_ctx);
2421 } break;
2422 case kTfLiteBuiltinBatchToSpaceNd: {
2423 ExpectMaxOpVersion(version, 2, &val_ctx);
2424 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2425 &val_ctx);
2426 auto crops = context->tensors[node->inputs->data[2]];
2427 auto crops_data = crops.data.i32;
2428 Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
2429 crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
2430 NNAPIValidationFailureType::kUnsupportedOperandValue,
2431 "All crops should be 0.", &val_ctx);
2432 } break;
2433 case kTfLiteBuiltinStridedSlice: {
2434 ExpectMaxOpVersion(version, 2, &val_ctx);
2435 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2436 &val_ctx);
2437 } break;
2438 case kTfLiteBuiltinTranspose: {
2439 ExpectMaxOpVersion(version, 2, &val_ctx);
2440 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2441 &val_ctx);
2442 // Note that the permutation input tensor value dictates the output
2443 // dimensions.
2444 // TODO(b/110888333): Support dynamically-sized tensors in delegates.
2445 Expect((node->inputs->size > 1) &&
2446 (context->tensors[node->inputs->data[1]].allocation_type ==
2447 kTfLiteMmapRo),
2448 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2449 "Dynamically-sized tensors not supported.", &val_ctx);
2450 } break;
2451 case kTfLiteBuiltinAbs:
2452 case kTfLiteBuiltinExp:
2453 case kTfLiteBuiltinLog:
2454 case kTfLiteBuiltinRsqrt:
2455 case kTfLiteBuiltinPow: {
2456 ExpectOpVersion(version, 1, &val_ctx);
2457 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2458 &val_ctx);
2459 ExpectIsFloatOperator(context, node, &val_ctx);
2460 } break;
2461 case kTfLiteBuiltinSlice: {
2462 ExpectMaxOpVersion(version, 2, &val_ctx);
2463 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2464 &val_ctx);
2465 const auto input_type = context->tensors[node->inputs->data[0]].type;
2466 const auto begin_type = context->tensors[node->inputs->data[1]].type;
2467 const auto size_type = context->tensors[node->inputs->data[2]].type;
2468 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2469 kTfLiteUInt8, kTfLiteInt8);
2470 Expect(begin_type == kTfLiteInt32,
2471 NNAPIValidationFailureType::kUnsupportedInputType,
2472 "Begin type should be Int32", &val_ctx);
2473 Expect(size_type == kTfLiteInt32,
2474 NNAPIValidationFailureType::kUnsupportedInputType,
2475 "Size type should be Int32", &val_ctx);
2476 } break;
2477 case kTfLiteBuiltinSin: {
2478 ExpectOpVersion(version, 1, &val_ctx);
2479 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2480 &val_ctx);
2481 ExpectIsFloatOperator(context, node, &val_ctx);
2482 } break;
2483 case kTfLiteBuiltinTransposeConv: {
2484 ExpectMaxOpVersion(version, 3, &val_ctx);
2485 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2486 &val_ctx);
2487 Expect((node->inputs->size > 1) &&
2488 (context->tensors[node->inputs->data[0]].allocation_type ==
2489 kTfLiteMmapRo) &&
2490 (context->tensors[node->inputs->data[1]].allocation_type ==
2491 kTfLiteMmapRo),
2492 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2493 "Dynamically-sized tensors not supported.", &val_ctx);
2494 } break;
2495 case kTfLiteBuiltinSqrt: {
2496 ExpectOpVersion(version, 1, &val_ctx);
2497 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2498 &val_ctx);
2499 ExpectIsFloatOperator(context, node, &val_ctx);
2500 } break;
2501 case kTfLiteBuiltinRnn: {
2502 ExpectOpVersion(version, 1, &val_ctx);
2503 Expect(node->inputs->size == 5,
2504 NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2505              "Expected 5 inputs", &val_ctx);
2506 if (node->inputs->size >= 2) {
2507 Expect(
2508 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2509 kTfLiteFloat32,
2510 NNAPIValidationFailureType::kUnsupportedInputType,
2511             "NNAPI only supports float32 weights.", &val_ctx);
2512 }
2513 } break;
2514 case kTfLiteBuiltinSpaceToDepth: {
2515 ExpectMaxOpVersion(version, 2, &val_ctx);
2516 const TfLiteType input_type =
2517 context->tensors[node->inputs->data[0]].type;
2518 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2519 kTfLiteInt8);
2520 } break;
2521 case kTfLiteBuiltinSvdf: {
2522 ExpectOpVersion(version, 1, &val_ctx);
2523 Expect(node->inputs->size == 5,
2524 NNAPIValidationFailureType::kUnsupportedOperandRank,
2525              "Expected 5 inputs", &val_ctx);
2526 if (node->inputs->size >= 2) {
2527 Expect(
2528 context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2529 kTfLiteFloat32,
2530 NNAPIValidationFailureType::kUnsupportedInputType,
2531             "NNAPI only supports float32 weights.", &val_ctx);
2532 }
2533 Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
2534 NNAPIValidationFailureType::kUnsupportedOperandRank,
2535 "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
2536 Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
2537 .type == kTfLiteFloat32,
2538 NNAPIValidationFailureType::kUnsupportedInputType,
2539 "Weights should be Float32", &val_ctx);
2540 } break;
2541 case kTfLiteBuiltinLstm: {
2542 ExpectMaxOpVersion(version, 3, &val_ctx);
2543 Expect(
2544 android_sdk_version >= kMinSdkVersionForNNAPI11,
2545 NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2546 "NNAPI 1.0 has a bug for optional tensors which would affect LSTM.",
2547 &val_ctx);
2548 Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
2549 !IsHybridOperator(context, builtin_code, node),
2550 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2551 "Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
2552
2553 const auto weight_input_index =
2554 isLstmBasicKernel(node) ? 2 /* basic::kInputWeights */
2555 : 4 /* full::kInputToOutputWeightsTensor */;
2556
2557 const TfLiteType weight_type =
2558 context->tensors[node->inputs->data[weight_input_index]].type;
2559
2560 if (isLstmBasicKernel(node)) {
2561 Expect(weight_type == kTfLiteUInt8,
2562 NNAPIValidationFailureType::kUnsupportedInputType,
2563 "Basic LSTM Kernels support only UINT8 weights", &val_ctx);
2564
2565 const auto input_quantization_params =
2566 context->tensors[node->inputs->data[0]].params;
2567 Expect(input_quantization_params.scale == 1. / 128. &&
2568 input_quantization_params.zero_point == 128,
2569 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2570 "Invalid input quantization", &val_ctx);
2571
2572 const auto output_quantization_params =
2573 context->tensors[node->outputs->data[0]].params;
2574 Expect(output_quantization_params.scale == 1. / 128. &&
2575 output_quantization_params.zero_point == 128,
2576 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2577 "Invalid output quantization", &val_ctx);
2578
2579 const auto cell_state_quantization_params =
2580 context->tensors[node->outputs->data[1]].params;
2581 Expect(cell_state_quantization_params.scale == 16. / 32768. ||
2582 cell_state_quantization_params.zero_point == 0,
2583 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2584 "Invalid cell state quantization", &val_ctx);
2585
2586 auto is_const_tensor = [&node, &context](int tensor_idx) {
2587 return context->tensors[node->inputs->data[tensor_idx]]
2588 .allocation_type == kTfLiteMmapRo;
2589 };
2590
2591 Expect(is_const_tensor(2 /* kInputWeights */),
2592 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2593 "Weights tensor should be constant", &val_ctx);
2594 Expect(is_const_tensor(3 /* kInputBiases */),
2595 NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2596 "Biases tensor should be constant", &val_ctx);
2597
2598 return val_ctx.is_valid;
2599 } else {
2600 if (node->inputs->size == 24) {
2601 ExpectMinAndroidSdkVersion(android_sdk_version,
2602 kMinSdkVersionForNNAPI12, &val_ctx);
2603 }
2604
2605 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2606 Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
2607 weight_type == kTfLiteInt8,
2608 NNAPIValidationFailureType::kUnsupportedInputType,
2609 "Weight has to be Float32 or UINT8 or INT8", &val_ctx);
2610 } else {
2611 Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
2612 NNAPIValidationFailureType::kUnsupportedInputType,
2613 "Weight has to be Float32 or UINT8", &val_ctx);
2614 }
2615 }
2616 } break;
2617 case kTfLiteBuiltinMean: {
2618 ExpectMaxOpVersion(version, 2, &val_ctx);
2619 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2620 &val_ctx);
2621 if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
2622 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
2623 IsQuantized(context->tensors[node->inputs->data[0]].type),
2624 NNAPIValidationFailureType::kUnsupportedInputType,
2625 "Expected Float32 or Quantized input", &val_ctx);
2626 } else {
2627 Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2628 NNAPIValidationFailureType::kUnsupportedInputType,
2629 "Expected Float32 input", &val_ctx);
2630 }
2631 Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
2632 NNAPIValidationFailureType::kUnsupportedOutputType,
2633 "NNAPI does not support generating a scalar as output for MEAN.",
2634 &val_ctx);
2635
2636 auto input_param = context->tensors[node->inputs->data[0]].params;
2637 auto output_param = context->tensors[node->outputs->data[0]].params;
2638 Expect(input_param.scale == output_param.scale &&
2639 input_param.zero_point == output_param.zero_point,
2640 NNAPIValidationFailureType::kUnsupportedOutputType,
2641 "NNAPI requires that the input and output have the same "
2642 "quantization parameters.",
2643 &val_ctx);
2644 } break;
2645 case kTfLiteBuiltinEmbeddingLookup: {
2646 ExpectOpVersion(version, 1, &val_ctx);
2647 Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
2648 NNAPIValidationFailureType::kUnsupportedInputType,
2649              "NNAPI only supports float32 values.", &val_ctx);
2650 } break;
2651 case kTfLiteBuiltinHashtableLookup: {
2652 ExpectOpVersion(version, 1, &val_ctx);
2653 Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
2654 NNAPIValidationFailureType::kUnsupportedOutputType,
2655              "NNAPI only supports float32 output.", &val_ctx);
2656 } break;
2657 case kTfLiteBuiltinMaximum:
2658 case kTfLiteBuiltinMinimum: {
2659 ExpectMaxOpVersion(version, 3, &val_ctx);
2660 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2661 &val_ctx);
2662 const auto input_type = context->tensors[node->inputs->data[0]].type;
2663 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2664 kTfLiteInt8, kTfLiteInt32);
2665 const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
2666 if (operand0.dims->size == 0) {
2667 Expect(operand0.allocation_type == kTfLiteMmapRo,
2668 NNAPIValidationFailureType::kUnsupportedInputType,
2669 "Scalar operand should be constant", &val_ctx);
2670 }
2671 const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
2672 if (operand1.dims->size == 0) {
2673 Expect(operand1.allocation_type == kTfLiteMmapRo,
2674 NNAPIValidationFailureType::kUnsupportedInputType,
2675 "Scalar operand should be constant", &val_ctx);
2676 }
2677 } break;
2678 case kTfLiteBuiltinCast: {
2679 ExpectOpVersion(version, 1, &val_ctx);
2680 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2681 &val_ctx);
2682 const TfLiteType input_type =
2683 context->tensors[node->inputs->data[0]].type;
2684 const TfLiteType output_type =
2685 context->tensors[node->outputs->data[0]].type;
2686 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2687 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2688 kTfLiteUInt8, kTfLiteInt8);
2689
2690 ExpectTypeIn(
2691 output_type,
2692 {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
2693 NNAPIValidationFailureType::kUnsupportedOutputType,
2694 "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2695 "kTfLiteUInt8, kTfLiteInt8.",
2696 &val_ctx);
2697 } else {
2698 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2699 kTfLiteUInt8);
2700
2701 ExpectTypeIn(
2702 output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
2703 NNAPIValidationFailureType::kUnsupportedOutputType,
2704 "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2705 "kTfLiteUInt8.",
2706 &val_ctx);
2707 }
2708 } break;
2709 case kTfLiteBuiltinLeakyRelu:
2710 case kTfLiteBuiltinPrelu: {
2711 ExpectOpVersion(version, 1, &val_ctx);
2712 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2713 &val_ctx);
2714 const auto input_type = context->tensors[node->inputs->data[0]].type;
2715 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2716 kTfLiteInt8);
2717 } break;
2718 case kTfLiteBuiltinTile: {
2719 ExpectOpVersion(version, 1, &val_ctx);
2720 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2721 &val_ctx);
2722 const auto input_type = context->tensors[node->inputs->data[0]].type;
2723 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
2724 kTfLiteUInt8, kTfLiteInt32);
2725 const auto multipliers_type =
2726 context->tensors[node->inputs->data[1]].type;
2727 Expect(multipliers_type == kTfLiteInt32,
2728 NNAPIValidationFailureType::kUnsupportedInputType,
2729 "Multipliers should be Int32", &val_ctx);
2730 } break;
2731 case kTfLiteBuiltinLogicalOr:
2732 case kTfLiteBuiltinLogicalAnd:
2733 case kTfLiteBuiltinLogicalNot: {
2734 ExpectOpVersion(version, 1, &val_ctx);
2735 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2736 &val_ctx);
2737 const auto input_type = context->tensors[node->inputs->data[0]].type;
2738 Expect(input_type == kTfLiteBool,
2739 NNAPIValidationFailureType::kUnsupportedInputType,
2740 "Input should be bool", &val_ctx);
2741 } break;
2742 case kTfLiteBuiltinLess:
2743 case kTfLiteBuiltinLessEqual:
2744 case kTfLiteBuiltinGreater:
2745 case kTfLiteBuiltinGreaterEqual:
2746 case kTfLiteBuiltinEqual:
2747 case kTfLiteBuiltinNotEqual: {
2748 ExpectMaxOpVersion(version, 2, &val_ctx);
2749 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2750 &val_ctx);
2751 const auto input_type = context->tensors[node->inputs->data[0]].type;
2752 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2753 kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
2754 } break;
2755 case kTfLiteBuiltinNeg: {
2756 ExpectMaxOpVersion(version, 2, &val_ctx);
2757 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2758 &val_ctx);
2759 const auto input_type = context->tensors[node->inputs->data[0]].type;
2760 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
2761 } break;
2762 case kTfLiteBuiltinTopkV2: {
2763 ExpectMaxOpVersion(version, 2, &val_ctx);
2764 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2765 &val_ctx);
2766 const auto& input_type = context->tensors[node->inputs->data[0]].type;
2767 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2768 kTfLiteUInt8, kTfLiteInt8);
2769 const auto& k_param = context->tensors[node->inputs->data[1]];
2770 Expect(k_param.type == kTfLiteInt32 &&
2771 k_param.allocation_type == kTfLiteMmapRo,
2772 NNAPIValidationFailureType::kUnsupportedInputType,
2773 "K param should be a constant of type Int32", &val_ctx);
2774 } break;
2775 case kTfLiteBuiltinSelect: {
2776 ExpectMaxOpVersion(version, 2, &val_ctx);
2777 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2778 &val_ctx);
2779 const auto value_type = context->tensors[node->inputs->data[1]].type;
2780 EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
2781 kTfLiteUInt8, kTfLiteInt8);
2782 TfLiteIntArray* condition_shape =
2783 context->tensors[node->inputs->data[0]].dims;
2784 TfLiteIntArray* input_shape =
2785 context->tensors[node->inputs->data[1]].dims;
2786 Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
2787 NNAPIValidationFailureType::kUnsupportedOperandValue,
2788 "Condition and input tensors should have the same shape",
2789 &val_ctx);
2790 } break;
2791 case kTfLiteBuiltinGather: {
2792 ExpectOpVersion(version, 2, &val_ctx);
2793 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2794 &val_ctx);
2795 const auto input_type = context->tensors[node->inputs->data[0]].type;
2796 const auto& positions = context->tensors[node->inputs->data[1]];
2797
2798 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2799 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2800
2801 Expect(positions.type == kTfLiteInt32,
2802 NNAPIValidationFailureType::kUnsupportedInputType,
2803 "Positions type should be kTfLiteInt32", &val_ctx);
2804 Expect(positions.dims->size != 0,
2805 NNAPIValidationFailureType::kUnsupportedOperandRank,
2806 "0-dimension args are not supported by NNAPI.", &val_ctx);
2807 } break;
2808 case kTfLiteBuiltinBidirectionalSequenceLstm: {
2809 ExpectOpVersion(version, 1, &val_ctx);
2810 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2811 &val_ctx);
2812 Expect(!IsHybridOperator(context, builtin_code, node),
2813 NNAPIValidationFailureType::kUnsupportedHybridOperator,
2814 "Hybrid version of this op is not supported by NN API.", &val_ctx);
2815 } break;
2816 case kTfLiteBuiltinExpandDims: {
2817 ExpectOpVersion(version, 1, &val_ctx);
2818 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2819 &val_ctx);
2820 const auto input_type = context->tensors[node->inputs->data[0]].type;
2821 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2822 kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2823 const auto axis = context->tensors[node->inputs->data[1]];
2824 Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2825 NNAPIValidationFailureType::kUnsupportedInputType,
2826 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2827 } break;
2828 case kTfLiteBuiltinSplit: {
2829 ExpectOpVersion(version, 3, &val_ctx);
2830 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2831 &val_ctx);
2832 // Tensor indices: split_dim: 0, value: 1
2833 const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
2834 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2835 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2836 kTfLiteInt8, kTfLiteInt32);
2837 } else {
2838 EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2839 kTfLiteInt32);
2840 }
2841 const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
2842 Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2843 NNAPIValidationFailureType::kUnsupportedInputType,
2844 "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2845 } break;
2846 case kTfLiteBuiltinLogSoftmax: {
2847 ExpectOpVersion(version, 1, &val_ctx);
2848 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2849 &val_ctx);
2850 const auto input_type = context->tensors[node->inputs->data[0]].type;
2851 Expect(input_type == kTfLiteFloat32,
2852 NNAPIValidationFailureType::kUnsupportedInputType,
2853 "Input should be Float32.", &val_ctx);
2854 } break;
2855 case kTfLiteBuiltinQuantize: {
2856 ExpectMaxOpVersion(version, 2, &val_ctx);
2857 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2858 &val_ctx);
2859 const auto value_type = context->tensors[node->inputs->data[0]].type;
2860 Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
2861 NNAPIValidationFailureType::kUnsupportedInputType,
2862 "Value should be quantized or Float32.", &val_ctx);
2863 if (IsQuantized(value_type)) {
2864 const auto quantization_params =
2865 context->tensors[node->inputs->data[0]].params;
2866 Expect(quantization_params.scale > 0.f,
2867 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2868 "Quantization scale should be > 0.", &val_ctx);
2869 }
2870 const auto output_type = context->tensors[node->outputs->data[0]].type;
2871 if (android_sdk_version < kMinSdkVersionForNNAPI13) {
2872 Expect(output_type == kTfLiteUInt8,
2873 NNAPIValidationFailureType::kUnsupportedOutputType,
2874 "Output should be kTfLiteUInt8.", &val_ctx);
2875 } else {
2876 ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
2877 NNAPIValidationFailureType::kUnsupportedOutputType,
2878 "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
2879 }
2880 const auto quantization_params =
2881 context->tensors[node->outputs->data[0]].params;
2882 Expect(quantization_params.scale > 0.f,
2883 NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2884 "Quantization scale should be > 0.", &val_ctx);
2885 } break;
2886 case kTfLiteBuiltinReduceAny: {
2887 ExpectOpVersion(version, 2, &val_ctx);
2888 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2889 &val_ctx);
2890 Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2891 NNAPIValidationFailureType::kUnsupportedOutputType,
2892 "NNAPI does not support generating a scalar as output.", &val_ctx);
2893 } break;
2894 case kTfLiteBuiltinReduceMin:
2895 case kTfLiteBuiltinReduceMax: {
2896 ExpectMaxOpVersion(version, 2, &val_ctx);
2897 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2898 &val_ctx);
2899 const auto input_tensor = context->tensors[node->inputs->data[0]];
2900 const auto input_type = input_tensor.type;
2901 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2902 kTfLiteInt8);
2903 Expect(input_tensor.dims->size != 0,
2904 NNAPIValidationFailureType::kUnsupportedOutputType,
2905 "NNAPI does not support generating a scalar as output.", &val_ctx);
2906 } break;
2907 case kTfLiteBuiltinDepthToSpace: {
2908 const TfLiteType input_type =
2909 context->tensors[node->inputs->data[0]].type;
2910 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2911 kTfLiteInt8);
2912 } break;
2913 case kTfLiteBuiltinReduceProd:
2914 case kTfLiteBuiltinSum: {
2915 ExpectOpVersion(version, 1, &val_ctx);
2916 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2917 &val_ctx);
2918 Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2919 NNAPIValidationFailureType::kUnsupportedOutputType,
2920 "NNAPI does not support generating a scalar as output", &val_ctx);
2921 const auto input_type = context->tensors[node->inputs->data[0]].type;
2922 Expect(input_type == kTfLiteFloat32,
2923 NNAPIValidationFailureType::kUnsupportedInputType,
2924 "NNAPI only supports floating point input.", &val_ctx);
2925 } break;
2926 case kTfLiteBuiltinElu: {
2927 ExpectOpVersion(version, 1, &val_ctx);
2928 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2929 &val_ctx);
2930 const auto input_type = context->tensors[node->inputs->data[0]].type;
2931 Expect(input_type == kTfLiteFloat32,
2932 NNAPIValidationFailureType::kUnsupportedInputType,
2933 "NNAPI only supports floating point input.", &val_ctx);
2934 } break;
2935 case kTfLiteBuiltinFill: {
2936 ExpectOpVersion(version, 1, &val_ctx);
2937 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2938 &val_ctx);
2939 const auto& dims_tensor = context->tensors[node->inputs->data[0]];
2940 Expect(IsConstantTensor(&dims_tensor),
2941 NNAPIValidationFailureType::kUnsupportedInputType,
2942 "NNAPI doesn't support dynamic dimensions tensor.", &val_ctx);
2943 EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
2944 if (IsConstantTensor(&dims_tensor)) {
2945 Expect(dims_tensor.dims->data[0] != 0,
2946 NNAPIValidationFailureType::kUnsupportedOperandValue,
2947 "NNAPI doesn't support generating scalars from FILL", &val_ctx);
2948 if (dims_tensor.type == kTfLiteInt64) {
2949 bool fit_in_int32 =
2950 std::all_of(dims_tensor.data.i64,
2951 dims_tensor.data.i64 + dims_tensor.dims->data[0],
2952 [](int64_t dim) {
2953 return std::numeric_limits<int32_t>::min() <= dim &&
2954 dim <= std::numeric_limits<int32_t>::max();
2955 });
2956 Expect(fit_in_int32,
2957 NNAPIValidationFailureType::kUnsupportedOperandValue,
2958 "NNAPI only supports an int32 dimensions tensor. If the "
2959 "dimensions are int64 and constant, we can convert them "
2960 "to int32 as long as the values fit.",
2961 &val_ctx);
2962 }
2963 }
2964 const auto& value_tensor = context->tensors[node->inputs->data[1]];
2965 EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
2966 kTfLiteInt64);
2967 if (value_tensor.type == kTfLiteInt64 &&
2968 IsConstantTensor(&value_tensor)) {
2969 Expect(
2970 *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
2971 *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
2972 NNAPIValidationFailureType::kUnsupportedInputType,
2973 "NNAPI only supports int32 input. If the input is int64 and "
2974 "constant, we can convert it to int32 as long as the value "
2975 "fits.",
2976 &val_ctx);
2977 }
2978 } break;
2979 case kTfLiteBuiltinPack: {
2980 ExpectOpVersion(version, 2, &val_ctx);
2981 ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2982 &val_ctx);
2983 const auto input_type = context->tensors[node->inputs->data[0]].type;
2984 EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32,
2985 kTfLiteInt8);
2986 auto builtin = reinterpret_cast<TfLitePackParams*>(node->builtin_data);
2987 Expect(builtin->axis != -1 &&
2988 builtin->axis !=
2989 context->tensors[node->inputs->data[0]].dims->size,
2990 NNAPIValidationFailureType::kUnsupportedOperandValue,
2991 "NNAPI does not support axis being the last dimension", &val_ctx);
2992 } break;
2993 default:
2994 // All other operators are not mapped.
2995 AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
2996 "Unsupported operation type.", &val_ctx);
2997 }
2998 return val_ctx.is_valid;
2999 } // NOLINT(readability/fn_size)
3000
3001 TfLiteStatus NNAPIDelegateKernel::Map(
3002 TfLiteContext* context, int builtin_code, int version,
3003 int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
3004 ANeuralNetworksOperationType* nn_op_type) {
3005 auto add_zero_bias = [mapping_args](int input_id, int filter_id,
3006 int num_elements) -> void {
3007 // NNAPI requires a bias tensor, so we allocate a new tensor and fill
3008 // it with zeroes. It is deleted along with the other tensors in the
3009 // context during the subgraph destructor call.
3010 int bias_index = -1;
3011 mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
3012 TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
3013 const auto input_type = mapping_args.context->tensors[input_id].type;
3014 if (input_type == kTfLiteFloat32) {
3015 bias_tensor->type = kTfLiteFloat32;
3016 } else {
3017 bias_tensor->type = kTfLiteInt32;
3018 }
3019 // Create an array with the required bias shape and resize the bias
3020 // tensor.
3021 TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
3022 bias_shape->data[0] = num_elements;
3023 bias_tensor->allocation_type = kTfLiteDynamic;
3024 mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
3025 bias_shape);
3026 // Set the tensor's values to zeroes and add it using AddVector*, so
3027 // that the values are copied to NNAPI. We don't use AddTensor because
3028 // it doesn't copy values and the tensor we just created is not in
3029 // node->inputs.
3030 if (input_type == kTfLiteFloat32) {
3031 memset(bias_tensor->data.f, 0, num_elements * sizeof(float));
3032 mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
3033 num_elements);
3034 } else {
3035 memset(bias_tensor->data.i32, 0, num_elements * sizeof(int));
3036 const TfLiteTensor& input_tensor =
3037 mapping_args.context->tensors[input_id];
3038 const TfLiteTensor& filter_tensor =
3039 mapping_args.context->tensors[filter_id];
3040 // NNAPI requires bias scale to be a product of an input scale and
3041 // a filter scale.
3042 bias_tensor->params.scale =
3043 input_tensor.params.scale * filter_tensor.params.scale;
3044 mapping_args.builder->AddVectorInt32Operand(
3045 bias_tensor->data.i32, num_elements, bias_tensor->params.scale,
3046 /*zero_point=*/0);
3047 }
3048 };
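// Map each supported TFLite builtin to its NNAPI operation type and append
// the extra scalar/vector operands that the NNAPI signature expects beyond
// the tensor inputs added elsewhere.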
3049 switch (builtin_code) {
3050 case kTfLiteBuiltinAdd: {
3051 auto builtin =
3052 reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
3053 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3054 *nn_op_type = ANEURALNETWORKS_ADD;
3055 } break;
3056 case kTfLiteBuiltinArgMax: {
3057 *nn_op_type = ANEURALNETWORKS_ARGMAX;
3058 } break;
3059 case kTfLiteBuiltinArgMin: {
3060 *nn_op_type = ANEURALNETWORKS_ARGMIN;
3061 } break;
3062 case kTfLiteBuiltinMul: {
3063 auto builtin =
3064 reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
3065 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3066 *nn_op_type = ANEURALNETWORKS_MUL;
3067 } break;
3068 case kTfLiteBuiltinAveragePool2d: {
3069 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3070 *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
3071 } break;
3072 case kTfLiteBuiltinMaxPool2d: {
3073 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3074 *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
3075 } break;
3076 case kTfLiteBuiltinL2Pool2d: {
3077 mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3078 *nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
3079 } break;
3080 case kTfLiteBuiltinConv2d: {
3081 auto builtin =
3082 reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
3083 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3084 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3085 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3086 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3087 // NNAPI supports dilated Conv2D since NNAPI 1.2.
3088 if (builtin->dilation_width_factor != 1 ||
3089 builtin->dilation_height_factor != 1) {
3090 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
3091 mapping_args.builder->AddScalarInt32Operand(
3092 builtin->dilation_width_factor);
3093 mapping_args.builder->AddScalarInt32Operand(
3094 builtin->dilation_height_factor);
3095 }
3096 *nn_op_type = ANEURALNETWORKS_CONV_2D;
3097 } break;
3098 case kTfLiteBuiltinDepthwiseConv2d: {
3099 auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
3100 mapping_args.node->builtin_data);
3101 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3102 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3103 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3104 mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
3105 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3106 if (builtin->dilation_width_factor != 1 ||
3107 builtin->dilation_height_factor != 1) {
3108 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format.
3109 mapping_args.builder->AddScalarInt32Operand(
3110 builtin->dilation_width_factor);
3111 mapping_args.builder->AddScalarInt32Operand(
3112 builtin->dilation_height_factor);
3113 }
3114 *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
3115 } break;
3116 case kTfLiteBuiltinFullyConnected: {
3117 const bool is_bias_present =
3118 mapping_args.node->inputs->size == 3 &&
3119 mapping_args.node->inputs->data[2] != kTfLiteOptionalTensor;
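// NNAPI FULLY_CONNECTED always expects a bias operand, so synthesize a
// zero-filled bias when the TFLite node omits it.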
3120 if (!is_bias_present) {
3121 const int input_tensor_id =
3122 mapping_args.node->inputs->data[/*kInputTensor*/ 0];
3123 const int filter_tensor_id =
3124 mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3125 const int num_units =
3126 mapping_args.context->tensors[filter_tensor_id].dims->data[0];
3127 add_zero_bias(input_tensor_id, filter_tensor_id, num_units);
3128 }
3129 auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
3130 mapping_args.node->builtin_data);
3131 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3132 *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
3133 } break;
3134 case kTfLiteBuiltinHardSwish: {
3135 *nn_op_type = ANEURALNETWORKS_HARD_SWISH;
3136 } break;
3137 case kTfLiteBuiltinSoftmax: {
3138 auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
3139 mapping_args.node->builtin_data);
3140 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3141 // The optional scalar specifying the dimension the activation is
3142 // performed on is not added; it defaults to -1.
3143 *nn_op_type = ANEURALNETWORKS_SOFTMAX;
3144 } break;
3145 case kTfLiteBuiltinReshape: {
3146 if (mapping_args.node->inputs->size == 1) {
3147 // If there is no new_shape tensor, construct the new shape from params.
3148 auto* params = reinterpret_cast<TfLiteReshapeParams*>(
3149 mapping_args.node->builtin_data);
3150 int num_dimensions = params->num_dimensions;
3151 std::vector<int32_t> output_shape(num_dimensions);
3152 for (int i = 0; i < num_dimensions; ++i) {
3153 output_shape[i] = params->shape[i];
3154 }
3155 mapping_args.builder->AddVectorInt32Operand(
3156 output_shape.data(), static_cast<uint32_t>(num_dimensions));
3157 }
3158 *nn_op_type = ANEURALNETWORKS_RESHAPE;
3159 } break;
3160 case kTfLiteBuiltinResizeBilinear: {
3161 const int output_id = mapping_args.node->outputs->data[0];
3162 auto& output = mapping_args.context->tensors[output_id];
3163 const int output_height = output.dims->data[1];
3164 const int output_width = output.dims->data[2];
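// NNAPI takes the target size as two scalars, read here from the
// already-resolved NHWC output shape (dims = {batch, height, width,
// channels}).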
3165 mapping_args.builder->AddScalarInt32Operand(output_width);
3166 mapping_args.builder->AddScalarInt32Operand(output_height);
3167 auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
3168 mapping_args.node->builtin_data);
3169 if (builtin->align_corners == true ||
3170 builtin->half_pixel_centers == true) {
3171 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
3172 mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3173 mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3174 }
3175 *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
3176 } break;
3177 case kTfLiteBuiltinResizeNearestNeighbor: {
3178 const TfLiteTensor& new_shape =
3179 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3180 // NNAPI uses scalar inputs for height and width.
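// The TFLite size tensor is laid out as {height, width}, while NNAPI
// expects width first, hence the swapped indices below.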
3181 mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
3182 mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
3183 mapping_args.builder->AddScalarBoolOperand(false); // Use NHWC format
3184 auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
3185 mapping_args.node->builtin_data);
3186 if (builtin->align_corners == true ||
3187 builtin->half_pixel_centers == true) {
3188 mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3189 mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3190 }
3191 *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
3192 } break;
3193 case kTfLiteBuiltinSqueeze: {
3194 auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
3195 mapping_args.node->builtin_data);
3196 // Note that we add the squeeze dimensions even if the dimensions
3197 // were unspecified (empty), as NNAPI requires the operand.
3198 mapping_args.builder->AddVectorInt32Operand(
3199 builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
3200 static_cast<uint32_t>(builtin->num_squeeze_dims));
3201 *nn_op_type = ANEURALNETWORKS_SQUEEZE;
3202 } break;
3203 case kTfLiteBuiltinUnidirectionalSequenceLstm: {
3204 auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
3205 mapping_args.node->builtin_data);
3206 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3207 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3208 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3209 mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3210 const bool hybrid_op = IsHybridOperator(
3211 mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
3212 mapping_args.node);
3213 if (mapping_args.node->inputs->size == 24) {
3214 // Add layer normalization tensors if they are provided.
3215 for (int i = 20; i < 24; ++i) {
3216 const int input_index = mapping_args.node->inputs->data[i];
3217 if (input_index != kTfLiteOptionalTensor) {
3218 mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3219 } else {
3220 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3221 }
3222 }
3223 } else {
3224 for (int i = 0; i < 4; ++i) {
3225 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3226 }
3227 }
3228
3229 *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
3230 } break;
3231 case kTfLiteBuiltinL2Normalization: {
3232 *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
3233 } break;
3234 case kTfLiteBuiltinLocalResponseNormalization: {
3235 auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
3236 mapping_args.node->builtin_data);
3237 mapping_args.builder->AddScalarInt32Operand(builtin->radius);
3238 mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
3239 mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
3240 mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3241 *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
3242 } break;
3243 case kTfLiteBuiltinLshProjection: {
3244 auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
3245 mapping_args.node->builtin_data);
3246 int type = builtin->type;
3247 // In Android Q+, NNAPI uses 3 to denote
3248 // kTfLiteLshProjectionSparse.
3249 const int kNNAPILshProjectionSparse = 3;
3250 if (builtin->type == kTfLiteLshProjectionSparse) {
3251 type = kNNAPILshProjectionSparse;
3252 // Add NNAPI null weight operand.
3253 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3254 }
3255 mapping_args.builder->AddScalarInt32Operand(type);
3256 *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
3257 } break;
3258 case kTfLiteBuiltinConcatenation: {
3259 auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
3260 mapping_args.node->builtin_data);
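// NNAPI expects a non-negative axis, so a negative TFLite axis is
// normalized against the rank of the first input.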
3261 int axis = builtin->axis < 0
3262 ? mapping_args.context
3263 ->tensors[mapping_args.node->inputs->data[0]]
3264 .dims->size +
3265 builtin->axis
3266 : builtin->axis;
3267 mapping_args.builder->AddScalarInt32Operand(axis);
3268 *nn_op_type = ANEURALNETWORKS_CONCATENATION;
3269 } break;
3270 case kTfLiteBuiltinDequantize: {
3271 *nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
3272 } break;
3273 case kTfLiteBuiltinFloor: {
3274 *nn_op_type = ANEURALNETWORKS_FLOOR;
3275 } break;
3276 case kTfLiteBuiltinRelu: {
3277 *nn_op_type = ANEURALNETWORKS_RELU;
3278 } break;
3279 case kTfLiteBuiltinReluN1To1: {
3280 *nn_op_type = ANEURALNETWORKS_RELU1;
3281 } break;
3282 case kTfLiteBuiltinRelu6: {
3283 *nn_op_type = ANEURALNETWORKS_RELU6;
3284 } break;
3285 case kTfLiteBuiltinLogistic: {
3286 *nn_op_type = ANEURALNETWORKS_LOGISTIC;
3287 } break;
3288 case kTfLiteBuiltinTanh: {
3289 *nn_op_type = ANEURALNETWORKS_TANH;
3290 } break;
3291 case kTfLiteBuiltinSub: {
3292 auto builtin =
3293 reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
3294 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3295 *nn_op_type = ANEURALNETWORKS_SUB;
3296 } break;
3297 case kTfLiteBuiltinDiv: {
3298 auto builtin =
3299 reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
3300 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3301 *nn_op_type = ANEURALNETWORKS_DIV;
3302 } break;
3303 case kTfLiteBuiltinPad:
3304 case kTfLiteBuiltinPadv2: {
3305 // We prefer to map to PAD, since it is more widely supported. We map
3306 // to PAD_V2 only when there is a need to specify the padding value.
3308 if (mapping_args.node->inputs->size == 2) {
3309 *nn_op_type = ANEURALNETWORKS_PAD;
3310 } else {
3311 const int constant_value_id = mapping_args.node->inputs->data[2];
3312 if (constant_value_id == kTfLiteOptionalTensor) {
3313 *nn_op_type = ANEURALNETWORKS_PAD;
3314 } else {
3315 *nn_op_type = ANEURALNETWORKS_PAD_V2;
3316 }
3317 }
3318 } break;
3319 case kTfLiteBuiltinUnidirectionalSequenceRnn: {
3320 auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
3321 mapping_args.node->builtin_data);
3322 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3323 mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
3324 *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
3325 } break;
3326 case kTfLiteBuiltinSpaceToBatchNd: {
3327 *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
3328 } break;
3329 case kTfLiteBuiltinBatchToSpaceNd: {
3330 *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
3331 } break;
3332 case kTfLiteBuiltinStridedSlice: {
3333 auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
3334 mapping_args.node->builtin_data);
3335 mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
3336 mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
3337 mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
3338 *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
3339 } break;
3340 case kTfLiteBuiltinTranspose: {
3341 *nn_op_type = ANEURALNETWORKS_TRANSPOSE;
3342 } break;
3343 case kTfLiteBuiltinAbs: {
3344 *nn_op_type = ANEURALNETWORKS_ABS;
3345 } break;
3346 case kTfLiteBuiltinExp: {
3347 *nn_op_type = ANEURALNETWORKS_EXP;
3348 } break;
3349 case kTfLiteBuiltinLog: {
3350 *nn_op_type = ANEURALNETWORKS_LOG;
3351 } break;
3352 case kTfLiteBuiltinRsqrt: {
3353 *nn_op_type = ANEURALNETWORKS_RSQRT;
3354 } break;
3355 case kTfLiteBuiltinPow: {
3356 *nn_op_type = ANEURALNETWORKS_POW;
3357 } break;
3358 case kTfLiteBuiltinSlice: {
3359 *nn_op_type = ANEURALNETWORKS_SLICE;
3360 } break;
3361 case kTfLiteBuiltinSin: {
3362 *nn_op_type = ANEURALNETWORKS_SIN;
3363 } break;
3364 case kTfLiteBuiltinTransposeConv: {
3365 int input_tensor_flags = 0;
3366 const int input_tensor_id =
3367 mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
3368 const int weight_tensor_id =
3369 mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3370
3371 // Transpose convolution doesn't have a hybrid variant.
3372 const bool hybrid_op = false;
3373
3374 if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3375 mapping_args.builder->AddTensorInput(
3376 input_tensor_id, hybrid_op,
3377 input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
3378
3379 } else {
3380 mapping_args.builder->AddTensorInput(
3381 input_tensor_id, hybrid_op,
3382 input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
3383 }
3384 // Transpose convolution uses per-channel quantization with int8 inputs
3385 // even if the number of channels in the quantization parameters is
3386 // equal to 1 (as opposed to conv2d, which uses per-tensor quantization
3387 // in this case).
3388 mapping_args.builder->AddTensorInput(
3389 weight_tensor_id, hybrid_op,
3390 input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
3391
3392 const bool is_bias_present =
3393 mapping_args.node->inputs->size == 4 &&
3394 mapping_args.node->inputs->data[/*kBiasTensor*/ 3] !=
3395 kTfLiteOptionalTensor;
3396
3397 if (is_bias_present) {
3398 mapping_args.builder->AddTensorInput(
3399 mapping_args.node->inputs->data[/*kBiasTensor*/ 3], hybrid_op);
3400 } else {
3401 const TfLiteTensor& output_shape =
3402 mapping_args.context->tensors[mapping_args.node->inputs
3403 ->data[/*kOutputShapeTensor*/ 0]];
3404 const int output_depth = output_shape.data.i32[3];
3405 add_zero_bias(input_tensor_id, weight_tensor_id, output_depth);
3406 }
3407 mapping_args.builder->AddTensorInput(
3408 mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
3409
3410 auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
3411 mapping_args.node->builtin_data);
3412 mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3413 mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3414 mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3415 mapping_args.builder->AddScalarInt32Operand(
3416 /*ANEURALNETWORKS_FUSED_NONE*/ 0);
3417 // Use NHWC layout for input and output.
3418 mapping_args.builder->AddScalarBoolOperand(false);
3419 *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
3420 } break;
3421 case kTfLiteBuiltinSqrt: {
3422 *nn_op_type = ANEURALNETWORKS_SQRT;
3423 } break;
3424 case kTfLiteBuiltinRnn: {
3425 // NNAPI needs both state_in and state_out.
3426 int ann_index;
3427 mapping_args.builder->AddStateFloat32Tensor(
3428 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
3429 &ann_index);
3430 mapping_args.model_state_outputs->push_back(ann_index);
3431 mapping_args.model_state_tfl_inputs->push_back(
3432 mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
3433 auto builtin =
3434 reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
3435 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3436 *nn_op_type = ANEURALNETWORKS_RNN;
3437 } break;
3438 case kTfLiteBuiltinSpaceToDepth: {
3439 auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
3440 mapping_args.node->builtin_data);
3441 mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3442 *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
3443 } break;
3444 case kTfLiteBuiltinSvdf: {
3445 // NNAPI needs both state_in and state_out.
3446 int ann_index;
3447 mapping_args.builder->AddStateFloat32Tensor(
3448 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
3449 &ann_index);
3450 mapping_args.model_state_outputs->push_back(ann_index);
3451 mapping_args.model_state_tfl_inputs->push_back(
3452 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
3453
3454 auto builtin =
3455 reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
3456 mapping_args.builder->AddScalarInt32Operand(builtin->rank);
3457 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3458 *nn_op_type = ANEURALNETWORKS_SVDF;
3459 } break;
3460 case kTfLiteBuiltinLstm: {
3461 if (isLstmBasicKernel(mapping_args.node)) {
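// The quantized "basic" LSTM kernel maps to QUANTIZED_16BIT_LSTM: the
// combined TFLite weight and bias tensors are decomposed below into the
// per-gate operands that NNAPI expects.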
3462 const auto output_dims =
3463 mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
3464 .dims;
3465
3466 // Inputs kInputData
3467 mapping_args.builder->AddTensorInput(
3468 mapping_args.node->inputs->data[0 /* kInputData */],
3469 /* hybrid_op */ false,
3470 /* scalar_as_tensor */ false);
3471
3472 // The 8 weight tensors are set by decomposing the kInputWeights
3473 // param.
3474 const auto weight_tensor =
3475 mapping_args.context->tensors[mapping_args.node->inputs
3476 ->data[2 /* kInputWeights */]];
3477
3478 std::vector<uint8_t> recurrent_to_input;
3479 std::vector<uint8_t> input_to_input;
3480 std::vector<uint8_t> recurrent_to_cell;
3481 std::vector<uint8_t> input_to_cell;
3482 std::vector<uint8_t> recurrent_to_forget;
3483 std::vector<uint8_t> input_to_forget;
3484 std::vector<uint8_t> recurrent_to_output;
3485 std::vector<uint8_t> input_to_output;
3486 tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
3487 weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
3488 &input_to_input, &recurrent_to_cell, &input_to_cell,
3489 &recurrent_to_forget, &input_to_forget, &recurrent_to_output,
3490 &input_to_output);
3491
3492 TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
3493 TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
3494 tflite::delegate::nnapi::SetWeightSubmatrixDims(
3495 weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
3496
3497 int new_tensor_index = -1;
3498
3499 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3500 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3501 input_weight_dims, input_to_input, weight_tensor.params,
3502 &new_tensor_index);
3503
3504 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3505 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3506 input_weight_dims, input_to_forget, weight_tensor.params,
3507 &new_tensor_index);
3508
3509 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3510 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3511 input_weight_dims, input_to_cell, weight_tensor.params,
3512 &new_tensor_index);
3513
3514 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3515 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3516 input_weight_dims, input_to_output, weight_tensor.params,
3517 &new_tensor_index);
3518
3519 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3520 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3521 recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
3522 &new_tensor_index);
3523
3524 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3525 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3526 recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
3527 &new_tensor_index);
3528
3529 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3530 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3531 recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
3532 &new_tensor_index);
3533
3534 mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3535 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3536 recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
3537 &new_tensor_index);
3538
3539 TfLiteIntArrayFree(input_weight_dims);
3540 TfLiteIntArrayFree(recurrent_weight_dims);
3541
3542 // Biases have to be split in four.
3543 const auto bias_size = output_dims->data[1];
3544 const TfLiteTensor& biases_tensor =
3545 mapping_args.context->tensors[mapping_args.node->inputs
3546 ->data[3 /* kInputBiases */]];
3547
3548 std::vector<int32_t> input_bias;
3549 std::vector<int32_t> cell_bias;
3550 std::vector<int32_t> forget_bias;
3551 std::vector<int32_t> output_bias;
3552 delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
3553 &input_bias, &cell_bias,
3554 &forget_bias, &output_bias);
3555
3556 int input_bias_tensor = -1;
3557 mapping_args.builder->AddNewInputConstantTensor<int32_t>(
3558 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
3559 biases_tensor.params, &input_bias_tensor);
3560 int forget_bias_tensor = -1;
3561 mapping_args.builder->AddNewInputConstantTensor(
3562 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3563 forget_bias, biases_tensor.params, &forget_bias_tensor);
3564 int cell_gate_bias_tensor = -1;
3565 mapping_args.builder->AddNewInputConstantTensor(
3566 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
3567 biases_tensor.params, &cell_gate_bias_tensor);
3568 int output_gate_bias_tensor = -1;
3569 mapping_args.builder->AddNewInputConstantTensor(
3570 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3571 output_bias, biases_tensor.params, &output_gate_bias_tensor);
3572
3573 mapping_args.builder->AddTensorInput(
3574 mapping_args.node->inputs->data[4 /* kInputPrevState */],
3575 /* hybrid_op */ false,
3576 /* scalar_as_tensor */ false);
3577
3578 // kInputPrevActivation
3579 mapping_args.builder->AddTensorInput(
3580 mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
3581 /* hybrid_op */ false,
3582 /* scalar_as_tensor */ false);
3583
3584 // Configure the copy from the activation and state outputs to their
3585 // associated inputs.
3586 mapping_args.feedback_loops->push_back(std::make_tuple(
3587 mapping_args.node->outputs->data[0 /*kOutputActivation*/],
3588 mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
3589
3590 mapping_args.feedback_loops->push_back(std::make_tuple(
3591 mapping_args.node->outputs->data[1 /*kOutputState*/],
3592 mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
3593
3594 // OUTPUTS
3595 // Set only the first two, since the remaining ones are ignored by
3596 // NNAPI.
3597 mapping_args.builder->AddTensorOutput(
3598 mapping_args.node->outputs->data[1 /* kOutputState */], 0);
3599
3600 mapping_args.builder->AddTensorOutput(
3601 mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
3602
3603 *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
3604 } else {
3605 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
3606 mapping_args.node->builtin_data);
3607 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3608 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3609 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3610
3611 // The current NNAPI implementation requires the scratch_buffer as an
3612 // output.
3613 mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
3614
3615 // NNAPI needs both state_in and state_out for cell_state and
3616 // output_state.
3617 int ann_index;
3618 mapping_args.builder->AddStateFloat32Tensor(
3619 mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
3620 &ann_index);
3621 mapping_args.model_state_outputs->push_back(ann_index);
3622 mapping_args.model_state_tfl_inputs->push_back(
3623 mapping_args.node->inputs
3624 ->data[/*kInputActivationStateTensor*/ 18]);
3625 mapping_args.builder->AddStateFloat32Tensor(
3626 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
3627 &ann_index);
3628 mapping_args.model_state_outputs->push_back(ann_index);
3629 mapping_args.model_state_tfl_inputs->push_back(
3630 mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
3631
3632 const bool hybrid_op = IsHybridOperator(
3633 mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
3634
3635 if (mapping_args.node->inputs->size == 24) {
3636 for (int i = 20; i < 24; ++i) {
3637 const auto input_index = mapping_args.node->inputs->data[i];
3638 if (input_index != kTfLiteOptionalTensor) {
3639 mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3640 } else {
3641 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3642 }
3643 }
3644 }
3645
3646 *nn_op_type = ANEURALNETWORKS_LSTM;
3647 }
3648 } break;
3649 case kTfLiteBuiltinMean: {
3650 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3651 mapping_args.node->builtin_data);
3652 int32_t keep_dims = 0;
3653 if (builtin->keep_dims) keep_dims = 1;
3654 mapping_args.builder->AddScalarInt32Operand(keep_dims);
3655 *nn_op_type = ANEURALNETWORKS_MEAN;
3656 } break;
3657 case kTfLiteBuiltinEmbeddingLookup: {
3658 *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
3659 } break;
3660 case kTfLiteBuiltinHashtableLookup: {
3661 *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
3662 } break;
3663 case kTfLiteBuiltinMaximum: {
3664 *nn_op_type = ANEURALNETWORKS_MAXIMUM;
3665 } break;
3666 case kTfLiteBuiltinMinimum: {
3667 *nn_op_type = ANEURALNETWORKS_MINIMUM;
3668 } break;
3669 case kTfLiteBuiltinCast: {
3670 *nn_op_type = ANEURALNETWORKS_CAST;
3671 } break;
3672 case kTfLiteBuiltinLeakyRelu: {
3673 const auto input_type =
3674 mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
3675 .type;
3676 auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
3677 mapping_args.node->builtin_data);
3678
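// NNAPI has no LEAKY_RELU, so it is emulated with PRELU using a
// single-element constant alpha tensor; for quantized inputs, alpha is
// encoded through the tensor scale with a stored value of 1.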
3679 TfLiteTensor alpha_tensor;
3680 alpha_tensor.type = input_type;
3681 alpha_tensor.allocation_type = kTfLiteDynamic;
3682 alpha_tensor.dims = TfLiteIntArrayCreate(1);
3683 alpha_tensor.dims->data[0] = 1;
3684 alpha_tensor.params.zero_point = 0;
3685
3686 int new_tensor_index = -1;
3687 if (input_type == kTfLiteFloat32) {
3688 alpha_tensor.params.scale = 0;
3689 std::vector<float> alpha_value = {builtin->alpha};
3690 mapping_args.builder->AddNewInputConstantTensor(
3691 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
3692 alpha_value, alpha_tensor.params, &new_tensor_index);
3693 } else if (input_type == kTfLiteInt8 &&
3694 android_sdk_version >= kMinSdkVersionForNNAPI13) {
3695 alpha_tensor.params.scale = builtin->alpha;
3696 std::vector<int8_t> alpha_value = {1};
3697 mapping_args.builder->AddNewInputConstantTensor(
3698 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
3699 alpha_tensor.dims, alpha_value, alpha_tensor.params,
3700 &new_tensor_index);
3701 } else {
3702 alpha_tensor.params.scale = builtin->alpha;
3703 std::vector<uint8_t> alpha_value = {1};
3704 mapping_args.builder->AddNewInputConstantTensor(
3705 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3706 alpha_tensor.dims, alpha_value, alpha_tensor.params,
3707 &new_tensor_index);
3708 }
3709
3710 *nn_op_type = ANEURALNETWORKS_PRELU;
3711 } break;
3712 case kTfLiteBuiltinPrelu: {
3713 *nn_op_type = ANEURALNETWORKS_PRELU;
3714 } break;
3715 case kTfLiteBuiltinTile: {
3716 *nn_op_type = ANEURALNETWORKS_TILE;
3717 } break;
3718 case kTfLiteBuiltinLogicalOr: {
3719 *nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
3720 } break;
3721 case kTfLiteBuiltinLogicalAnd: {
3722 *nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
3723 } break;
3724 case kTfLiteBuiltinLogicalNot: {
3725 *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
3726 } break;
3727 case kTfLiteBuiltinLess: {
3728 *nn_op_type = ANEURALNETWORKS_LESS;
3729 } break;
3730 case kTfLiteBuiltinLessEqual: {
3731 *nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
3732 } break;
3733 case kTfLiteBuiltinGreater: {
3734 *nn_op_type = ANEURALNETWORKS_GREATER;
3735 } break;
3736 case kTfLiteBuiltinGreaterEqual: {
3737 *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
3738 } break;
3739 case kTfLiteBuiltinEqual: {
3740 *nn_op_type = ANEURALNETWORKS_EQUAL;
3741 } break;
3742 case kTfLiteBuiltinNotEqual: {
3743 *nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
3744 } break;
3745 case kTfLiteBuiltinNeg: {
3746 *nn_op_type = ANEURALNETWORKS_NEG;
3747 } break;
3748 case kTfLiteBuiltinTopkV2: {
3749 const TfLiteTensor& k_param =
3750 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3751 mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
3752 *nn_op_type = ANEURALNETWORKS_TOPK_V2;
3753 } break;
3754 case kTfLiteBuiltinSelect: {
3755 *nn_op_type = ANEURALNETWORKS_SELECT;
3756 } break;
3757 case kTfLiteBuiltinGather: {
3758 auto builtin = reinterpret_cast<TfLiteGatherParams*>(
3759 mapping_args.node->builtin_data);
3760 mapping_args.builder->AddScalarInt32Operand(builtin->axis);
3761 mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
3762 /* hybrid_op */ false,
3763 /* tensor_flags */ 0);
3764 *nn_op_type = ANEURALNETWORKS_GATHER;
3765 } break;
3766 case kTfLiteBuiltinBidirectionalSequenceLstm: {
3767 auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
3768 mapping_args.node->builtin_data);
3769 mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3770 mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3771 mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3772 mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
3773 mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3774 // TF Lite doesn't support layer normalization in bidirectional
3775 // sequence LSTM, so we insert optional tensors for NNAPI.
3776 for (int i = 0; i < 8; ++i) {
3777 mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3778 }
3779 *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
3780 } break;
3781 case kTfLiteBuiltinExpandDims: {
3782 const TfLiteTensor& axis_param =
3783 mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3784 mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
3785 *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
3786 } break;
3787 case kTfLiteBuiltinSplit: {
3788 const TfLiteTensor& axis =
3789 mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
3790 auto builtin =
3791 reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
3792 mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
3793 mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
3794 *nn_op_type = ANEURALNETWORKS_SPLIT;
3795 } break;
3796 case kTfLiteBuiltinLogSoftmax: {
3797 // Scaling and axis are hardcoded to 1 and -1, respectively, in
3798 // TFLite.
3799 mapping_args.builder->AddScalarFloat32Operand(1);
3800 mapping_args.builder->AddScalarInt32Operand(-1);
3801 *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
3802 } break;
3803 case kTfLiteBuiltinQuantize: {
3804 auto input_index = mapping_args.node->inputs->data[0];
3805 // NNAPI doesn't support requantization; it only supports quantization
3806 // from float. If the input is quantized, dequantize it by adding a
3807 // Dequantize node before this one.
3808 if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
3809 mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
3810 mapping_args.node_index);
3811 }
3812
3813 *nn_op_type = ANEURALNETWORKS_QUANTIZE;
3814 } break;
3815 case kTfLiteBuiltinReduceAny: {
3816 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3817 mapping_args.node->builtin_data);
3818 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3819 *nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
3820 } break;
3821 case kTfLiteBuiltinReduceMin: {
3822 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3823 mapping_args.node->builtin_data);
3824 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3825 *nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
3826 } break;
3827 case kTfLiteBuiltinReduceMax: {
3828 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3829 mapping_args.node->builtin_data);
3830 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3831 *nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
3832 } break;
3833 case kTfLiteBuiltinDepthToSpace: {
3834 auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
3835 mapping_args.node->builtin_data);
3836 mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3837 *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
3838 } break;
3839 case kTfLiteBuiltinReduceProd: {
3840 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3841 mapping_args.node->builtin_data);
3842 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3843 *nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
3844 } break;
3845 case kTfLiteBuiltinSum: {
3846 auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3847 mapping_args.node->builtin_data);
3848 mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3849 *nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
3850 } break;
3851 case kTfLiteBuiltinElu: {
3852 mapping_args.builder->AddScalarFloat32Operand(1.0);
3853 *nn_op_type = ANEURALNETWORKS_ELU;
3854 } break;
3855 case kTfLiteBuiltinFill: {
3856 *nn_op_type = ANEURALNETWORKS_FILL;
3857 } break;
3858 default:
3859 // All other operators are not mapped.
3860 return kTfLiteError;
3861 }
3862 return kTfLiteOk;
3863 }
3864
3865 // Initialize the kernel (a NN model).
3866 TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
3867 const TfLiteDelegateParams* params,
3868 int* nnapi_errno) {
3869 for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
3870 nodes_.push_back(node_index);
3871 }
3872
3873 // Initialize densify map and dequantize map.
3874 densify_output_to_node_mapping_ = std::vector<int>(context->tensors_size, -1);
3875 non_const_dequantize_output_to_node_mapping_ =
3876 std::vector<int>(context->tensors_size, -1);
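// An entry of -1 means the corresponding tensor is not produced by a
// Densify or non-constant Dequantize node.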
3877 const auto delegate_options =
3878 StatefulNnApiDelegate::GetOptions(params->delegate);
3879 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
3880 ShouldUseTargetDevices(delegate_options, nnapi_)) {
3881 TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
3882 nnapi_errno, &nnapi_devices_));
3883
3884 if (nnapi_devices_.empty()) {
3885 context->ReportError(
3886 context, "NNAPI delegate requested but no accelerators available.");
3887 return kTfLiteError;
3888 }
3889 }
3890
3891 // Mark the handle backed tensors.
3892 tensor_memory_map_ =
3893 &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
3894
3895 if (!nn_model_) {
3896 ANeuralNetworksModel* model = nullptr;
3897 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3898 nnapi_->ANeuralNetworksModel_create(&model),
3899 "creating NNAPI model", nnapi_errno);
3900 nn_model_.reset(model);
3901
3902 TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
3903 params->input_tensors,
3904 params->output_tensors, nnapi_errno));
3905 }
3906
3907 auto* cache = StatefulNnApiDelegate::GetCache(params->delegate);
3908 if (cache) {
3909 // Compilation caching is enabled; construct the uint8 token.
3910 uint64_t token_parts[4];
3911 // model_token is incorporated into partition_key by TFLite
3912 // Serialization. NNAPI uses a 256-bit key, but we can just tile the
3913 // unique 64-bit fingerprint from TFLite.
3914 auto partition_entry = cache->GetEntryForKernel(kNnapiId, context, params);
3915 token_parts[0] = partition_entry.GetFingerprint();
3916 token_parts[1] = partition_entry.GetFingerprint();
3917 token_parts[2] = partition_entry.GetFingerprint();
3918 token_parts[3] = partition_entry.GetFingerprint();
3919 // TODO(b/172238515): get token size from header instead of hardcoding.
3920 // Allocate one extra 'null' byte to avoid bugs with backends that might
3921 // be doing strlen() on the token ptr.
3922 std::vector<uint8_t> nnapi_cache_token(33, 0);
3923 // Copy the token bits.
3924 uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
3925 for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
3926 nnapi_cache_token[i] = p[i];
3927 }
3928
3929 nn_compilation_cache_token_ = nnapi_cache_token;
3930 }
3931
3932 initialised_ = true;
3933
3934 return kTfLiteOk;
3935 }
3936
3937 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
3938 TfLiteNode* node, int* nnapi_errno) {
3939 if (!initialised_) {
3940 return kTfLiteError;
3941 }
3942
3943 const auto delegate_options =
3944 StatefulNnApiDelegate::GetOptions(node->delegate);
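// The compilation is created once; subsequent Prepare calls reuse it.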
3945 if (nn_compilation_) {
3946 return kTfLiteOk;
3947 }
3948
3949 ANeuralNetworksCompilation* compilation = nullptr;
3950 if (!nnapi_devices_.empty()) {
3951 // Compile for the selected accelerator.
3952 RETURN_TFLITE_ERROR_IF_NN_ERROR(
3953 context,
3954 nnapi_->ANeuralNetworksCompilation_createForDevices(
3955 nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
3956 &compilation),
3957 "creating NNAPI model for given devices", nnapi_errno);
3958 } else {
3959 // Trying to call ANeuralNetworksCompilation_create when the delegate is
3960 // constructed from a support library would result in a crash.
3961 if (nnapi_->ANeuralNetworksCompilation_create != nullptr) {
3962 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3963 nnapi_->ANeuralNetworksCompilation_create(
3964 nn_model_.get(), &compilation),
3965 "creating NNAPI compilation",
3966 nnapi_errno);
3967 } else {
3968 TF_LITE_KERNEL_LOG(
3969 context,
3970 "Attempted to call ANeuralNetworksCompilation_create from NNAPI "
3971 "delegate that is constructed from a support library");
3972 return kTfLiteError;
3973 }
3974 }
3975
3976 auto preference = delegate_options.execution_preference;
3977 if (preference !=
3978 StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
3979 const int preference_result =
3980 nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
3981 preference);
3982 if (preference_result != ANEURALNETWORKS_NO_ERROR) {
3983 nnapi_->ANeuralNetworksCompilation_free(compilation);
3984 compilation = nullptr;
3985 }
3986 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
3987 "setting compilation preferences",
3988 nnapi_errno);
3989 }
3990
3991 if (!nn_compilation_cache_token_.empty()) {
3992 const char* cache_dir = delegate_options.cache_dir;
3993 const int set_caching_result =
3994 nnapi_->ANeuralNetworksCompilation_setCaching(
3995 compilation, cache_dir, nn_compilation_cache_token_.data());
3996 if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
3997 nnapi_->ANeuralNetworksCompilation_free(compilation);
3998 compilation = nullptr;
3999 }
4000 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
4001 "configuring NNAPI caching", nnapi_errno);
4002 }
4003 // Set compilation timeout if applicable.
4004 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4005 if (delegate_options.max_compilation_timeout_duration_ns > 0) {
4006 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4007 context,
4008 nnapi_->ANeuralNetworksCompilation_setTimeout(
4009 compilation,
4010 delegate_options.max_compilation_timeout_duration_ns),
4011 "setting compilation timeout", nnapi_errno);
4012 }
4013 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4014 context,
4015 nnapi_->ANeuralNetworksCompilation_setPriority(
4016 compilation, delegate_options.execution_priority),
4017 "setting compilation priority", nnapi_errno);
4018 }
4019 const int finish_result =
4020 nnapi_->ANeuralNetworksCompilation_finish(compilation);
4021 if (finish_result != ANEURALNETWORKS_NO_ERROR) {
4022 nnapi_->ANeuralNetworksCompilation_free(compilation);
4023 compilation = nullptr;
4024 }
4025 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
4026 "completing NNAPI compilation", nnapi_errno);
4027 nn_compilation_.reset(compilation);
4028
4029 bool should_use_burst_mode = delegate_options.use_burst_computation;
4030 // Override should_use_burst_mode to true if the selected NNAPI devices are of
4031 // NNAPI feature level 5 or higher.
4032 if (!nnapi_devices_.empty() &&
4033 target_feature_level_ >= kNNAPIRuntimeFeatureLevel5) {
4034 should_use_burst_mode = true;
4035 }
4036 // Create a burst object to be reused across a sequence of executions.
4037 if (should_use_burst_mode &&
4038 nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
4039 nnapi_->ANeuralNetworksBurst_create) {
4040 ANeuralNetworksBurst* burst = nullptr;
4041 const int create_burst_result =
4042 nnapi_->ANeuralNetworksBurst_create(nn_compilation_.get(), &burst);
4043 if (create_burst_result != ANEURALNETWORKS_NO_ERROR) {
4044 nnapi_->ANeuralNetworksBurst_free(burst);
4045 burst = nullptr;
4046 }
4047 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, create_burst_result,
4048 "creating NNAPI burst", nnapi_errno);
4049 nn_burst_.reset(burst);
4050 }
4051
4052 return kTfLiteOk;
4053 }
4054
4055 TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
4056 TfLiteContext* context, std::vector<int>* supported_nodes,
4057 int* nnapi_errno) {
4058 if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
4059 return kTfLiteError;
4060 }
4061
4062 const auto nnapi_model_size = nnapi_to_tflite_op_mapping_.size();
4063
4064 // Determine the list of operations the device actually supports
4065 std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
4066
4067 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4068 context,
4069 nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
4070 nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
4071 nnapi_ops_support_flags.get()),
4072 "Checking supported operations for devices", nnapi_errno);
4073
4074 // A TfLite op is supported only if all the associated NNAPI ones are.
4075 auto tflite_ops_support_status = std::map<int, bool>();
4076 std::for_each(nodes_.begin(), nodes_.end(),
4077 [&tflite_ops_support_status](int tflite_node_index) {
4078 tflite_ops_support_status[tflite_node_index] = true;
4079 });
4080 for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
4081 nnapi_op_index++) {
4082 const auto tflite_op_index = nnapi_to_tflite_op_mapping_[nnapi_op_index];
4083 tflite_ops_support_status[tflite_op_index] &=
4084 nnapi_ops_support_flags[nnapi_op_index];
4085 if (!tflite_ops_support_status[tflite_op_index]) {
4086 if (std::count(non_const_dequantize_output_to_node_mapping_.begin(),
4087 non_const_dequantize_output_to_node_mapping_.end(), -1) <
4088 non_const_dequantize_output_to_node_mapping_.size() ||
4089 std::count(densify_output_to_node_mapping_.begin(),
4090 densify_output_to_node_mapping_.end(),
4091 -1) < densify_output_to_node_mapping_.size()) {
4092 // Only allow full model delegation for sparse model.
4093 return kTfLiteOk;
4094 }
4095 }
4096 }
4097
4098 supported_nodes->clear();
4099 std::for_each(nodes_.begin(), nodes_.end(),
4100 [&supported_nodes, &tflite_ops_support_status](int node_index) {
4101 if (tflite_ops_support_status[node_index]) {
4102 supported_nodes->push_back(node_index);
4103 }
4104 });
4105
4106 return kTfLiteOk;
4107 }
4108
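// Runs a single inference through NNAPI: re-creates and re-configures the
// execution object when needed (pre-API 31, dynamic dimensions, or changed
// buffer handles), binds inputs and outputs to the shared memory pools or to
// registered memory objects, dispatches the computation (burst, synchronous,
// or legacy event-based), and copies results back to the TfLite tensors.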
4109 TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
4110 TfLiteNode* node, int* nnapi_errno) {
4111 const bool allow_padding =
4112 nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 &&
4113 nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding != nullptr;
4114 const auto delegate_options =
4115 StatefulNnApiDelegate::GetOptions(node->delegate);
4116
4117 // Check for conditions where we need to re-create NN Execution object and
4118 // re-configure the settings and inputs / outputs.
4119 bool should_reset_execution = false;
4120 if (nnapi_->nnapi_runtime_feature_level <= kMinSdkVersionForNNAPI13 ||
4121 delegate_options.allow_dynamic_dimensions) {
4122     // Must reset the execution before Android API 31, or when using dynamic dimensions.
4123 should_reset_execution = true;
4124 } else {
4125 // For Android API 31+, check for BufferHandle changes and reset the
4126 // execution if any.
4127 std::vector<int> curr_in_tensor_handle_map(context->tensors_size);
4128 for (int i = 0; i < curr_in_tensor_handle_map.size(); i++) {
4129 curr_in_tensor_handle_map[i] = context->tensors[i].buffer_handle;
4130 }
4131 if (!(tensor_handle_map_ == curr_in_tensor_handle_map)) {
4132 should_reset_execution = true;
4133 tensor_handle_map_ = curr_in_tensor_handle_map;
4134 }
4135 }
4136 if (should_reset_execution) {
4137 ANeuralNetworksExecution* execution = nullptr;
4138 RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4139 nnapi_->ANeuralNetworksExecution_create(
4140 nn_compilation_.get(), &execution),
4141 "creating NNAPI execution", nnapi_errno);
4142 if (nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13) {
4143 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4144 context,
4145 nnapi_->ANeuralNetworksExecution_setReusable(execution,
4146 /*reusable=*/true),
4147 "making execution reusable", nnapi_errno);
4148 }
4149 nn_execution_.reset(execution);
4150
4151 // Allow padding bytes for execution inputs & outputs if applicable.
4152 if (allow_padding) {
4153 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4154 context,
4155 nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding(
4156 nn_execution_.get(),
4157 /*enable=*/true),
4158           "setting allow padding for execution inputs and outputs",
4159 nnapi_errno);
4160 }
4161     // Set execution timeout if applicable.
4162 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4163 if (delegate_options.max_execution_timeout_duration_ns > 0) {
4164 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4165 context,
4166 nnapi_->ANeuralNetworksExecution_setTimeout(
4167 nn_execution_.get(),
4168 delegate_options.max_execution_timeout_duration_ns),
4169 "setting execution timeout", nnapi_errno);
4170 }
4171 if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
4172 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4173 context,
4174 nnapi_->ANeuralNetworksExecution_setLoopTimeout(
4175 nn_execution_.get(),
4176 delegate_options.max_execution_loop_timeout_duration_ns),
4177 "setting execution loop timeout", nnapi_errno);
4178 }
4179 }
4180 // Check if the size of input and output memory pool needs to be resized.
4181 if (delegate_options.allow_dynamic_dimensions) {
4182 size_t total_input_byte_size = 0;
4183       // Compute the total byte size needed for the NNAPI input memory pool.
4184 for (int i : TfLiteIntArrayView(node->inputs)) {
4185 // Constant tensors are not NNAPI inputs.
4186 if (i != kTfLiteOptionalTensor &&
4187 context->tensors[i].allocation_type != kTfLiteMmapRo &&
4188 // The delegate might not have mapped this input (this can
4189 // happen if one tensor is split in several ones)
4190 operand_mapping_.lite_index_to_ann(i) != -1) {
4191 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4192 continue;
4193 }
4194 const TfLiteType nn_type_conversion =
4195 operand_mapping_.lite_index_to_ann_type_conversion(i);
4196 int tensor_size = 0;
4197 if (nn_type_conversion == kTfLiteNoType) {
4198 tensor_size = context->tensors[i].bytes;
4199 } else {
4200 size_t type_size;
4201 TF_LITE_ENSURE_OK(
4202 context,
4203 GetSizeOfType(context, nn_type_conversion, &type_size));
4204 tensor_size = NumElements(&context->tensors[i]) * type_size;
4205 }
4206 total_input_byte_size += tensor_size;
4207 total_input_byte_size += GetNumPaddingBytes(tensor_size);
4208 }
4209 }
4210 if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
4211 nn_input_memory_.reset(
4212 new NNMemory(nnapi_, "input_pool", total_input_byte_size));
4213 }
4214
4215 size_t total_output_byte_size = 0;
4216 for (int i : TfLiteIntArrayView(node->outputs)) {
4217 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4218 continue;
4219 }
4220 total_output_byte_size += context->tensors[i].bytes;
4221 total_output_byte_size += GetNumPaddingBytes(context->tensors[i].bytes);
4222 }
4223 if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
4224 nn_output_memory_.reset(
4225 new NNMemory(nnapi_, "output_pool", total_output_byte_size));
4226 }
4227 }
4228 }
4229   // Set the input tensor buffers. Note: we access TfLite tensors using
4230   // absolute indices, but NNAPI indexes inputs by relative position.
4231 int relative_input_index = 0;
4232
4233 const bool use_int8_asymm_signed =
4234 target_feature_level_ >= kMinSdkVersionForNNAPI13;
4235
4236 size_t input_offset = 0;
4237 for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
4238 if (absolute_input_index == kTfLiteOptionalTensor) {
4239 continue;
4240 }
4241 ANeuralNetworksOperandType input_nn_operand_type;
4242 ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
4243 TfLiteTensor* tensor = &context->tensors[absolute_input_index];
4244 TfLiteType ann_type_equivalent =
4245 operand_mapping_.lite_index_to_ann_type_conversion(
4246 absolute_input_index);
4247 if (delegate_options.allow_dynamic_dimensions &&
4248 HasUnspecifiedDimension(tensor)) {
4249 input_nn_operand_type =
4250 ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
4251 input_nn_operand_type_ptr = &input_nn_operand_type;
4252 }
4253 if (tensor->allocation_type != kTfLiteMmapRo) {
4254 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4255 tensor->buffer_handle < tensor_memory_map_->size()) {
4256 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4257 context,
4258 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4259 nn_execution_.get(), relative_input_index,
4260 input_nn_operand_type_ptr,
4261 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4262 tensor->bytes),
4263 "associating NNAPI execution input with a memory object", tensor,
4264 nnapi_errno);
4265 relative_input_index++;
4266 continue;
4267 }
4268 int tensor_size = 0;
4269 int padding_bytes = 0;
4270 if (ann_type_equivalent != kTfLiteNoType) {
4271 const auto num_elements = NumElements(tensor);
4272 uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
4273 if (tensor->type == kTfLiteUInt8 &&
4274 ann_type_equivalent == kTfLiteInt32) {
4275 for (int i = 0; i < num_elements; ++i) {
4276 reinterpret_cast<int32_t*>(input_ptr)[i] =
4277 static_cast<const int32_t>(tensor->data.uint8[i]);
4278 }
4279 } else if (tensor->type == kTfLiteInt8 &&
4280 ann_type_equivalent == kTfLiteUInt8) {
4281 // Explicitly convert int8 values to uint8 values.
4282 for (int i = 0; i < num_elements; ++i) {
4283 input_ptr[i] = static_cast<const uint8_t>(
4284 static_cast<int32_t>(tensor->data.int8[i]) + 128);
4285 }
4286 } else if (tensor->type == kTfLiteInt8 &&
4287 ann_type_equivalent == kTfLiteInt32) {
4288 if (use_int8_asymm_signed) {
4289 for (int i = 0; i < num_elements; ++i) {
4290 reinterpret_cast<int32_t*>(input_ptr)[i] =
4291 static_cast<const int32_t>(tensor->data.int8[i]);
4292 }
4293 } else {
4294 for (int i = 0; i < num_elements; ++i) {
4295 reinterpret_cast<int32_t*>(input_ptr)[i] =
4296 static_cast<const int32_t>(tensor->data.int8[i]) + 128;
4297 }
4298 }
4299 } else if (tensor->type == kTfLiteInt64 &&
4300 ann_type_equivalent == kTfLiteInt32) {
4301 // Check that values fit into int32.
4302 int32_t* input_ptr_i32 = reinterpret_cast<int32_t*>(input_ptr);
4303 for (int i = 0; i < num_elements; ++i) {
4304             if (tensor->data.i64[i] < std::numeric_limits<int32_t>::min() ||
4305                 tensor->data.i64[i] > std::numeric_limits<int32_t>::max()) {
4306 TF_LITE_KERNEL_LOG(context,
4307 "NN API Delegate: int64 value out of bounds "
4308 "for int32 target NNAPI tensor\n");
4309 return kTfLiteError;
4310 }
4311 input_ptr_i32[i] = static_cast<int32_t>(tensor->data.i64[i]);
4312 }
4313 } else {
4314 TF_LITE_KERNEL_LOG(
4315 context,
4316 "NN API Delegate: unsupported tensor types conversion: "
4317 "from type code %d to type code %d.\n",
4318 tensor->type, ann_type_equivalent);
4319 return kTfLiteError;
4320 }
4321 size_t type_size;
4322 TF_LITE_ENSURE_OK(
4323 context, GetSizeOfType(context, ann_type_equivalent, &type_size));
4324 tensor_size = NumElements(tensor) * type_size;
4325 padding_bytes = GetNumPaddingBytes(tensor_size);
4326 if (should_reset_execution) {
4327 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4328 context,
4329 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4330 nn_execution_.get(), relative_input_index,
4331 input_nn_operand_type_ptr, nn_input_memory_->get_handle(),
4332 input_offset, GetNNTensorSize(tensor_size, allow_padding)),
4333 "associating NNAPI execution input with a memory object", tensor,
4334 nnapi_errno);
4335 }
4336 } else {
4337 // copy data to pre-allocated shared memory.
4338 memcpy(nn_input_memory_->get_data_ptr() + input_offset,
4339 tensor->data.raw, tensor->bytes);
4340 tensor_size = tensor->bytes;
4341 padding_bytes = GetNumPaddingBytes(tensor_size);
4342 if (should_reset_execution) {
4343 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4344 context,
4345 nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4346 nn_execution_.get(), relative_input_index,
4347 input_nn_operand_type_ptr, nn_input_memory_->get_handle(),
4348 input_offset, GetNNTensorSize(tensor_size, allow_padding)),
4349 "associating NNAPI execution input with a memory object", tensor,
4350 nnapi_errno);
4351 }
4352 }
4353 input_offset += tensor_size + padding_bytes;
4354 relative_input_index++;
4355 }
4356 }
4357
4358 // Set the output tensor buffers.
4359 int relative_output_index = 0;
4360 size_t output_offset = 0;
4361 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4362     // If the NNAPI implementation doesn't have some of the outputs, they
4363     // are left unmapped and we should not try to read their values here.
4364 if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
4365 continue;
4366 }
4367 ANeuralNetworksOperandType output_nn_operand_type;
4368 ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
4369 TfLiteTensor* tensor = &context->tensors[output_index];
4370 if (delegate_options.allow_dynamic_dimensions &&
4371 HasUnspecifiedDimension(tensor)) {
4372 TfLiteType ann_type_equivalent =
4373 operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4374 output_nn_operand_type =
4375 ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
4376 output_nn_operand_type_ptr = &output_nn_operand_type;
4377 }
4378 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4379 tensor->buffer_handle < tensor_memory_map_->size() &&
4380 should_reset_execution) {
4381 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4382 context,
4383 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4384 nn_execution_.get(), relative_output_index,
4385 output_nn_operand_type_ptr,
4386 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4387 tensor->bytes),
4388 "associating NNAPI execution output to a memory object", tensor,
4389 nnapi_errno);
4390
4391 } else {
4392 int padding_bytes = GetNumPaddingBytes(tensor->bytes);
4393 if (should_reset_execution) {
4394 RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4395 context,
4396 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4397 nn_execution_.get(), relative_output_index,
4398 output_nn_operand_type_ptr, nn_output_memory_->get_handle(),
4399 output_offset, GetNNTensorSize(tensor->bytes, allow_padding)),
4400 "associating NNAPI execution output to a memory object", tensor,
4401 nnapi_errno);
4402 }
4403 output_offset += tensor->bytes + padding_bytes;
4404 }
4405 relative_output_index++;
4406 }
4407
4408 // Set memory for NNAPI state_outputs.
4409 for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
4410 int state_tensor_idx = model_state_tfl_inputs_[i];
4411 TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
4412 int padding_bytes = GetNumPaddingBytes(tensor->bytes);
4413 if (should_reset_execution) {
4414 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4415 context,
4416 nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4417 nn_execution_.get(), relative_output_index, nullptr,
4418 nn_output_memory_->get_handle(), output_offset,
4419 GetNNTensorSize(tensor->bytes, allow_padding)),
4420 "associating NNAPI execution state output to a memory object",
4421 nnapi_errno);
4422 }
4423 output_offset += tensor->bytes + padding_bytes;
4424 relative_output_index++;
4425 }
4426
4427 // Invoke ANN in blocking fashion.
4428 if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
4429 ANeuralNetworksEvent* event = nullptr;
4430 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4431 context,
4432 nnapi_->ANeuralNetworksExecution_startCompute(nn_execution_.get(),
4433 &event),
4434 "starting async computation", nnapi_errno);
4435 const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
4436 nnapi_->ANeuralNetworksEvent_free(event);
4437 RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
4438 "waiting for async computation completion",
4439 nnapi_errno);
4440 } else {
4441 // Use Burst mode by default for NNAPI 1.2+.
4442 if (nn_burst_) {
4443 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4444 context,
4445 nnapi_->ANeuralNetworksExecution_burstCompute(nn_execution_.get(),
4446 nn_burst_.get()),
4447 "running burst computation", nnapi_errno);
4448 } else {
4449 // Use synchronous execution for NNAPI 1.2+ as a fallback.
4450 RETURN_TFLITE_ERROR_IF_NN_ERROR(
4451 context,
4452 nnapi_->ANeuralNetworksExecution_compute(nn_execution_.get()),
4453 "running computation", nnapi_errno);
4454 }
4455 }
4456
4457 // copy results from shared memory to the destination.
4458 output_offset = 0;
4459 for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4460 TfLiteTensor* tensor = &context->tensors[output_index];
4461 if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
4462 continue;
4463 }
4464 TfLiteType ann_type_equivalent =
4465 operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4466 if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
4467 // Explicitly convert uint8 values to int8 values.
4468 uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
4469 nn_output_memory_->get_data_ptr() + output_offset);
4470 const auto num_elements = NumElements(tensor);
4471 for (int i = 0; i < num_elements; ++i) {
4472 output_ptr[i] =
4473 static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
4474 }
4475 }
4476 memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
4477 tensor->bytes);
4478 output_offset += tensor->bytes;
4479 output_offset += GetNumPaddingBytes(tensor->bytes);
4480 }
4481   // The state outputs of the previous invocation need to be copied to the
4482   // state inputs of the current invocation.
4483 for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
4484 int state_tensor_idx = model_state_tfl_inputs_[i];
4485 TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
4486 memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
4487 tensor->bytes);
4488 output_offset += tensor->bytes;
4489 output_offset += GetNumPaddingBytes(tensor->bytes);
4490 }
4491
4492   // Copy the data of every output tensor listed in feedback_loops_ into its
4493   // associated input tensor.
4494 for (auto feedback_loop : feedback_loops_) {
4495 int output_tensor_idx;
4496 int input_tensor_idx;
4497 std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
4498 TfLiteTensor& src = context->tensors[output_tensor_idx];
4499 TfLiteTensor& dest = context->tensors[input_tensor_idx];
4500
4501 memcpy(dest.data.raw, src.data.raw, src.bytes);
4502 }
4503
4504 return kTfLiteOk;
4505 }
4506
4507 void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
4508 const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
4509 int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
4510 // Depending on the operator and the input data format, Dequantize
4511 // operators may need to be added. For example when the input is
4512 // floating-point but weights are quantized then the weights will first be
4513 // dequantized to the same format as the input before being passed to the
4514 // operator.
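  // For example (illustrative): a float32 CONV_2D whose weights (input #1) are
  // stored as quantized uint8 gets a Dequantize inserted on that input, so the
  // NNAPI operation receives float32 weights matching its float32 activations.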
4515
4516 // The tensor determining whether the inputs should be floating-point.
4517 int input_tensor_index = -1;
4518 std::vector<int> inputs_to_potentially_dequantize;
4519
4520 switch (builtin_code) {
4521 case kTfLiteBuiltinConv2d:
4522 case kTfLiteBuiltinFullyConnected: {
4523 input_tensor_index = 0;
4524 // Weights and bias are inputs #1 and #2 respectively and may require
4525 // dequantization.
4526 inputs_to_potentially_dequantize = {1, 2};
4527 break;
4528 }
4529 case kTfLiteBuiltinLstm: {
4530 input_tensor_index = 0;
4531 inputs_to_potentially_dequantize = {1, 2, 3, 4, 5, 6, 7,
4532 8, 9, 10, 11, 12, 13, 14,
4533 15, 16, 17, 20, 21, 22, 23};
4534 break;
4535 }
4536 default:
4537 return;
4538 }
4539
4540 int tensor_id = node->inputs->data[input_tensor_index];
4541 if (tensor_id < 0) return;
4542
4543 // Nothing to do if the input is not floating-point.
4544 if (!IsFloat(context->tensors[tensor_id].type)) return;
4545
4546 for (int i : inputs_to_potentially_dequantize) {
4547 if (i < 0 || i >= node->inputs->size) continue; // Ignore invalid index.
4548 tensor_id = node->inputs->data[i];
4549 if (tensor_id < 0) continue; // Ignore optional input.
4550
4551 const TfLiteType type = context->tensors[tensor_id].type;
4552 // Nothing to do for this tensor if it's not quantized.
4553 if (!IsQuantized(type)) continue;
4554
4555 // Insert Dequantize operator if it hasn't been done already and change
4556 // the node's input accordingly.
4557 builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
4558 }
4559 }
4560
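// Reconstructs the dense form of the sparse constant weight tensor feeding the
// given DENSIFY node and registers it with the op builder as a new constant
// NNAPI input, optionally dequantizing fp16 data to fp32 on the way.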
4561 TfLiteStatus NNAPIDelegateKernel::DensifyAndDequantizeConstTensor(
4562 TfLiteContext* context, int densify_node_id, bool should_dequantize,
4563 NNAPIOpBuilder& builder) {
4564 TfLiteNode* densify_node;
4565 TfLiteRegistration* reg;
4566 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
4567 context, densify_node_id, &densify_node, ®));
4568 int sparse_weight_tid = densify_node->inputs->data[0];
4569 auto input_tensor = context->tensors[sparse_weight_tid];
4570 auto output_tensor = context->tensors[densify_node->outputs->data[0]];
4571 if (input_tensor.sparsity == nullptr) {
4572 return kTfLiteError;
4573 }
4574 const int dims_count = output_tensor.dims->size;
4575 std::vector<int> vector_shape(dims_count);
4576 for (int i = 0; i < dims_count; i++) {
4577 vector_shape[i] = output_tensor.dims->data[i];
4578 }
4579 size_t dense_size;
4580 int new_tensor_index = -1;
4581 switch (input_tensor.type) {
4582 case kTfLiteFloat32: {
4583 dense_size = output_tensor.bytes / sizeof(float);
4584 std::vector<float> output_data(dense_size);
4585 tflite::optimize::sparsity::FormatConverter<float> converter(
4586 vector_shape, *input_tensor.sparsity);
4587 converter.SparseToDense(static_cast<const float*>(input_tensor.data.data),
4588 dense_size, output_data.data(), context);
4589 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
4590 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
4591 output_data, output_tensor.params, &new_tensor_index));
4592 break;
4593 }
4594 case kTfLiteFloat16: {
4595 dense_size = output_tensor.bytes / sizeof(Eigen::half);
4596 std::vector<uint16_t> output_data(dense_size);
4597 Eigen::half* unpacked_fp16_data =
4598 reinterpret_cast<Eigen::half*>(output_data.data());
4599 tflite::optimize::sparsity::FormatConverter<Eigen::half> converter(
4600 vector_shape, *input_tensor.sparsity);
4601 converter.SparseToDense(
4602 static_cast<const Eigen::half*>(input_tensor.data.data), dense_size,
4603 unpacked_fp16_data, context);
4604 if (should_dequantize) {
4605 // we need to dequantize the fp16 dense tensor
4606 std::vector<float> float_dense_data(dense_size);
4607 for (int i = 0; i < dense_size; ++i) {
4608 float_dense_data[i] = fp16_ieee_to_fp32_value(
4609 reinterpret_cast<uint16_t*>(output_data.data())[i]);
4610 }
4611 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
4612 ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
4613 float_dense_data, output_tensor.params, &new_tensor_index));
4614 } else {
4615 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<uint16_t>(
4616 ANEURALNETWORKS_TENSOR_FLOAT16, kTfLiteFloat16, output_tensor.dims,
4617 output_data, output_tensor.params, &new_tensor_index));
4618 }
4619 break;
4620 }
4621 case kTfLiteInt8: {
4622 dense_size = output_tensor.bytes / sizeof(int8_t);
4623 std::vector<int8_t> output_data(dense_size);
4624 tflite::optimize::sparsity::FormatConverter<int8_t> converter(
4625 vector_shape, *input_tensor.sparsity);
4626 converter.SparseToDense(
4627 static_cast<const int8_t*>(input_tensor.data.data), dense_size,
4628 output_data.data(), context);
4629 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<int8_t>(
4630 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
4631 output_tensor.dims, output_data, output_tensor.params,
4632 &new_tensor_index));
4633 break;
4634 }
4635 default: {
4636 return kTfLiteError;
4637 }
4638 }
4639 return kTfLiteOk;
4640 }
4641
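// Translates every delegated TfLite node into NNAPI operations and operands.
// A first pass records constant fp16 dequantize and densify patterns; the
// main pass then maps each remaining node, with dedicated handling for PACK,
// fully quantized LSTM, pre-API-30 hard-swish lowering, and operators whose
// NNAPI operand layout differs from the TfLite one.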
4642 TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
4643 TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
4644 DequantizeMapping dequantize_mapping;
4645 // The operand builder allows creating a single op. It is created outside
4646 // the for loop to avoid reallocating the vectors.
4647 NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
4648 &dequantize_mapping, &allocation_memory_mapping_,
4649 &nnapi_to_tflite_op_mapping_, nn_model_.get(),
4650 nnapi_errno, allow_dynamic_dimensions);
4651   // If we have target accelerators, the target SDK version might be
4652   // different from the current Android version.
4653 target_feature_level_ = nnapi_->nnapi_runtime_feature_level;
4654 if (!nnapi_devices_.empty()) {
4655 TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
4656 context, nnapi_, nnapi_devices_, &target_feature_level_, nnapi_errno));
4657 }
4658   // First pass: handle const fp16->fp32 dequantize and densify ops if needed.
4659 for (auto node_index : nodes_) {
4660 TfLiteNode* node = nullptr;
4661 TfLiteRegistration* registration = nullptr;
4662 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
4663 context, node_index, &node, ®istration));
4664 if (IsDequantizeConstFloat16(context, node, registration)) {
4665 builder.AddTensorInput(node->inputs->data[0], /*hybrid_op=*/false,
4666 NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION);
4667 }
4668 if (IsDensifyConstTensor(context, node, registration)) {
4669 densify_output_to_node_mapping_[node->outputs->data[0]] = node_index;
4670 }
4671 if (IsDequantizeNonConstFloat16(context, node, registration)) {
4672 non_const_dequantize_output_to_node_mapping_[node->outputs->data[0]] =
4673 node_index;
4674 }
4675 }
4676 // Clear the input and output lists for the dequantize path.
4677 builder.ClearInputOuputLists();
4678
4679 // Add other tensors.
4680 for (auto node_index : nodes_) {
4681 // Obtain the op and registration.
4682 TfLiteNode* node;
4683 TfLiteRegistration* reg;
4684 TF_LITE_ENSURE_STATUS(
4685 context->GetNodeAndRegistration(context, node_index, &node, ®));
4686 // skip DENSIFY -> DEQUANTIZE as they are handled elsewhere.
4687 if (IsDensifyConstTensor(context, node, reg) ||
4688 IsDequantizeNonConstFloat16(context, node, reg)) {
4689 continue;
4690 }
4691
4692 // Delegate PACK by lowering it into CONCAT + RESHAPE.
4693 if (reg->builtin_code == kTfLiteBuiltinPack) {
4694 TF_LITE_ENSURE_STATUS(
4695 builder.TransformPackIntoSupportedOps(node_index, node, reg));
4696 continue;
4697 }
4698 // Fully quantized full LSTM.
4699 if (target_feature_level_ >= kMinSdkVersionForNNAPI13 &&
4700 reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4701 context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
4702 const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
4703
4704 constexpr int kInputTensor = 0;
4705 constexpr int kInputToInputWeightsTensor = 1;
4706 constexpr int kRecurrentToInputWeightsTensor = 5;
4707 constexpr int kInputGateBiasTensor = 12;
4708 constexpr int kForgetGateBiasTensor = 13;
4709 constexpr int kCellGateBiasTensor = 14;
4710 constexpr int kOutputGateBiasTensor = 15;
4711 constexpr int kProjectionWeightsTensor = 16;
4712 constexpr int kProjectionBiasTensor = 17;
4713 constexpr int kPrevOutputTensor = 18;
4714
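      // The constants above are positions in the TfLite LSTM input list. NNAPI
      // still expects an operand at every position, so optional inputs are
      // added below as empty operands of the matching element type.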
4715 // Add input tensors.
4716 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4717 const auto input_index = node->inputs->data[input_pos];
4718 if (input_index == kTfLiteOptionalTensor) {
4719 if (input_pos == kInputToInputWeightsTensor ||
4720 input_pos == kRecurrentToInputWeightsTensor ||
4721 input_pos == kProjectionWeightsTensor) {
4722 TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
4723 } else if (input_pos == kInputGateBiasTensor ||
4724 input_pos == kForgetGateBiasTensor ||
4725 input_pos == kCellGateBiasTensor ||
4726 input_pos == kOutputGateBiasTensor ||
4727 input_pos == kProjectionBiasTensor) {
4728 TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
4729 } else { // cell-to-* and layer norm weights.
4730 TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
4731 }
4732 } else {
4733 // Only input and previous output use INT8_ASYM_SIGNED.
4734 int flags =
4735 (input_pos == kInputTensor || input_pos == kPrevOutputTensor)
4736 ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
4737 : 0;
4738 TF_LITE_ENSURE_STATUS(
4739 builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
4740 }
4741 }
4742
4743 // Add clip parameters.
4744 auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
4745 TF_LITE_ENSURE_STATUS(
4746 builder.AddScalarFloat32Operand(builtin->cell_clip));
4747 TF_LITE_ENSURE_STATUS(
4748 builder.AddScalarFloat32Operand(builtin->proj_clip));
4749
4750 // Add quantization parameters for intermediate tensors.
4751 TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
4752 for (int intermediate_pos = 0;
4753 intermediate_pos < node->intermediates->size; ++intermediate_pos) {
4754 const auto intermediate_index =
4755 node->intermediates->data[intermediate_pos];
4756 const TfLiteTensor& tensor = context->tensors[intermediate_index];
4757 TfLiteAffineQuantization* quantization_params =
4758 static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
4759 if (intermediate_pos == 4) {
4760 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4761 quantization_params->zero_point->data[0]));
4762 }
4763 TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
4764 quantization_params->scale->data[0]));
4765 }
4766
4767 // Activation state output.
4768 int ann_index;
4769 builder.AddStateInt8AsymTensor(
4770 node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
4771 model_state_outputs_.push_back(ann_index);
4772 model_state_tfl_inputs_.push_back(
4773 node->inputs->data[/*kInputActivationStateTensor*/ 18]);
4774
4775 // Cell state output.
4776 builder.AddStateInt16Tensor(
4777 node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
4778 model_state_outputs_.push_back(ann_index);
4779 model_state_tfl_inputs_.push_back(
4780 node->inputs->data[/*kInputCellStateTensor*/ 19]);
4781
4782 // Add output tensors.
4783 for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
4784 const auto output_index = node->outputs->data[output_pos];
4785 TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
4786 output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
4787 }
4788
4789 builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
4790 continue;
4791 }
4792
4793 const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
4794 const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
4795 const bool need_int8_conversion =
4796 target_feature_level_ < kMinSdkVersionForNNAPI13 &&
4797 NeedInt8Conversion(context, reg->builtin_code, node);
4798 const bool use_int8_asymm_signed =
4799 target_feature_level_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
4800
4801 // skip DEQUANTIZE (fp16 -> fp32) as it is handled elsewhere
4802 if (IsDequantizeConstFloat16(context, node, reg)) {
4803 continue;
4804 }
4805
4806 int input_tensor_flags = 0;
4807 if (scalar_as_tensor) {
4808 input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
4809 }
4810 if (use_int8_asymm_signed) {
4811 input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
4812 }
4813
4814 // On SDK level less than 30, h_swish will be lowered into supported NNAPI
4815 // operations. Since SDK level 30, h_swish is supported as a single
4816 // operation.
4817 if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
4818 nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
4819 builder.TransformHardSwishIntoSupportedOps(
4820 node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
4821 node_index);
4822 continue;
4823 }
4824 // Map inputs to NN API tensor indices.
4825 for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4826 if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
4827 // Everything is added during Map since input tensors
4828 // have different order.
4829 continue;
4830 }
4831 if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
4832 node->inputs->data[input_pos] == kTfLiteOptionalTensor) {
4833 // skip optional bias and handle it during mapping
4834 continue;
4835 }
4836 const auto input_index = node->inputs->data[input_pos];
4837 // handle sparse weights for Conv2d
4838 if (reg->builtin_code == kTfLiteBuiltinConv2d && input_pos == 1) {
4839 int densify_node_id = -1;
4840 bool should_dequantize = false;
4841 int dequantize_node_id =
4842 non_const_dequantize_output_to_node_mapping_[input_index];
4843 if (dequantize_node_id != -1) {
4844 should_dequantize = true;
4845 // Find densify->dequantize pattern.
4846 TfLiteNode* dequant_node;
4847 TfLiteRegistration* reg;
4848 TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
4849 context, dequantize_node_id, &dequant_node, ®));
4850 densify_node_id =
4851 densify_output_to_node_mapping_[dequant_node->inputs->data[0]];
4852 } else {
4853 densify_node_id = densify_output_to_node_mapping_[input_index];
4854 }
4855 if (densify_node_id != -1) {
4856 TF_LITE_ENSURE_STATUS(DensifyAndDequantizeConstTensor(
4857 context, densify_node_id, should_dequantize, builder));
4858 continue;
4859 }
4860 }
4861 if (need_int8_conversion &&
4862 (input_pos == 0 ||
4863 reg->builtin_code == kTfLiteBuiltinFullyConnected ||
4864 reg->builtin_code == kTfLiteBuiltinConv2d ||
4865 reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
4866 reg->builtin_code == kTfLiteBuiltinAdd ||
4867 reg->builtin_code == kTfLiteBuiltinMul ||
4868 reg->builtin_code == kTfLiteBuiltinSub ||
4869 reg->builtin_code == kTfLiteBuiltinConcatenation ||
4870 reg->builtin_code == kTfLiteBuiltinMaximum ||
4871 reg->builtin_code == kTfLiteBuiltinMinimum ||
4872 reg->builtin_code == kTfLiteBuiltinLeakyRelu ||
4873 reg->builtin_code == kTfLiteBuiltinLess ||
4874 reg->builtin_code == kTfLiteBuiltinLessEqual ||
4875 reg->builtin_code == kTfLiteBuiltinPrelu ||
4876 reg->builtin_code == kTfLiteBuiltinGreater ||
4877 reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
4878 reg->builtin_code == kTfLiteBuiltinEqual ||
4879 reg->builtin_code == kTfLiteBuiltinNotEqual ||
4880 reg->builtin_code == kTfLiteBuiltinSelect)) {
4881 // Only selected inputs require int8 conversion.
4882 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
4883 input_index, hybrid_op,
4884 input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
4885 continue;
4886 }
4887 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4888 input_pos >= 20) {
4889 // Skip layer normalization weights. They are added in the Map
4890 // function (after all the other inputs added there) since layer
4891 // normalization weights are the last four inputs of the LSTM op in
4892 // NNAPI.
4893 continue;
4894 }
4895 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
4896 // Configuring all inputs in the Map function
4897 continue;
4898 }
4899 if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
4900 if (input_pos >= 20) {
4901 // Skip layer normalization weights. They are added in the Map
4902 // function (after all the other inputs added there) since layer
4903 // normalization weights are the last four inputs of the
4904 // unidirectional sequence LSTM op in NNAPI.
4905 continue;
4906 }
4907 if (input_index == kTfLiteOptionalTensor) {
4908 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4909 continue;
4910 }
4911 }
4912 if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
4913 (input_index == node->inputs->data[0])) {
4914 // Skip the axis input tensor; it will be added as a scalar operand
4915 // by the Map() mapping.
4916 continue;
4917 }
4918
4919 // Pad and Padv2 have an optional parameter for a pad value which has
4920 // to be converted to a scalar type in NN API.
4921 if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
4922 reg->builtin_code == kTfLiteBuiltinPad) &&
4923 node->inputs->size == 3 && input_pos == 2) {
4924 const int constant_value_id = node->inputs->data[2];
4925 if (constant_value_id == kTfLiteOptionalTensor) {
4926 continue;
4927 }
4928 const TfLiteTensor constant_value = context->tensors[constant_value_id];
4929
4930 switch (constant_value.type) {
4931 case kTfLiteFloat32:
4932 if (constant_value.allocation_type == kTfLiteMmapRo) {
4933 builder.AddScalarFloat32Operand(*constant_value.data.f);
4934 } else {
4935 builder.AddSingleValueTensorAsScalarOperand(
4936 constant_value_id, ANEURALNETWORKS_FLOAT32);
4937 }
4938 break;
4939 case kTfLiteUInt8:
4940 if (constant_value.allocation_type == kTfLiteMmapRo) {
4941 builder.AddScalarInt32Operand(
4942 static_cast<int32_t>(*constant_value.data.uint8));
4943 } else {
4944 builder.AddSingleValueTensorAsScalarOperand(
4945 constant_value_id, ANEURALNETWORKS_INT32);
4946 }
4947 break;
4948 case kTfLiteInt8:
4949 if (constant_value.allocation_type == kTfLiteMmapRo) {
4950 if (need_int8_conversion) {
4951 builder.AddScalarInt32Operand(
4952 static_cast<int32_t>(*constant_value.data.int8) + 128);
4953 } else {
4954 builder.AddScalarInt32Operand(*constant_value.data.int8);
4955 }
4956 } else {
4957 builder.AddSingleValueTensorAsScalarOperand(
4958 constant_value_id, ANEURALNETWORKS_INT32);
4959 }
4960 break;
4961 default:
4962 context->ReportError(context,
4963 "Unsupported type of pad value for pad_v2\n");
4964 return kTfLiteError;
4965 }
4966 continue;
4967 }
4968
4969 if (input_index == kTfLiteOptionalTensor &&
4970 (reg->builtin_code == kTfLiteBuiltinLstm ||
4971 reg->builtin_code == kTfLiteBuiltinSvdf ||
4972 reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
4973         // Properly handle the optional tensor for LSTM and SVDF;
4974         // currently only float32 is supported.
4975 TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4976 } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
4977 reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
4978 if (input_pos == 0) {
4979 // Only the first input tensor is added. The second one,
4980 // specifying the output height and width, is not added and
4981 // instead the height and width will be added individually as
4982 // scalars by the mapping function returned by Map().
4983 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4984 input_tensor_flags));
4985 }
4986 } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
4987 // The K parameter tensor is not handled here but by the functor
4988 // returned by Map, the input tensor is instead added in
4989 // the else clause below
4990 continue;
4991 } else if (reg->builtin_code == kTfLiteBuiltinGather) {
4992 // Everything else is added during Map since input tensors
4993 // have different order.
4994 if (input_pos == 0) {
4995 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4996 input_tensor_flags));
4997 }
4998 continue;
4999 } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
5000 input_pos == 1) {
5001 // The axis param is added during Map
5002 continue;
5003 } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
5004 input_pos == 2) {
5005 // NNAPI does not support crops.
5006 // The Map function will check if all crops are zero.
5007 continue;
5008 } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
5009 reg->builtin_code == kTfLiteBuiltinArgMax) {
5010 // The first input tensor is added as is. The second one, specifying
5011 // the axis, needs to be converted to a scalar since TFLite uses a
5012 // tensor but NNAPI uses a scalar as the axis.
5013 if (input_pos == 0) {
5014 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5015 input_tensor_flags));
5016 } else {
5017 const int axis_id = node->inputs->data[1];
5018 const TfLiteTensor& axis_tensor = context->tensors[axis_id];
5019 switch (axis_tensor.type) {
5020 case kTfLiteInt32:
5021 if (axis_tensor.allocation_type == kTfLiteMmapRo) {
5022 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5023 static_cast<int32_t>(*axis_tensor.data.i32)));
5024 } else {
5025 TF_LITE_ENSURE_STATUS(
5026 builder.AddSingleValueTensorAsScalarOperand(
5027 axis_id, ANEURALNETWORKS_INT32));
5028 }
5029 break;
5030 case kTfLiteInt64:
5031 // Map() function already makes sure int64 input is constant.
5032 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5033 static_cast<int32_t>(*axis_tensor.data.i64)));
5034 break;
5035 default:
5036 return kTfLiteError;
5037 }
5038 }
5039 } else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
5040 reg->builtin_code == kTfLiteBuiltinMinimum) {
5041 const TfLiteTensor& operand_tensor =
5042 context->tensors[node->inputs->data[input_pos]];
5043 if (operand_tensor.dims->size == 0) {
5044 int tensor_index;
5045
5046 TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
5047 kTfLiteMmapRo);
5048 switch (operand_tensor.type) {
5049 case kTfLiteFloat32:
5050 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5051 ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
5052 std::vector<float>(1, operand_tensor.data.f[0]),
5053 operand_tensor.params, &tensor_index));
5054 break;
5055 case kTfLiteUInt8:
5056 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5057 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
5058 std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
5059 operand_tensor.params, &tensor_index));
5060 break;
5061 case kTfLiteInt8: {
5062 auto params = operand_tensor.params;
5063 if (params.scale == 0.0) {
5064 params.scale = 1.0;
5065 }
5066
5067 if (use_int8_asymm_signed) {
5068 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5069 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
5070 operand_tensor.type, {1},
5071 std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
5072 &tensor_index));
5073 } else {
5074 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5075 ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
5076 {1},
5077 std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
5078 params, &tensor_index));
5079 }
5080 } break;
5081 case kTfLiteInt32:
5082 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5083 ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
5084 std::vector<int32_t>(1, operand_tensor.data.i32[0]),
5085 operand_tensor.params, &tensor_index));
5086 break;
5087 default:
5088 return kTfLiteError;
5089 }
5090 } else {
5091 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5092 input_tensor_flags));
5093 }
5094 } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
5095 reg->builtin_code == kTfLiteBuiltinReduceMax ||
5096 reg->builtin_code == kTfLiteBuiltinReduceMin ||
5097 reg->builtin_code == kTfLiteBuiltinReduceProd ||
5098 reg->builtin_code == kTfLiteBuiltinSum) &&
5099 (input_pos == 1)) {
5100         // The axis needs to be converted to a tensor if specified as a scalar.
5101 const TfLiteTensor& axis_tensor =
5102 context->tensors[node->inputs->data[input_pos]];
5103 if (axis_tensor.dims->size == 0) {
5104 TF_LITE_ENSURE_STATUS(
5105 builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
5106 } else {
5107 TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5108 input_tensor_flags));
5109 }
5110 } else if (reg->builtin_code == kTfLiteBuiltinFill) {
5111 if (input_pos == 0) {
5112 const int dims_id = node->inputs->data[0];
5113 const TfLiteTensor& dims_tensor = context->tensors[dims_id];
5114 switch (dims_tensor.type) {
5115 case kTfLiteInt32:
5116 TF_LITE_ENSURE_STATUS(
5117 builder.AddTensorInput(input_index, hybrid_op));
5118 break;
5119 case kTfLiteInt64: {
5120 // We made sure that dimensions are constant and fit into int32
5121 // in Map(), so we can safely create a new tensor with casted
5122 // values.
5123 const int dims_size = dims_tensor.dims->data[0];
5124 std::vector<int32_t> dims_int32(dims_size);
5125 std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
5126 dims_int32.begin());
5127 int new_tensor_index = -1;
5128 builder.AddNewInputConstantTensor(
5129 ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
5130 dims_int32, dims_tensor.params, &new_tensor_index);
5131 } break;
5132 default:
5133 return kTfLiteError;
5134 }
5135 } else {
5136 const int value_id = node->inputs->data[1];
5137 const TfLiteTensor& value_tensor = context->tensors[value_id];
5138 switch (value_tensor.type) {
5139 case kTfLiteFloat32:
5140 if (value_tensor.allocation_type == kTfLiteMmapRo) {
5141 TF_LITE_ENSURE_STATUS(
5142 builder.AddScalarFloat32Operand(*value_tensor.data.f));
5143 } else {
5144 TF_LITE_ENSURE_STATUS(
5145 builder.AddSingleValueTensorAsScalarOperand(
5146 value_id, ANEURALNETWORKS_FLOAT32));
5147 }
5148 break;
5149 case kTfLiteInt32:
5150 if (value_tensor.allocation_type == kTfLiteMmapRo) {
5151 TF_LITE_ENSURE_STATUS(
5152 builder.AddScalarInt32Operand(*value_tensor.data.i32));
5153 } else {
5154 TF_LITE_ENSURE_STATUS(
5155 builder.AddSingleValueTensorAsScalarOperand(
5156 value_id, ANEURALNETWORKS_INT32));
5157 }
5158 break;
5159 case kTfLiteInt64:
5160 if (value_tensor.allocation_type == kTfLiteMmapRo) {
5161 // Map() function already makes sure const int64 input fits into
5162 // int32.
5163 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5164 static_cast<int32_t>(*value_tensor.data.i64)));
5165 } else {
5166 TF_LITE_ENSURE_STATUS(
5167 builder.AddSingleValueTensorAsScalarOperand(
5168 value_id, ANEURALNETWORKS_INT32));
5169 }
5170 break;
5171 default:
5172 return kTfLiteError;
5173 }
5174 }
5175 } else {
5176 TF_LITE_ENSURE_STATUS(
5177 builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
5178 }
5179 }
5180
5181 // Get op type and operands
5182 // Fails if the Validate function failed
5183 int nn_op_type;
5184 TF_LITE_ENSURE_STATUS(
5185 Map(context, reg->builtin_code, reg->version, target_feature_level_,
5186 {context, &builder, node, node_index, &model_state_outputs_,
5187 &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
5188 &nn_op_type));
5189
5190 // Map outputs to NN API tensor indices.
5191 int output_tensor_flags = 0;
5192 if (need_int8_conversion) {
5193 output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
5194 }
5195 if (use_int8_asymm_signed) {
5196 output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
5197 }
5198     // fc_nn_intermediate_output_index is used to indicate whether an
5199     // additional RESHAPE op is needed.
5200 int fc_nn_intermediate_output_index = -1;
5201 for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
5202 auto output_index = node->outputs->data[output_pos];
5203
5204       // Outputs for the basic LSTM cell are set in the Map function, so they are skipped here.
5205 if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
5206 continue;
5207 }
5208 // Handle FC with keep_num_dims==true.
5209 if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
5210 reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data)
5211 ->keep_num_dims) {
5212 auto& output_tensor = context->tensors[output_index];
5213
5214 int num_units = output_tensor.dims->data[output_tensor.dims->size - 1];
5215 std::vector<uint32_t> output_dims(2);
5216 output_dims[0] = NumElements(output_tensor.dims) / num_units;
5217 output_dims[1] = num_units;
5218 TF_LITE_ENSURE_STATUS(builder.AddIntermediateOutputTensor(
5219 output_tensor.type, output_dims.size(), output_dims.data(),
5220 output_tensor.params.scale, output_tensor.params.zero_point,
5221 &fc_nn_intermediate_output_index));
5222 } else {
5223 TF_LITE_ENSURE_STATUS(
5224 builder.AddTensorOutput(output_index, output_tensor_flags));
5225 }
5226 }
5227
5228 // Dequantize operators may have to be added in case inputs are to be
5229 // floating-point.
5230 AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
5231 node_index, &builder, nnapi_errno);
5232
5233 TF_LITE_ENSURE_OK(context_,
5234 builder.FinalizeAddOperation(nn_op_type, node_index));
5235 if (fc_nn_intermediate_output_index > -1) {
5236 TF_LITE_ENSURE_STATUS(builder.AppendReshape(
5237 fc_nn_intermediate_output_index, node->outputs->data[0], node_index));
5238 }
5239 }
5240 return kTfLiteOk;
5241 }
5242
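// Builds and finalizes the NNAPI model for the delegated subgraph: adds all
// ops and tensors, declares model inputs/outputs (including recurrent state
// outputs), optionally relaxes fp32 computation to fp16, and sizes the shared
// input/output memory pools including per-tensor padding.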
5243 TfLiteStatus NNAPIDelegateKernel::BuildGraph(
5244 TfLiteContext* context,
5245 const StatefulNnApiDelegate::Options& delegate_options,
5246 const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
5247 int* nnapi_errno) {
5248 // Build the ops and tensors.
5249 TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
5250 context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
5251 // Map input and output tensor indices to ANN
5252 std::vector<uint32_t> inputs;
5253 inputs.reserve(input_tensors->size);
5254 std::vector<uint32_t> outputs;
5255 outputs.reserve(output_tensors->size);
5256
5257 size_t total_input_byte_size = 0;
5258   // Map the TensorFlow Lite inputs and outputs to NNAPI operand indices.
5259 for (int i : TfLiteIntArrayView(input_tensors)) {
5260 // Constant tensors are not NNAPI inputs.
5261 if (i != kTfLiteOptionalTensor &&
5262 context->tensors[i].allocation_type != kTfLiteMmapRo &&
5263 // The delegate might not have mapped this input (this can
5264 // happen if one tensor is split in several ones)
5265 operand_mapping_.lite_index_to_ann(i) != -1) {
5266 inputs.push_back(operand_mapping_.lite_index_to_ann(i));
5267 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
5268 continue;
5269 }
5270 const TfLiteType nn_type_conversion =
5271 operand_mapping_.lite_index_to_ann_type_conversion(i);
5272 int tensor_size = 0;
5273 if (nn_type_conversion == kTfLiteNoType) {
5274 tensor_size = context->tensors[i].bytes;
5275 } else {
5276 size_t type_size;
5277 TF_LITE_ENSURE_OK(
5278 context, GetSizeOfType(context, nn_type_conversion, &type_size));
5279 tensor_size = NumElements(&context->tensors[i]) * type_size;
5280 }
5281 total_input_byte_size += tensor_size;
5282 total_input_byte_size += GetNumPaddingBytes(tensor_size);
5283 }
5284 }
5285
5286 size_t total_output_byte_size = 0;
5287 for (int i : TfLiteIntArrayView(output_tensors)) {
5288 const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
5289 // Unmapped outputs are not added
5290 if (output_tensor_ann_index != -1) {
5291 outputs.push_back(output_tensor_ann_index);
5292 }
5293 if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
5294 continue;
5295 }
5296 total_output_byte_size += context->tensors[i].bytes;
5297 total_output_byte_size += GetNumPaddingBytes(context->tensors[i].bytes);
5298 }
5299
5300 // Add state output tensors as model outputs.
5301 for (int i = 0; i < model_state_outputs_.size(); i++) {
5302 outputs.push_back(model_state_outputs_[i]);
5303 auto tfl_state_idx = model_state_tfl_inputs_[i];
5304 total_output_byte_size += context->tensors[tfl_state_idx].bytes;
5305 total_output_byte_size +=
5306 GetNumPaddingBytes(context->tensors[tfl_state_idx].bytes);
5307 }
5308
5309 // Tell ANN to declare inputs/outputs
5310 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5311 context,
5312 nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
5313 nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
5314 outputs.data()),
5315 "identifying model inputs and outputs", nnapi_errno);
5316
5317 auto allow_fp16 =
5318       context->allow_fp32_relax_to_fp16 || delegate_options.allow_fp16;
5319 if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
5320 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5321 context,
5322 nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
5323 nn_model_.get(), allow_fp16),
5324 "set relaxed computation mode for fp32 if possible", nnapi_errno);
5325 }
5326
5327 RETURN_TFLITE_ERROR_IF_NN_ERROR(
5328 context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
5329 "finalizing the model", nnapi_errno);
5330
5331 // Create shared memory pool for inputs and outputs.
5332 nn_input_memory_.reset(
5333 new NNMemory(nnapi_, "input_pool", total_input_byte_size));
5334 nn_output_memory_.reset(
5335 new NNMemory(nnapi_, "output_pool", total_output_byte_size));
5336
5337 return kTfLiteOk;
5338 }
5339
5340 } // namespace nnapi
5341 } // namespace delegate
5342
5343 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
5344 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
5345 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
5346 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
5347
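// Data either borrows an externally owned NnApi instance or, when constructed
// from a std::unique_ptr, takes ownership of it (e.g. for support library
// based implementations).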
5348 StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
5349 StatefulNnApiDelegate::Data::Data(std::unique_ptr<const NnApi> nnapi)
5350 : nnapi(nnapi.get()), owned_nnapi(std::move(nnapi)) {}
5351
5352 StatefulNnApiDelegate::Data::~Data() {
5353 std::for_each(std::begin(delegate_state_cache),
5354 std::end(delegate_state_cache),
5355 [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
5356 delete entry.second;
5357 });
5358 }
5359
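// Delegate kernels are cached under the first node index of the partition they
// replace; MaybeGetCachedDelegateKernel hands ownership back to the caller and
// removes the cache entry.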
5360 void StatefulNnApiDelegate::Data::CacheDelegateKernel(
5361 const TfLiteDelegateParams* delegate_params,
5362 NNAPIDelegateKernel* delegate_state) {
5363 const int cache_key = delegate_params->nodes_to_replace->data[0];
5364 delegate_state_cache.emplace(cache_key, delegate_state);
5365 }
5366
5367 NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
5368 const TfLiteDelegateParams* delegate_params) {
5369 const int cache_key = delegate_params->nodes_to_replace->data[0];
5370 const auto cached_state = delegate_state_cache.find(cache_key);
5371 if (cached_state != std::end(delegate_state_cache)) {
5372 auto result = cached_state->second;
5373 delegate_state_cache.erase(cached_state);
5374 return result;
5375 } else {
5376 return nullptr;
5377 }
5378 }
5379
5380 void StatefulNnApiDelegate::StatefulNnApiDelegateConstructorImpl(
5381 const Options& options) {
5382 if (options.accelerator_name) {
5383 delegate_data_.accelerator_name = options.accelerator_name;
5384 }
5385 if (options.cache_dir) {
5386 delegate_data_.cache_dir = options.cache_dir;
5387 }
5388 if (options.model_token) {
5389 delegate_data_.model_token = options.model_token;
5390 }
5391 delegate_data_.execution_preference = options.execution_preference;
5392 delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
5393 delegate_data_.max_number_delegated_partitions =
5394 options.max_number_delegated_partitions;
5395 delegate_data_.allow_fp16 = options.allow_fp16;
5396 delegate_data_.execution_priority = options.execution_priority;
5397 delegate_data_.max_compilation_timeout_duration_ns =
5398 options.max_compilation_timeout_duration_ns;
5399 delegate_data_.max_execution_timeout_duration_ns =
5400 options.max_execution_timeout_duration_ns;
5401 delegate_data_.max_execution_loop_timeout_duration_ns =
5402 options.max_execution_loop_timeout_duration_ns;
5403 if (delegate_data_.nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
5404 delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
5405 }
5406 delegate_data_.use_burst_computation = options.use_burst_computation;
5407 TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
5408 "Created TensorFlow Lite delegate for NNAPI.");
5409 Prepare = DoPrepare;
5410 CopyFromBufferHandle = DoCopyFromBufferHandle;
5411 CopyToBufferHandle = DoCopyToBufferHandle;
5412 FreeBufferHandle = DoFreeBufferHandle;
5413 data_ = &delegate_data_;
5414 if (delegate_data_.allow_dynamic_dimensions) {
5415 flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
5416 flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
5417 }
5418 }
5419
5420 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
5421 : StatefulNnApiDelegate(nnapi, Options()) {}
5422
5423 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
5424 : StatefulNnApiDelegate(NnApiImplementation(), options) {}
5425
5426 StatefulNnApiDelegate::StatefulNnApiDelegate(
5427 const NnApiSLDriverImplFL5* nnapi_support_library_driver, Options options)
5428 : TfLiteDelegate(TfLiteDelegateCreate()),
5429 delegate_data_(
5430 CreateNnApiFromSupportLibrary(nnapi_support_library_driver)) {
5431 StatefulNnApiDelegateConstructorImpl(options);
5432 }
5433
5434 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
5435 Options options)
5436 : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
5437 StatefulNnApiDelegateConstructorImpl(options);
5438 }
5439
StatefulNnApiDelegate()5440 StatefulNnApiDelegate::StatefulNnApiDelegate()
5441 : StatefulNnApiDelegate(Options()) {}
5442
GetOptions(TfLiteDelegate * delegate)5443 const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
5444 TfLiteDelegate* delegate) {
5445 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5446 StatefulNnApiDelegate::Options options;
5447 options.execution_preference = delegate_data->execution_preference;
5448 options.accelerator_name = delegate_data->accelerator_name.empty()
5449 ? nullptr
5450 : delegate_data->accelerator_name.c_str();
5451 options.cache_dir = delegate_data->cache_dir.empty()
5452 ? nullptr
5453 : delegate_data->cache_dir.c_str();
5454 options.model_token = delegate_data->model_token.empty()
5455 ? nullptr
5456 : delegate_data->model_token.c_str();
5457 options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
5458 options.max_number_delegated_partitions =
5459 delegate_data->max_number_delegated_partitions;
5460 options.allow_fp16 = delegate_data->allow_fp16;
5461 options.execution_priority = delegate_data->execution_priority;
5462 options.max_compilation_timeout_duration_ns =
5463 delegate_data->max_compilation_timeout_duration_ns;
5464 options.max_execution_timeout_duration_ns =
5465 delegate_data->max_execution_timeout_duration_ns;
5466 options.max_execution_loop_timeout_duration_ns =
5467 delegate_data->max_execution_loop_timeout_duration_ns;
5468 options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
5469 options.use_burst_computation = delegate_data->use_burst_computation;
5470 return options;
5471 }
5472
5473 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
GetTensorMemoryMap(TfLiteDelegate * delegate)5474 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
5475 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5476 return delegate_data->tensor_memory_map;
5477 }
5478
GetCache(TfLiteDelegate * delegate)5479 delegates::Serialization* StatefulNnApiDelegate::GetCache(
5480 TfLiteDelegate* delegate) {
5481 auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5482 return delegate_data->cache.get();
5483 }
5484
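// Registers an ANeuralNetworksMemory region together with the callback used to
// copy its contents back into a host tensor. A previously freed slot in
// tensor_memory_map is reused when available; otherwise the registration is
// appended. The returned index serves as the TfLiteBufferHandle.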
TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
    ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
    void* callback_context) {
  int map_size = delegate_data_.tensor_memory_map.size();
  for (int i = 0; i < map_size; i++) {
    if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
      delegate_data_.tensor_memory_map[i] = {memory, callback,
                                             callback_context};
      return i;
    }
  }
  delegate_data_.tensor_memory_map.push_back(
      {memory, callback, callback_context});
  return map_size;
}

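// Copies data out of the NNAPI memory region associated with `buffer_handle`
// into `tensor` by invoking the callback registered via RegisterNnapiMemory as
// callback(tensor, memory, /*offset=*/0, tensor->bytes, callback_context).
// Fails if the handle is out of range or no memory/callback was registered.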
TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
    TfLiteContext* context, TfLiteDelegate* delegate,
    TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
  auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
  if (buffer_handle < 0 ||
      buffer_handle >= delegate_data->tensor_memory_map.size()) {
    return kTfLiteError;
  }
  auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
  auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
  auto callback_context =
      delegate_data->tensor_memory_map[buffer_handle].callback_context;
  if (!memory || !callback) {
    return kTfLiteError;
  }
  return callback(tensor, memory, 0, tensor->bytes, callback_context);
}

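// Copying host tensor data into an NNAPI buffer handle is not supported, so
// this always reports an error.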
TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
    TfLiteContext* context, TfLiteDelegate* delegate,
    TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
  return kTfLiteError;
}

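// Clears the registration slot for `handle` so it can be reused by a later
// RegisterNnapiMemory call, and resets the handle to kTfLiteNullBufferHandle.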
void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
                                               TfLiteDelegate* delegate,
                                               TfLiteBufferHandle* handle) {
  auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
  if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
    delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
    *handle = kTfLiteNullBufferHandle;
  }
}

int StatefulNnApiDelegate::GetNnApiErrno() const {
  return delegate_data_.nnapi_errno;
}

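// Determines which of the pre-validated `supported_nodes` the target NNAPI
// devices can actually run. The graph is first partitioned; each partition
// then gets a candidate NNAPIDelegateKernel whose Init builds the NNAPI model,
// and the kernel is asked which of its operations the target devices support.
// Kernels for fully supported partitions are cached so the delegate kernel's
// init in DoPrepare can reuse them without rebuilding. If the supported set
// changed, partitioning is previewed again so `params_array` matches the
// final node list.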
// static
TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
    TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
    const std::vector<int>& supported_nodes,
    std::vector<int>* device_supported_nodes, int* num_partitions,
    TfLiteDelegateParams** params_array, int* nnapi_errno) {
  auto* delegate_data = static_cast<Data*>(delegate->data_);
  // The first entry in a TfLiteIntArray is the element count.
  auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
  TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
      context, supported_nodes_int_array.get(), params_array, num_partitions));
  // For each partition, check which nodes are actually supported by the
  // target accelerators.
  delegate_data->delegate_state_cache.clear();
  for (int idx = 0; idx < *num_partitions; idx++) {
    const auto& partition_params = (*params_array)[idx];
    std::unique_ptr<NNAPIDelegateKernel> kernel_state(
        new NNAPIDelegateKernel(nnapi));
    TfLiteDelegateParams params_with_delegate = partition_params;
    params_with_delegate.delegate = delegate;
    TF_LITE_ENSURE_STATUS(
        kernel_state->Init(context, &params_with_delegate, nnapi_errno));
    std::vector<int> supported_partition_nodes;
    TF_LITE_ENSURE_STATUS(
        kernel_state->GetOperationsSupportedByTargetNnApiDevices(
            context, &supported_partition_nodes, nnapi_errno));
    device_supported_nodes->insert(device_supported_nodes->end(),
                                   supported_partition_nodes.begin(),
                                   supported_partition_nodes.end());

    bool model_fully_supported = (supported_partition_nodes.size() ==
                                  partition_params.nodes_to_replace->size);
    if (model_fully_supported) {
      delegate_data->CacheDelegateKernel(&partition_params,
                                         kernel_state.release());
    }
  }

  if (device_supported_nodes->size() != supported_nodes.size()) {
    // We changed the set of nodes to delegate; this will create a different
    // partitioning layout.
    auto device_sup_nodes_int_array =
        BuildTfLiteIntArray(*device_supported_nodes);
    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
        context, device_sup_nodes_int_array.get(), params_array,
        num_partitions));
  }

  return kTfLiteOk;
}

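// Enforces Options::max_number_delegated_partitions: if more partitions are
// currently marked for delegation than allowed, only the nodes of the largest
// `max_partitions` partitions (by node count) are kept in `nodes_to_delegate`.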
// static
TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
    int max_partitions,
    std::vector<TfLiteDelegateParams> partition_params_array,
    std::vector<int>* nodes_to_delegate) {
  int num_partitions = partition_params_array.size();
  if (max_partitions <= 0 || num_partitions <= max_partitions) {
    return kTfLiteOk;
  }

  int number_delegated_partitions = std::count_if(
      partition_params_array.begin(), partition_params_array.end(),
      [nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
        return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
                         partition_params.nodes_to_replace->data[0]) !=
               nodes_to_delegate->end();
      });

  if (number_delegated_partitions > max_partitions) {
    std::sort(partition_params_array.begin(), partition_params_array.end(),
              [](const TfLiteDelegateParams& left,
                 const TfLiteDelegateParams& right) -> bool {
                // Reverse sort
                return left.nodes_to_replace->size >
                       right.nodes_to_replace->size;
              });

    nodes_to_delegate->clear();

    for (int i = 0; i < max_partitions; i++) {
      const TfLiteDelegateParams& partition_params = partition_params_array[i];

      nodes_to_delegate->insert(nodes_to_delegate->end(),
                                partition_params.nodes_to_replace->data,
                                partition_params.nodes_to_replace->data +
                                    partition_params.nodes_to_replace->size);
    }
  }

  return kTfLiteOk;
}

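// Used when the execution plan contains DEQUANTIZE ops on constant fp16
// weights (see should_prune_fp16_dequantize in DoPrepare): nodes are validated
// with NNAPIDelegateKernel::Validate, partitioned with
// delegates::FP16GraphPartitionHelper, and the nodes of the N largest
// supported partitions are returned.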
static std::vector<int> GetSupportedOpsWithFp16WeightRemapping(
    TfLiteContext* context, int target_feature_level,
    bool is_accelerator_specified, int max_number_delegated_partitions) {
  std::vector<int> supported_nodes;
  delegates::IsNodeSupportedFn node_supported_fn =
      [=](TfLiteContext* context, TfLiteNode* node,
          TfLiteRegistration* registration,
          std::string* unsupported_details) -> bool {
    std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
    const auto is_supported = NNAPIDelegateKernel::Validate(
        context, registration->builtin_code, registration->version,
        target_feature_level, node, is_accelerator_specified, &map_failures);
    if (!is_supported) {
      if (unsupported_details) {
        for (auto& failure : map_failures) {
          unsupported_details->append(failure.message.c_str());
        }
      }
      return false;
    }
    return true;
  };

  delegates::FP16GraphPartitionHelper partition_helper(context,
                                                       node_supported_fn);
  std::set<std::string> unsupported_nodes_info;
  if (partition_helper.Partition(&unsupported_nodes_info) == kTfLiteOk) {
    // By default we simply take the single largest partition, since
    // 'max_number_delegated_partitions' defaults to 1.
    supported_nodes = partition_helper.GetNodesOfFirstNLargestPartitions(
        max_number_delegated_partitions);
  }
  return supported_nodes;
}

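// Entry point invoked by TFLite when the delegate is applied to a graph.
// The flow is: reset the sticky NNAPI error code, bail out early if NNAPI (or
// a usable accelerator) is unavailable, validate each node in the execution
// plan against the target feature level (optionally pruning constant fp16
// DEQUANTIZE ops), reuse a cached delegation decision when serialization is
// enabled, otherwise query accelerator support per partition, cap the number
// of delegated partitions, and finally ask TFLite to replace the selected
// node subsets with nnapi_delegate_kernel instances.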
TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                              TfLiteDelegate* delegate) {
  auto* delegate_data = static_cast<Data*>(delegate->data_);
  int* nnapi_errno = &(delegate_data->nnapi_errno);
  const NnApi* nnapi = delegate_data->nnapi;

  // Reset the error code when the delegate is initialized by TFLite. This
  // clears any error left over from a previous failure when the same
  // StatefulNnApiDelegate is reused.
  *nnapi_errno = 0;

  // Do not check nodes_ if NN API is unavailable.
  if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
      !nnapi->nnapi_exists) {
    return kTfLiteOk;
  }

  int target_feature_level = nnapi->android_sdk_version;
  const StatefulNnApiDelegate::Options delegate_options =
      StatefulNnApiDelegate::GetOptions(delegate);
  // For NNAPI 1.2+, check if there is any accelerator available.
  // If not, don't delegate to NNAPI's CPU reference implementation unless
  // it has been specified as target accelerator.
  if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
    if (ShouldUseTargetDevices(delegate_options, nnapi)) {
      std::vector<ANeuralNetworksDevice*> devices;
      TF_LITE_ENSURE_STATUS(
          GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));

      if (devices.empty()) {
        if (delegate_options.accelerator_name) {
          // There was a selected device and it is not available.
          return kTfLiteError;
        } else {
          // Only nnapi-reference is available but was disabled by the delegate
          // options.
          return kTfLiteOk;
        }
      }

      TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
          context, nnapi, devices, &target_feature_level, nnapi_errno));
    } else {
      // If no accelerator is specified, only use NNAPI if an accelerator is
      // available. Any available accelerator will make the device_count larger
      // than 1. More sophisticated check and allowlisting can be added later.
      uint32_t device_count = 0;
      RETURN_TFLITE_ERROR_IF_NN_ERROR(
          context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
          "getting number of NNAPI devices", nnapi_errno);
      if (device_count <= 1) {
        return kTfLiteOk;
      }
    }
  }

  std::vector<int> supported_nodes;
  // We don't care about all nodes_; we only care about the ones in the
  // current execution plan.
  TfLiteIntArray* plan;
  TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

  // Check for every node whether it is supported.
  const bool is_accelerator_specified = ShouldUseTargetDevices(
      delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
  std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
  bool should_prune_fp16_dequantize = false;
  for (int i = 0; i < plan->size; ++i) {
    const int node_id = plan->data[i];
    TfLiteNode* node = nullptr;
    TfLiteRegistration* registration = nullptr;
    TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
        context, node_id, &node, &registration));
    if (IsDequantizeConstFloat16(context, node, registration)) {
      should_prune_fp16_dequantize = true;
      break;
    }
  }
  if (should_prune_fp16_dequantize) {
    supported_nodes = GetSupportedOpsWithFp16WeightRemapping(
        context, target_feature_level, is_accelerator_specified,
        delegate_options.max_number_delegated_partitions);
  } else {
    for (int node_index : TfLiteIntArrayView(plan)) {
      TfLiteNode* node;
      TfLiteRegistration* registration;
      TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
          context, node_index, &node, &registration));
      if (NNAPIDelegateKernel::Validate(
              context, registration->builtin_code, registration->version,
              target_feature_level, node, is_accelerator_specified,
              &map_failures)) {
        supported_nodes.push_back(node_index);
      }
#ifdef NNAPI_VERBOSE_VALIDATION
      for (auto& failure : map_failures) {
        TFLITE_LOG_PROD(
            TFLITE_LOG_WARNING,
            "Operator %s (v%d) refused by NNAPI delegate: %s",
            tflite::EnumNameBuiltinOperator(
                static_cast<BuiltinOperator>(registration->builtin_code)),
            registration->version, failure.message.c_str());
      }
      map_failures.clear();
#endif
    }
  }

  // If there are no delegated nodes, short-circuit node replacement.
  if (supported_nodes.empty()) {
    return kTfLiteOk;
  }

  // NN API Delegate Registration (the pseudo kernel that will invoke NN
  // API node sub sets).
  static const TfLiteRegistration nnapi_delegate_kernel = {
      .init = [](TfLiteContext* context, const char* buffer,
                 size_t length) -> void* {
        const TfLiteDelegateParams* params =
            reinterpret_cast<const TfLiteDelegateParams*>(buffer);

        auto* delegate_data = static_cast<Data*>(params->delegate->data_);
        int* nnapi_errno = &(delegate_data->nnapi_errno);

        NNAPIDelegateKernel* kernel_state =
            delegate_data->MaybeGetCachedDelegateKernel(params);
        if (!kernel_state) {
          kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi);
          kernel_state->Init(context, params, nnapi_errno);
        }

        return kernel_state;
      },

      .free = [](TfLiteContext* context, void* buffer) -> void {
        delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
      },

      .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        NNAPIDelegateKernel* state =
            reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
        int* nnapi_errno =
            &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
        return state->Prepare(context, node, nnapi_errno);
      },

      .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
        NNAPIDelegateKernel* state =
            reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
        int* nnapi_errno =
            &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
        return state->Invoke(context, node, nnapi_errno);
      },

      .profiling_string = nullptr,
      .builtin_code = kTfLiteBuiltinDelegate,
      .custom_name = "TfLiteNnapiDelegate",
      .version = 1,
  };

  // Initialize caching, if applicable, from Options.
  const char* cache_dir = delegate_options.cache_dir;
  const char* model_token = delegate_options.model_token;
  delegates::SerializationParams params = {model_token, cache_dir};
  if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
      model_token) {
    delegate_data->cache.reset(new delegates::Serialization(params));
  }

  delegates::Serialization* cache_ptr = delegate_data->cache.get();

  if (cache_ptr) {
    // Reuse cached delegation decision if possible.
    std::string accelerator_id = NnApiBackendId(delegate_options);
    TfLiteIntArray* cached_nodes_to_delegate = nullptr;
    if (delegates::GetDelegatedNodes(context, cache_ptr, accelerator_id,
                                     &cached_nodes_to_delegate) == kTfLiteOk) {
      if (cached_nodes_to_delegate->size == 0) return kTfLiteOk;
      auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
          context, nnapi_delegate_kernel, cached_nodes_to_delegate, delegate);
      TfLiteIntArrayFree(cached_nodes_to_delegate);
      return status;
    }
  }

  std::vector<int> nodes_to_delegate;

  int num_partitions;
  TfLiteDelegateParams* params_array;
  if (is_accelerator_specified &&
      nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
    // Filter out nodes not supported by the target accelerators.
    // Supported operations cannot be queried before NNAPI 1.2.
    TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
        context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
        &num_partitions, &params_array, nnapi_errno));
  } else {
    nodes_to_delegate = supported_nodes;
    auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
    TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
        context, supported_nodes_int_array.get(), &params_array,
        &num_partitions));
  }

  TF_LITE_ENSURE_STATUS(LimitDelegatedPartitions(
      delegate_options.max_number_delegated_partitions,
      std::vector<TfLiteDelegateParams>(params_array,
                                        params_array + num_partitions),
      &nodes_to_delegate));

  auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);

  if (cache_ptr) {
    // Cache the list of nodes to be delegated for later runs.
    std::string accelerator_id = NnApiBackendId(delegate_options);
    if (delegates::SaveDelegatedNodes(context, cache_ptr, accelerator_id,
                                      nodes_to_delegate_int_array.get()) !=
        kTfLiteOk) {
      // Not a critical error.
      TF_LITE_KERNEL_LOG(context, "Could not save delegated nodes");
    }
  }

  if (nodes_to_delegate_int_array->size == 0) {
    return kTfLiteOk;
  } else {
    // Request TFLite to partition the graph and create a new
    // nnapi_delegate_kernel for each independent node subset.
    return context->ReplaceNodeSubsetsWithDelegateKernels(
        context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
        delegate);
  }
}

// Returns a singleton NNAPI Delegate that can check for support of ops.
TfLiteDelegate* NnApiDelegate() {
  static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
  return delegate;
}
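//
// Example client usage (a minimal sketch, not part of this file; assumes an
// already-built tflite::Interpreter named `interpreter`, and the accelerator
// name below is hypothetical):
//
//   tflite::StatefulNnApiDelegate::Options options;
//   options.allow_fp16 = true;                     // optional
//   options.accelerator_name = "example-dsp";      // hypothetical device name
//   tflite::StatefulNnApiDelegate nnapi_delegate(options);
//   if (interpreter->ModifyGraphWithDelegate(&nnapi_delegate) != kTfLiteOk) {
//     // Delegation failed; fall back to CPU execution.
//   }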

}  // namespace tflite