1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
16 
17 #include <algorithm>
18 #include <cstdarg>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstdio>
22 #include <cstring>
23 #include <functional>
24 #include <initializer_list>
25 #include <iostream>
26 #include <iterator>
27 #include <limits>
28 #include <map>
29 #include <memory>
30 #include <string>
31 #include <tuple>
32 #include <utility>
33 #include <vector>
34 
35 #include "tensorflow/lite/c/c_api_types.h"
36 #include "tensorflow/lite/delegates/serialization.h"
37 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
38 #include "tensorflow/lite/nnapi/sl/public/NeuralNetworksSupportLibraryImpl.h"
39 
40 #ifdef __ANDROID__
41 #include <sys/system_properties.h>
42 #endif
43 
44 #if defined __ANDROID__ || defined __unix__
45 #define TFLITE_NNAPI_ALLOW_MMAP_SHARING
46 #include <sys/mman.h>
47 #include <unistd.h>
48 #endif
49 
50 #include "fp16.h"
51 #include "tensorflow/lite/allocation.h"
52 #include "tensorflow/lite/builtin_op_data.h"
53 #include "tensorflow/lite/builtin_ops.h"
54 #include "tensorflow/lite/c/builtin_op_data.h"
55 #include "tensorflow/lite/c/common.h"
56 #include "tensorflow/lite/context_util.h"
57 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h"
58 #include "tensorflow/lite/delegates/nnapi/quant_lstm_sup.h"
59 #include "tensorflow/lite/delegates/utils.h"
60 #include "tensorflow/lite/kernels/kernel_util.h"
61 #include "tensorflow/lite/minimal_logging.h"
62 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
63 #include "tensorflow/lite/nnapi/nnapi_util.h"
64 #include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
65 #include "tensorflow/lite/util.h"
66 #ifdef NNAPI_VERBOSE_VALIDATION
67 #include "tensorflow/lite/schema/schema_generated.h"
68 #endif
69 #include "utils/hash/farmhash.h"
70 
71 namespace tflite {
72 namespace {
73 
74 static const char kNnapiId[] = "nnapi_";
75 
76 // Returns a string ID unique to the accelerator run by NNAPI, based on
77 // user params. Assumes that the default accelerator is the same across runs.
78 // Used for caching nodes to be delegated for a model.
79 std::string NnApiBackendId(
80     const StatefulNnApiDelegate::Options& delegate_options) {
81   std::string delegate_id = kNnapiId;
82   if (delegate_options.accelerator_name) {
83     delegate_id += delegate_options.accelerator_name;
84   }
85   return delegate_id;
86 }
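// Example (illustrative, with a hypothetical accelerator name): with
// Options{.accelerator_name = "example-dsp"} the function above yields the
// cache key "nnapi_example-dsp"; with no accelerator selected it yields
// "nnapi_".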
87 
88 // Returns the enum name corresponding to the given error code if the given
89 // value corresponds to one of the error codes in the enumeration above, or
90 // a message with the unknown code otherwise.
91 // LINT.IfChange(NnApiErrorDescription)
92 std::string NnApiErrorDescription(int error_code) {
93   switch (error_code) {
94     case ANEURALNETWORKS_NO_ERROR:
95       return "ANEURALNETWORKS_NO_ERROR";
96     case ANEURALNETWORKS_OUT_OF_MEMORY:
97       return "ANEURALNETWORKS_OUT_OF_MEMORY";
98     case ANEURALNETWORKS_INCOMPLETE:
99       return "ANEURALNETWORKS_INCOMPLETE";
100     case ANEURALNETWORKS_UNEXPECTED_NULL:
101       return "ANEURALNETWORKS_UNEXPECTED_NULL";
102     case ANEURALNETWORKS_BAD_DATA:
103       return "ANEURALNETWORKS_BAD_DATA";
104     case ANEURALNETWORKS_OP_FAILED:
105       return "ANEURALNETWORKS_OP_FAILED";
106     case ANEURALNETWORKS_BAD_STATE:
107       return "ANEURALNETWORKS_BAD_STATE";
108     case ANEURALNETWORKS_UNMAPPABLE:
109       return "ANEURALNETWORKS_UNMAPPABLE";
110     case ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE:
111       return "ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE";
112     case ANEURALNETWORKS_UNAVAILABLE_DEVICE:
113       return "ANEURALNETWORKS_UNAVAILABLE_DEVICE";
114     case ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT:
115       return "ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT";
116     case ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT:
117       return "ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT";
118     case ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT:
119       return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_TRANSIENT";
120     case ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT:
121       return "ANEURALNETWORKS_RESOURCE_EXHAUSTED_PERSISTENT";
122     case ANEURALNETWORKS_DEAD_OBJECT:
123       return "ANEURALNETWORKS_DEAD_OBJECT";
124     default:
125       return "Unknown NNAPI error code: " + std::to_string(error_code);
126   }
127 }
128 // LINT.ThenChange()
129 
130 #define RETURN_TFLITE_ERROR_IF_NN_ERROR(context, code, call_desc, p_errno)  \
131   do {                                                                      \
132     const auto _code = (code);                                              \
133     const auto _call_desc = (call_desc);                                    \
134     if (_code != ANEURALNETWORKS_NO_ERROR) {                                \
135       const auto error_desc = NnApiErrorDescription(_code);                 \
136       TF_LITE_KERNEL_LOG(context,                                           \
137                          "NN API returned error %s at line %d while %s.\n", \
138                          error_desc.c_str(), __LINE__, _call_desc);         \
139       *p_errno = _code;                                                     \
140       return kTfLiteError;                                                  \
141     }                                                                       \
142   } while (0)
143 
144 #define RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(context, code, call_desc, \
145                                                    p_tensor, p_errno)        \
146   do {                                                                       \
147     const auto _code = (code);                                               \
148     const auto _call_desc = (call_desc);                                     \
149     if (_code != ANEURALNETWORKS_NO_ERROR) {                                 \
150       const auto error_desc = NnApiErrorDescription(_code);                  \
151       TF_LITE_KERNEL_LOG(context,                                            \
152                          "NN API returned error %s at line %d while %s "     \
153                          "for tensor '%s'.\n",                               \
154                          error_desc.c_str(), __LINE__, _call_desc,           \
155                          (p_tensor)->name ? (p_tensor)->name : "no-name");   \
156       *p_errno = _code;                                                      \
157       return kTfLiteError;                                                   \
158     }                                                                        \
159   } while (0)
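// Usage sketch for the macros above (illustrative only; assumes a
// TfLiteContext* context, an NnApi* nnapi, an ANeuralNetworksModel* nn_model
// and an int* nnapi_errno are in scope):
//
//   RETURN_TFLITE_ERROR_IF_NN_ERROR(
//       context, nnapi->ANeuralNetworksModel_finish(nn_model),
//       "finishing the model", nnapi_errno);
//
// On failure this logs the decoded error name together with __LINE__ and the
// call description, stores the raw NNAPI code in *nnapi_errno, and returns
// kTfLiteError from the enclosing function.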
160 
161 bool IsFloat(TfLiteType type) {
162   switch (type) {
163     case kTfLiteFloat32:
164       return true;
165     default:
166       return false;
167   }
168 }
169 
170 bool IsFloatOrUInt8(TfLiteType type) {
171   switch (type) {
172     case kTfLiteFloat32:
173     case kTfLiteUInt8:
174       return true;
175     default:
176       return false;
177   }
178 }
179 
180 bool IsQuantized(TfLiteType type) {
181   switch (type) {
182     case kTfLiteUInt8:
183     case kTfLiteInt8:
184       return true;
185     default:
186       // kTfLiteInt16 isn't supported as a quantized type yet.
187       return false;
188   }
189 }
190 
191 bool IsInt32(TfLiteType type) {
192   switch (type) {
193     case kTfLiteInt32:
194       return true;
195     default:
196       return false;
197   }
198 }
199 
200 bool IsFloatOrQuantized(TfLiteType type) {
201   switch (type) {
202     case kTfLiteFloat32:
203     case kTfLiteUInt8:
204     case kTfLiteInt8:
205       return true;
206     default:
207       return false;
208   }
209 }
210 
211 bool IsFloatOrInt32(TfLiteType type) {
212   switch (type) {
213     case kTfLiteFloat32:
214     case kTfLiteInt32:
215       return true;
216     default:
217       return false;
218   }
219 }
220 
221 bool IsFloatQuantizedOrInt32(TfLiteType type) {
222   switch (type) {
223     case kTfLiteFloat32:
224     case kTfLiteUInt8:
225     case kTfLiteInt8:
226     case kTfLiteInt32:
227       return true;
228     default:
229       return false;
230   }
231 }
232 
233 bool IsScalarInputSupported(int builtin_code) {
234   switch (builtin_code) {
235     case kTfLiteBuiltinAdd:
236     case kTfLiteBuiltinMul:
237     case kTfLiteBuiltinSub:
238     case kTfLiteBuiltinDiv:
239     case kTfLiteBuiltinEqual:
240     case kTfLiteBuiltinNotEqual:
241     case kTfLiteBuiltinGreater:
242     case kTfLiteBuiltinGreaterEqual:
243     case kTfLiteBuiltinLess:
244     case kTfLiteBuiltinLessEqual:
245     case kTfLiteBuiltinPow:
246     case kTfLiteBuiltinMaximum:
247     case kTfLiteBuiltinMinimum:
248     case kTfLiteBuiltinPrelu:
249     case kTfLiteBuiltinLeakyRelu:
250       return true;
251     default:
252       return false;
253   }
254 }
255 
256 // Check if the operation requires explicit conversion from int8 to uint8
257 // values.
258 bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
259                         const TfLiteNode* node) {
260   const int input_id = node->inputs->data[0];
261   const TfLiteType input_type = context->tensors[input_id].type;
262   switch (builtin_code) {
263     case kTfLiteBuiltinConv2d:
264     case kTfLiteBuiltinDepthwiseConv2d:
265     case kTfLiteBuiltinFullyConnected: {
266       if (input_type == kTfLiteInt8) {
267         const int weights_id = node->inputs->data[1];
268         const auto& weights_tensor = context->tensors[weights_id];
269         if ((weights_tensor.type == kTfLiteInt8 ||
270              weights_tensor.type == kTfLiteUInt8) &&
271             weights_tensor.quantization.type == kTfLiteAffineQuantization) {
272           return true;
273         }
274       }
275       return false;
276     }
277     case kTfLiteBuiltinTransposeConv: {
278       // Transpose convolution has a different order of inputs:
279       // 0: output_shape, 1: filter, 2: input, 3: bias.
280       const int input_id = 2;
281       const TfLiteType input_type = context->tensors[input_id].type;
282       if (input_type == kTfLiteInt8) {
283         return true;
284       }
285       return false;
286     }
287     case kTfLiteBuiltinSelect: {
288       const auto value_type = context->tensors[node->inputs->data[1]].type;
289       return value_type == kTfLiteInt8;
290     }
291     case kTfLiteBuiltinAdd:
292     case kTfLiteBuiltinArgMax:
293     case kTfLiteBuiltinArgMin:
294     case kTfLiteBuiltinAveragePool2d:
295     case kTfLiteBuiltinBatchToSpaceNd:
296     case kTfLiteBuiltinConcatenation:
297     case kTfLiteBuiltinEqual:
298     case kTfLiteBuiltinExpandDims:
299     case kTfLiteBuiltinGather:
300     case kTfLiteBuiltinGreater:
301     case kTfLiteBuiltinGreaterEqual:
302     case kTfLiteBuiltinHardSwish:
303     case kTfLiteBuiltinL2Normalization:
304     case kTfLiteBuiltinLeakyRelu:
305     case kTfLiteBuiltinLess:
306     case kTfLiteBuiltinLessEqual:
307     case kTfLiteBuiltinLogistic:
308     case kTfLiteBuiltinMaximum:
309     case kTfLiteBuiltinMaxPool2d:
310     case kTfLiteBuiltinMean:
311     case kTfLiteBuiltinMinimum:
312     case kTfLiteBuiltinMul:
313     case kTfLiteBuiltinNotEqual:
314     case kTfLiteBuiltinPad:
315     case kTfLiteBuiltinPadv2:
316     case kTfLiteBuiltinPrelu:
317     case kTfLiteBuiltinReduceMax:
318     case kTfLiteBuiltinReduceMin:
319     case kTfLiteBuiltinRelu:
320     case kTfLiteBuiltinReluN1To1:
321     case kTfLiteBuiltinRelu6:
322     case kTfLiteBuiltinResizeBilinear:
323     case kTfLiteBuiltinResizeNearestNeighbor:
324     case kTfLiteBuiltinReshape:
325     case kTfLiteBuiltinSlice:
326     case kTfLiteBuiltinSoftmax:
327     case kTfLiteBuiltinSpaceToBatchNd:
328     case kTfLiteBuiltinSpaceToDepth:
329     case kTfLiteBuiltinDepthToSpace:
330     case kTfLiteBuiltinStridedSlice:
331     case kTfLiteBuiltinSub:
332     case kTfLiteBuiltinTanh:
333     case kTfLiteBuiltinTile:
334     case kTfLiteBuiltinTopkV2:
335     case kTfLiteBuiltinTranspose: {
336       return input_type == kTfLiteInt8;
337     }
338     default:
339       return false;
340   }
341 }
342 
343 constexpr int kLstmFullKernelInputSize = 24;
344 // The 20-input version is deprecated and kept only to
345 // support old models. The latest version of the LSTM full kernel
346 // is the one with 24 inputs.
347 constexpr int kLstmFullKernelNoOptionalParamsInputSize = 20;
348 constexpr int kLstmBasicKernelInputSize = 5;
349 
350 inline bool isLstmBasicKernel(const TfLiteNode* node) {
351   return node->inputs->size == kLstmBasicKernelInputSize;
352 }
353 
354 inline bool isLstmFullKernel(const TfLiteNode* node) {
355   return node->inputs->size == kLstmFullKernelInputSize ||
356          node->inputs->size == kLstmFullKernelNoOptionalParamsInputSize;
357 }
358 
359 bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
360                       const TfLiteNode* node) {
361   switch (builtin_code) {
362     case kTfLiteBuiltinConv2d:
363     case kTfLiteBuiltinFullyConnected: {
364       const int input_id = node->inputs->data[0];
365       const int filter_id = node->inputs->data[1];
366       const TfLiteType input_type = context->tensors[input_id].type;
367       const TfLiteType filter_type = context->tensors[filter_id].type;
368       return IsFloat(input_type) && IsQuantized(filter_type);
369     }
370     case kTfLiteBuiltinLstm: {
371       const int input_id = node->inputs->data[0];
372       // Input #1 is optional so use #2 to determine if hybrid.
373       const int weights_id = node->inputs->data[2];
374       const TfLiteType input_type = context->tensors[input_id].type;
375       const TfLiteType weights_type = context->tensors[weights_id].type;
376       return isLstmFullKernel(node) && IsFloat(input_type) &&
377              IsQuantized(weights_type);
378     }
379     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
380       const int input_id = node->inputs->data[0];
381       // Input #1 is optional so use #2 to determine if hybrid.
382       const int weights_id = node->inputs->data[2];
383       const TfLiteType input_type = context->tensors[input_id].type;
384       const TfLiteType weights_type = context->tensors[weights_id].type;
385       return IsFloat(input_type) && IsQuantized(weights_type);
386     }
387     case kTfLiteBuiltinBidirectionalSequenceLstm: {
388       const int input_id = node->inputs->data[0];
389       // Input #1 is optional so use #2 to determine if hybrid.
390       const int weights_id = node->inputs->data[2];
391       const TfLiteType input_type = context->tensors[input_id].type;
392       const TfLiteType weights_type = context->tensors[weights_id].type;
393       return IsFloat(input_type) && IsQuantized(weights_type);
394     }
395     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
396       const int input_id = node->inputs->data[0];
397       const int weights_id = node->inputs->data[1];
398       const TfLiteType input_type = context->tensors[input_id].type;
399       const TfLiteType weights_type = context->tensors[weights_id].type;
400       return IsFloat(input_type) && IsQuantized(weights_type);
401     }
402     default:
403       return false;
404   }
405 }
406 
407 bool IsDequantizeConstFloat16(TfLiteContext* context, const TfLiteNode* node,
408                               const TfLiteRegistration* registration) {
409   return registration->builtin_code == kTfLiteBuiltinDequantize &&
410          context->tensors[node->inputs->data[0]].type ==
411              TfLiteType::kTfLiteFloat16 &&
412          IsConstantTensor(&context->tensors[node->inputs->data[0]]);
413 }
414 
415 bool IsDequantizeNonConstFloat16(TfLiteContext* context, const TfLiteNode* node,
416                                  const TfLiteRegistration* registration) {
417   return registration->builtin_code == kTfLiteBuiltinDequantize &&
418          context->tensors[node->inputs->data[0]].type ==
419              TfLiteType::kTfLiteFloat16 &&
420          !IsConstantTensor(&context->tensors[node->inputs->data[0]]);
421 }
422 
423 bool IsDensifyConstTensor(TfLiteContext* context, const TfLiteNode* node,
424                           const TfLiteRegistration* registration) {
425   return registration->builtin_code == kTfLiteBuiltinDensify &&
426          IsConstantTensor(&context->tensors[node->inputs->data[0]]);
427 }
428 
429 bool HasUnspecifiedDimension(const TfLiteTensor* tensor) {
430   if (tensor->dims_signature) {
431     for (int i : TfLiteIntArrayView(tensor->dims_signature)) {
432       if (i == -1) return true;
433     }
434   }
435   return false;
436 }
437 
438 ANeuralNetworksOperandType ConvertTensorTypeToNNType(
439     const TfLiteTensor* tensor, TfLiteType ann_type_equivalent) {
440   int32_t nn_type = 0;
441   float scale = 0.0f;
442   int32_t zero_point = 0;
443   switch (tensor->type) {
444     case kTfLiteFloat32:
445       nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
446       break;
447     case kTfLiteUInt8:
448       nn_type = ann_type_equivalent == kTfLiteInt32
449                     ? ANEURALNETWORKS_TENSOR_INT32
450                     : ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
451       scale = tensor->params.scale;
452       zero_point = tensor->params.zero_point;
453       if (scale == 0) {
454         // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
455         // with zero scale are not valid in NNAPI.
456         scale = 1;
457       }
458       break;
459     case kTfLiteInt8:
460       nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
461       scale = tensor->params.scale;
462       zero_point = tensor->params.zero_point;
463       if (ann_type_equivalent == kTfLiteUInt8) {
464         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
465         zero_point += 128;
466       } else if (ann_type_equivalent == kTfLiteInt32) {
467         nn_type = ANEURALNETWORKS_TENSOR_INT32;
468         zero_point += 128;
469       }
470       if (scale == 0) {
471         // TENSOR_QUANT8_ASYMM and ANEURALNETWORKS_TENSOR_QUANT8_ASYMM
472         // with zero scale are not valid in NNAPI.
473         scale = 1;
474       }
475       break;
476     case kTfLiteInt32:
477       nn_type = ANEURALNETWORKS_TENSOR_INT32;
478       scale = tensor->params.scale;
479       zero_point = tensor->params.zero_point;
480       break;
481     case kTfLiteBool:
482       nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
483       break;
484     case kTfLiteInt16:
485       nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
486       scale = tensor->params.scale;
487       zero_point = tensor->params.zero_point;
488       break;
489     default:
490       break;
491   }
492   uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
493   uint32_t* tensor_dims = reinterpret_cast<uint32_t*>(tensor->dims->data);
494   static uint32_t scalar_rank = 1;
495   // Treat a scalar input as a single-cell tensor in NNAPI.
496   if (tensor_rank == 0) {
497     tensor_rank = scalar_rank;
498     tensor_dims = &scalar_rank;
499   }
500   ANeuralNetworksOperandType nn_operand_type{
501       .type = nn_type,
502       .dimensionCount = tensor_rank,
503       .dimensions = tensor_dims,
504       .scale = scale,
505       .zeroPoint = zero_point,
506   };
507   return nn_operand_type;
508 }
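// Worked example for the conversion above (illustrative): a kTfLiteInt8
// tensor with scale 0.5 and zero_point -4, requested with an
// ann_type_equivalent of kTfLiteUInt8, becomes an
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM operand with scale 0.5 and
// zero_point 124 (-4 + 128); a rank-0 TFLite scalar is reported to NNAPI as
// a rank-1 tensor with a single dimension of size 1.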
509 
510 // NNAPI in API 31 hard-codes the preferred alignment/padding to 64 bytes.
511 constexpr size_t kDefaultByteAlignmentForNNAPI = 64;
512 
513 static size_t GetNumPaddingBytes(size_t byte_size) {
514   size_t num_padding_bytes = 0;
515   if (byte_size % kDefaultByteAlignmentForNNAPI) {
516     num_padding_bytes = kDefaultByteAlignmentForNNAPI -
517                         (byte_size % kDefaultByteAlignmentForNNAPI);
518   }
519   return num_padding_bytes;
520 }
521 
522 static size_t GetNNTensorSize(size_t tensor_size, bool allow_padding) {
523   size_t padding_bytes = GetNumPaddingBytes(tensor_size);
524   size_t nn_tensor_size = tensor_size;
525   if (allow_padding) {
526     nn_tensor_size += padding_bytes;
527   }
528   return nn_tensor_size;
529 }
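// Worked numbers for the helpers above (illustrative): with the 64-byte
// alignment, GetNumPaddingBytes(100) returns 28 and GetNumPaddingBytes(128)
// returns 0, so GetNNTensorSize(100, /*allow_padding=*/true) reports 128
// bytes while GetNNTensorSize(100, /*allow_padding=*/false) reports 100.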
530 
531 // Returns the NNAPI device handle with the provided null-terminated device
532 // name. Returns kTfLiteError in case of any NNAPI error or if no device with
533 // the given name can be found.
534 TfLiteStatus GetDeviceHandle(const NnApi* nnapi, TfLiteContext* context,
535                              const char* device_name_ptr,
536                              ANeuralNetworksDevice** result, int* nnapi_errno) {
537   if (!device_name_ptr) return kTfLiteError;
538   *result = nullptr;
539   std::string device_name(device_name_ptr);
540   uint32_t num_devices = 0;
541   nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
542 
543   for (uint32_t i = 0; i < num_devices; i++) {
544     ANeuralNetworksDevice* device = nullptr;
545     const char* buffer = nullptr;
546     RETURN_TFLITE_ERROR_IF_NN_ERROR(
547         context, nnapi->ANeuralNetworks_getDevice(i, &device),
548         "Searching for target device", nnapi_errno);
549 
550     RETURN_TFLITE_ERROR_IF_NN_ERROR(
551         context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
552         "Searching for target device", nnapi_errno);
553 
554     if (device_name == buffer) {
555       *result = device;
556       return kTfLiteOk;
557     }
558   }
559 
560   context->ReportError(context,
561                        "Could not find the specified NNAPI accelerator: %s. "
562                        "Must be one of: {%s}.",
563                        device_name_ptr,
564                        nnapi::GetStringDeviceNamesList(nnapi).c_str());
565   return kTfLiteError;
566 }
567 
568 // Compute the hash of a TfLiteIntArray.
569 uint64_t GetHash(const TfLiteIntArray* int_array, uint64_t combine_with = 0) {
570   constexpr auto kHashConst = 0x9e3779b97f4a7800ULL;
571   uint64_t result = combine_with;
572   for (auto i : TfLiteIntArrayView(int_array)) {
573     result = result ^ (i + kHashConst + (result << 10) + (result >> 4));
574   }
575   return result;
576 }
577 
578 bool HasZeroes(TfLiteIntArrayView array) {
579   for (auto value : array) {
580     if (value == 0) {
581       return true;
582     }
583   }
584   return false;
585 }
586 
587 // Bit mask for tensor flags.
588 enum {
589   NN_TENSOR_FLAG_SCALAR_AS_TENSOR = 1U << 0,
590   NN_TENSOR_FLAG_INT8_CONVERSION = 1U << 1,
591   NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED = 1U << 2,
592   NN_TENSOR_FLAG_FORCE_PER_CHANNEL = 1U << 3,
593   NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION = 1U << 4,
594 };
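// The flags above are combined with bitwise OR. For example (illustrative),
// a scalar int8 input that also needs the int8-to-uint8 rewrite would be
// described as:
//
//   int tensor_flags =
//       NN_TENSOR_FLAG_SCALAR_AS_TENSOR | NN_TENSOR_FLAG_INT8_CONVERSION;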
595 
596 // Returns the feature level to target when delegating to the given devices.
597 // The feature level is the max of the ones supported by the devices or
598 // the current NNAPI runtime feature level if no device is present.
599 TfLiteStatus GetTargetFeatureLevel(
600     TfLiteContext* context, const NnApi* nnapi,
601     const std::vector<ANeuralNetworksDevice*>& device_handles,
602     int* target_feature_level, int* nnapi_errno) {
603   *target_feature_level = nnapi->nnapi_runtime_feature_level;
604   int64_t devices_feature_level = -1;
605   for (const auto* device_handle : device_handles) {
606     int64_t curr_device_feature_level;
607     RETURN_TFLITE_ERROR_IF_NN_ERROR(
608         context,
609         nnapi->ANeuralNetworksDevice_getFeatureLevel(
610             device_handle, &curr_device_feature_level),
611         "Searching for target device", nnapi_errno);
612 
613     devices_feature_level =
614         std::max(curr_device_feature_level, devices_feature_level);
615   }
616 
617   if ((devices_feature_level > 0) &&
618       // This second check is necessary since if the nnapi-reference device is
619       // in the list of target devices the devices_feature_level value will be
620       // 1000.
621       (devices_feature_level < nnapi->nnapi_runtime_feature_level)) {
622     TFLITE_LOG(TFLITE_LOG_INFO,
623                "Changing NNAPI Feature Level %lld to "
624                "supported by target devices: %lld",
625                nnapi->android_sdk_version, devices_feature_level);
626 
627     *target_feature_level = devices_feature_level;
628   }
629 
630   return kTfLiteOk;
631 }
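// Worked example for the selection above (illustrative): if the NNAPI runtime
// reports feature level 31 and the target devices report 29 and 30, the max
// device level (30) is used; if no device handle is passed, or the max device
// level is not lower than the runtime level, the runtime level (31) is kept.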
632 
633 // Returns true if this delegate is configured to use a specific set of devices.
634 // This will happen if either:
635 // - the accelerator_name option has been specified, or
636 // - the NNAPI CPU implementation has been explicitly disabled.
637 // If exclude_nnapi_reference is true, this method will return false if the
638 // accelerator_name in the delegate options is equal to "nnapi-reference".
639 bool ShouldUseTargetDevices(StatefulNnApiDelegate::Options delegate_options,
640                             const NnApi* nnapi,
641                             bool exclude_nnapi_reference = false) {
642   const char* device_name_ptr = delegate_options.accelerator_name;
643   std::string nnapi_cpu("nnapi-reference");
644   bool has_selected_accelerator = device_name_ptr != nullptr;
645   if (exclude_nnapi_reference && has_selected_accelerator) {
646     if (nnapi_cpu == device_name_ptr) return false;
647   }
648   return (delegate_options.disallow_nnapi_cpu &&
649           nnapi->android_sdk_version >=
650               delegate::nnapi::kMinSdkVersionForNNAPI12) ||
651          has_selected_accelerator;
652 }
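// Illustrative outcomes for the check above: an explicit accelerator_name such
// as "example-dsp" (hypothetical) returns true; accelerator_name
// "nnapi-reference" returns true unless exclude_nnapi_reference is set, in
// which case it returns false; with no accelerator selected, the result is
// true only when disallow_nnapi_cpu is set and the SDK supports NNAPI 1.2
// (kMinSdkVersionForNNAPI12).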
653 
654 // Fills the given result vector with the list of devices the given delegate
655 // is referring to.
656 // There are three possible results:
657 // - an empty array (not the full list of available accelerators,
658 //   for efficiency reasons) if no accelerator is chosen and the
659 //   disallow_nnapi_cpu delegate option is false.
660 // - A single element array with the target processor, if an accelerator name
661 //   is specified in the delegate options.
662 // - The full list of devices available on the device, excluding the NNAPI
663 //   reference implementation, if the delegate option disallow_nnapi_cpu has
664 //   been specified.
665 TfLiteStatus GetTargetDevices(TfLiteContext* context, TfLiteDelegate* delegate,
666                               const NnApi* nnapi, int* nnapi_errno,
667                               std::vector<ANeuralNetworksDevice*>* result) {
668   if (nnapi->android_sdk_version < delegate::nnapi::kMinSdkVersionForNNAPI12) {
669     return kTfLiteError;
670   }
671 
672   const auto delegate_options = StatefulNnApiDelegate::GetOptions(delegate);
673   const char* device_name_ptr = delegate_options.accelerator_name;
674 
675   if (device_name_ptr != nullptr) {
676     // User specified an accelerator to use.
677     ANeuralNetworksDevice* nnapi_device = nullptr;
678     TF_LITE_ENSURE_STATUS(GetDeviceHandle(nnapi, context, device_name_ptr,
679                                           &nnapi_device, nnapi_errno));
680     result->push_back(nnapi_device);
681   } else if (delegate_options.disallow_nnapi_cpu) {
682     std::string nnapi_cpu("nnapi-reference");
683     uint32_t num_devices = 0;
684     nnapi->ANeuralNetworks_getDeviceCount(&num_devices);
685 
686     for (uint32_t i = 0; i < num_devices; i++) {
687       ANeuralNetworksDevice* device = nullptr;
688       const char* buffer = nullptr;
689       RETURN_TFLITE_ERROR_IF_NN_ERROR(
690           context, nnapi->ANeuralNetworks_getDevice(i, &device),
691           "Getting list of available devices", nnapi_errno);
692       RETURN_TFLITE_ERROR_IF_NN_ERROR(
693           context, nnapi->ANeuralNetworksDevice_getName(device, &buffer),
694           "Getting list of available devices", nnapi_errno);
695       if (nnapi_cpu != buffer) {
696         result->push_back(device);
697       }
698     }
699   }
700 
701   return kTfLiteOk;
702 }
703 
704 }  // namespace
705 
706 namespace delegate {
707 namespace nnapi {
708 
709 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
710 NNMemory::NNMemory(const NnApi* nnapi, const char* name, size_t size) {
711   if (name && size > 0) {
712     nnapi_ = nnapi;
713     byte_size_ = size;
714 #ifdef __ANDROID__
715     fd_ = nnapi_->ASharedMemory_create(name, size);
716 #else
717     // For non-Android platforms, ASharedMemory_create needs a unique name to
718     // create a shared memory object (see nnapi_implementation.cc).
719     char shm_name_buffer[L_tmpnam];
720     if (tmpnam(shm_name_buffer) == nullptr) {
721       shm_name_buffer[0] = '\0';
722     }
723     // tmpnam will produce a string containing slashes, but shm_open
724     // won't accept that.
725     shm_region_name_ = std::string(name) + std::string(shm_name_buffer);
726     std::replace(shm_region_name_.begin(), shm_region_name_.end(), '/', '-');
727     fd_ = nnapi_->ASharedMemory_create(shm_region_name_.c_str(), size);
728 #endif
729 
730     data_ptr_ = reinterpret_cast<uint8_t*>(
731         mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
732     nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
733                                                fd_, 0, &nn_memory_handle_);
734   }
735 }
736 #else
737 NNMemory::NNMemory(const NnApi* /*nnapi*/, const char* /*name*/,
738                    size_t /*size*/)
739     : nnapi_(nullptr) {}
740 #endif
741 
742 NNMemory::~NNMemory() {
743 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
744   if (data_ptr_) {
745     munmap(data_ptr_, byte_size_);
746   }
747   if (nn_memory_handle_) {
748     nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
749   }
750 #ifdef __ANDROID__
751   if (fd_ >= 0) close(fd_);
752 #else
753   if (!shm_region_name_.empty()) shm_unlink(shm_region_name_.c_str());
754 #endif
755 #endif
756 }
757 
758 class DequantizeMapping {
759  public:
760   int DequantizedAnnIndex(int ann_index, TfLiteType type) const {
761     for (const auto& element : mapping_) {
762       if (ann_index == std::get<0>(element) && type == std::get<1>(element)) {
763         return std::get<2>(element);
764       }
765     }
766     return -1;
767   }
768 
769   void Add(int ann_index, TfLiteType type, int dequantized_ann_index) {
770     // This assumes it is not already mapped.
771     mapping_.emplace_back(ann_index, type, dequantized_ann_index);
772   }
773 
774  private:
775   // Each tuple specifies the ANN (quantized) tensor index, the desired
776   // floating-point type and the matching ANN (dequantized) tensor index. This
777   // could use a map but instead std::vector is used to keep code size lower.
778   std::vector<std::tuple<int, TfLiteType, int>> mapping_;
779 };
780 
781 // Abstract builder for building an op in the NN API graph. This handles
782 // the disparity between TFLite and NN API operand types. NN API has singular
783 // operands for both tensors and parameters, and TFLite separates the two.
784 class NNAPIOpBuilder {
785  public:
786   NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
787                  OperandMapping* tensor_mapping,
788                  DequantizeMapping* dequantize_mapping,
789                  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>*
790                      allocation_mapping,
791                  std::vector<int>* nnapi_to_tflite_op_mapping,
792                  ANeuralNetworksModel* nn_model, int* nnapi_errno,
793                  bool allow_dynamic_dimensions)
794       : nnapi_(nnapi),
795         context_(context),
796         operand_mapping_(tensor_mapping),
797         dequantize_mapping_(dequantize_mapping),
798         allocation_memory_mapping_(allocation_mapping),
799         nnapi_to_tflite_op_mapping_(nnapi_to_tflite_op_mapping),
800         nn_model_(nn_model),
801         nnapi_errno_(nnapi_errno),
802         allow_dynamic_dimensions_(allow_dynamic_dimensions) {}
803 
804   TfLiteStatus AddScalarBoolOperand(bool value) {
805     return AddScalarOperand<bool>(value, ANEURALNETWORKS_BOOL);
806   }
807 
808   TfLiteStatus AddScalarInt32Operand(int32_t value) {
809     return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
810   }
811 
812   TfLiteStatus AddScalarFloat32Operand(float value) {
813     return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
814   }
815 
816   TfLiteStatus AddVectorInt32Operand(const int32_t* values,
817                                      uint32_t num_values) {
818     return AddVectorOperand<int32_t>(values, num_values,
819                                      ANEURALNETWORKS_TENSOR_INT32,
820                                      /*scale=*/0.f, /*zero_point=*/0);
821   }
822 
823   TfLiteStatus AddVectorInt32Operand(const int32_t* values, uint32_t num_values,
824                                      float scale, int32_t zero_point) {
825     return AddVectorOperand<int32_t>(
826         values, num_values, ANEURALNETWORKS_TENSOR_INT32, scale, zero_point);
827   }
828 
829   TfLiteStatus AddVectorInt16Operand(const int16_t* values,
830                                      uint32_t num_values) {
831     return AddVectorOperand<int16_t>(values, num_values,
832                                      ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
833                                      /*scale=*/1.f, /*zero_point=*/0);
834   }
835 
836   TfLiteStatus AddVectorInt8Operand(const int8_t* values, uint32_t num_values) {
837     return AddVectorOperand<int8_t>(values, num_values,
838                                     ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
839                                     /*scale=*/1.f, /*zero_point=*/0);
840   }
841 
842   TfLiteStatus AddVectorFloat32Operand(const float* values,
843                                        uint32_t num_values) {
844     return AddVectorOperand<float>(values, num_values,
845                                    ANEURALNETWORKS_TENSOR_FLOAT32);
846   }
847 
848   TfLiteStatus AddPoolingParams(void* data) {
849     auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
850     AddScalarInt32Operand(builtin->padding);
851     AddScalarInt32Operand(builtin->stride_width);
852     AddScalarInt32Operand(builtin->stride_height);
853     AddScalarInt32Operand(builtin->filter_width);
854     AddScalarInt32Operand(builtin->filter_height);
855     AddScalarInt32Operand(builtin->activation);
856     return kTfLiteOk;
857   }
858 
859   TfLiteStatus AddTensorInput(int tensor_index, bool hybrid_op,
860                               int tensor_flags = 0) {
861     return AddTensor(tensor_index, hybrid_op, &augmented_inputs_, tensor_flags);
862   }
863 
864   TfLiteStatus AddTensorOutput(int tensor_index, int tensor_flags = 0) {
865     return AddTensor(tensor_index, /*hybrid_op=*/false, &augmented_outputs_,
866                      tensor_flags);
867   }
868 
869   TfLiteStatus AddAdditionalFloat32OutputTensor(uint32_t dimension_count) {
870     std::vector<uint32_t> dims(dimension_count, 0);
871     return AddFloat32OutputTensor(dimension_count, dims.data(), nullptr);
872   }
873 
874   TfLiteStatus AddStateFloat32Tensor(int tensor_index,
875                                      int* ann_tensor_index_out) {
876     TfLiteTensor* tensor = &context_->tensors[tensor_index];
877     return AddFloat32OutputTensor(
878         tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
879         ann_tensor_index_out);
880   }
881 
882   TfLiteStatus AddStateInt16Tensor(int tensor_index,
883                                    int* ann_tensor_index_out) {
884     TfLiteTensor* tensor = &context_->tensors[tensor_index];
885     return AddAdditionalOutputTensor(
886         tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
887         ANEURALNETWORKS_TENSOR_QUANT16_SYMM, tensor->params.scale,
888         tensor->params.zero_point, ann_tensor_index_out);
889   }
890 
891   TfLiteStatus AddStateInt8AsymTensor(int tensor_index,
892                                       int* ann_tensor_index_out) {
893     TfLiteTensor* tensor = &context_->tensors[tensor_index];
894     return AddAdditionalOutputTensor(
895         tensor->dims->size, reinterpret_cast<uint32_t*>(tensor->dims->data),
896         ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, tensor->params.scale,
897         tensor->params.zero_point, ann_tensor_index_out);
898   }
899 
900   // Adds a constant tensor with a single element, intended for
901   // broadcast-capable ops.
902   TfLiteStatus AddSingleValueConstantTensor(float value, bool is_quantized) {
903     if (!is_quantized) {
904       return AddVectorFloat32Operand(&value, 1);
905     } else {
906       // In the case that we need to add a quantized tensor, set the value to
907       // 64, the zero_point to 0, and adjust the scale accordingly.
908       const uint8_t quant8_value = 64;
909       return AddVectorOperand<uint8_t>(&quant8_value, 1,
910                                        ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
911                                        value / quant8_value, 0);
912     }
913   }
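// Worked example for the quantized branch above (illustrative): to represent
// the constant 1/3 as a QUANT8_ASYMM operand, the stored value is 64, the
// zero_point is 0 and the scale is (1/3) / 64 ≈ 0.0052, so the dequantized
// value 64 * 0.0052 recovers ≈ 1/3.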
914 
915   // Calculates the scale and zero_point for an 8-bit unsigned tensor, given
916   // the float min and max. zero_point is clamped to [0, 255].
917   TfLiteStatus CalculateQuantizationParams(float min, float max, float* scale,
918                                            int* zero_point) {
919     if (max < min) return kTfLiteError;
920     *scale = (max - min) / 255.f;
921     if (min > 0.f) {
922       *zero_point = 0;
923     } else if (max < 0.f) {
924       *zero_point = 255;
925     } else {
926       *zero_point = (0.f - min) / (*scale);
927     }
928     return kTfLiteOk;
929   }
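// Worked example for the helper above (illustrative): for min = -3.f and
// max = 3.f the scale is 6/255 ≈ 0.0235 and the zero_point is
// (0 - (-3)) / scale ≈ 127 (after the implicit float-to-int truncation);
// for a purely positive range such as [1, 4] the zero_point is clamped to 0.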
930 
931   // Lowers hardswish according to the following equation:
932   // hard_swish[x] = x * ReLU6(x + 3) / 6 == x * (Relu_N1_to_1(x/3) * 3 + 3) / 6
933   // = 0.5x * Relu_N1_to_1(x/3) + 0.5x
934   TfLiteStatus TransformHardSwishIntoSupportedOps(int lite_input_index,
935                                                   int lite_output_index,
936                                                   bool need_int8_conversion,
937                                                   int lite_node_index) {
938     const TfLiteTensor& tensor = context_->tensors[lite_input_index];
939     float input_scale = tensor.params.scale;
940     int input_zero_point = tensor.params.zero_point;
941     float input_min = 0.f;
942     float input_max = 0.f;
943     int tensor_flags = 0;
944     if (need_int8_conversion) {
945       tensor_flags = tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION;
946       input_zero_point += 128;
947     }
948     bool is_quantized = false;
949     int nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
950     if (tensor.type == kTfLiteInt8 || tensor.type == kTfLiteUInt8) {
951       is_quantized = true;
952       nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
953       input_min = (0 - input_zero_point) * input_scale;
954       input_max = (255 - input_zero_point) * input_scale;
955     }
956 
957     // Stage1 : s1 = Relu1(x * 1/3)
958     float s1_output_min = 0.f;
959     float s1_output_max = 0.f;
960     int s1_out_ann_index = 0;
961     {
962       float s1_output_scale = 0.f;
963       int s1_output_zero_point = 0;
964       if (is_quantized) {
965         // clamp the output range to [-1, 1] if needed.
966         s1_output_min = input_min / 3.f < -1.f ? -1.f : input_min / 3.f;
967         s1_output_max = input_max / 3.f > 1.f ? 1.f : input_max / 3.f;
968         CalculateQuantizationParams(s1_output_min, s1_output_max,
969                                     &s1_output_scale, &s1_output_zero_point);
970       }
971       TF_LITE_ENSURE_OK(context_,
972                         AddTensorInput(lite_input_index, false, tensor_flags));
973       const float value3f = 1.f / 3.f;
974       TF_LITE_ENSURE_OK(context_,
975                         AddSingleValueConstantTensor(value3f, is_quantized));
976       TF_LITE_ENSURE_OK(context_,
977                         AddScalarInt32Operand(ANEURALNETWORKS_FUSED_RELU1));
978       TF_LITE_ENSURE_OK(
979           context_,
980           AddAdditionalOutputTensor(
981               tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
982               nn_type, s1_output_scale, s1_output_zero_point,
983               &s1_out_ann_index));
984       TF_LITE_ENSURE_OK(
985           context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
986     }
987 
988     // Stage2 : s2 = x / 2
989     float s2_output_min = input_min / 2.f;
990     float s2_output_max = input_max / 2.f;
991     int s2_out_ann_index = 0;
992     {
993       float s2_output_scale = input_scale / 2.0f;
994       int s2_output_zero_point = input_zero_point;
995       TF_LITE_ENSURE_OK(context_,
996                         AddTensorInput(lite_input_index, false, tensor_flags));
997       const float value2f = 0.5f;
998       TF_LITE_ENSURE_OK(context_,
999                         AddSingleValueConstantTensor(value2f, is_quantized));
1000       TF_LITE_ENSURE_OK(context_,
1001                         AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1002       TF_LITE_ENSURE_OK(
1003           context_,
1004           AddAdditionalOutputTensor(
1005               tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1006               nn_type, s2_output_scale, s2_output_zero_point,
1007               &s2_out_ann_index));
1008       TF_LITE_ENSURE_OK(
1009           context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1010     }
1011 
1012     // Stage 3 : s3 = s1 * s2
1013     int s3_out_ann_index = 0;
1014     {
1015       augmented_inputs_.push_back(s1_out_ann_index);
1016       augmented_inputs_.push_back(s2_out_ann_index);
1017       TF_LITE_ENSURE_OK(context_,
1018                         AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1019       float s3_output_scale = 0.f;
1020       int s3_output_zero_point = 0;
1021       if (is_quantized) {
1022         // the min for stage 3 is always 0.0f.
1023         float s3_output_min = 0.f;
1024         // the max for stage 3 is max(s1_min * s2_min, s1_max * s2_max).
1025         float s3_output_max =
1026             s1_output_max * s2_output_max > s1_output_min * s2_output_min
1027                 ? s1_output_max * s2_output_max
1028                 : s1_output_min * s2_output_min;
1029         CalculateQuantizationParams(s3_output_min, s3_output_max,
1030                                     &s3_output_scale, &s3_output_zero_point);
1031       }
1032       TF_LITE_ENSURE_OK(
1033           context_,
1034           AddAdditionalOutputTensor(
1035               tensor.dims->size, reinterpret_cast<uint32_t*>(tensor.dims->data),
1036               nn_type, s3_output_scale, s3_output_zero_point,
1037               &s3_out_ann_index));
1038       TF_LITE_ENSURE_OK(
1039           context_, FinalizeAddOperation(ANEURALNETWORKS_MUL, lite_node_index));
1040     }
1041 
1042     // Stage 4: y = s3 + s2
1043     {
1044       augmented_inputs_.push_back(s2_out_ann_index);
1045       augmented_inputs_.push_back(s3_out_ann_index);
1046       TF_LITE_ENSURE_OK(context_,
1047                         AddScalarInt32Operand(ANEURALNETWORKS_FUSED_NONE));
1048       TF_LITE_ENSURE_OK(context_,
1049                         AddTensorOutput(lite_output_index, tensor_flags));
1050       TF_LITE_ENSURE_OK(
1051           context_, FinalizeAddOperation(ANEURALNETWORKS_ADD, lite_node_index));
1052     }
1053 
1054     return kTfLiteOk;
1055   }
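// Sanity check of the lowering above at x = 3 (illustrative):
// hard_swish(3) = 3 * ReLU6(6) / 6 = 3, and the staged form gives
// s1 = Relu1(3 * 1/3) = 1, s2 = 3 * 0.5 = 1.5, s3 = s1 * s2 = 1.5,
// y = s3 + s2 = 3, matching the reference definition.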
1056 
1057   // Adds the operation to the model and maps the operation to the originating
1058   // TFLite one.
1059   TfLiteStatus AddOperationToModel(ANeuralNetworksOperationType type,
1060                                    uint32_t input_count, const uint32_t* inputs,
1061                                    uint32_t output_count,
1062                                    const uint32_t* outputs,
1063                                    int lite_node_index) {
1064     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1065         context_,
1066         nnapi_->ANeuralNetworksModel_addOperation(
1067             nn_model_, type, input_count, inputs, output_count, outputs),
1068         "adding operation", nnapi_errno_);
1069     nnapi_to_tflite_op_mapping_->push_back(lite_node_index);
1070     return kTfLiteOk;
1071   }
1072 
1073   // Adds a Dequantize operator and replaces the input tensor index with the
1074   // dequantized version. If the dequantized version of the tensor already
1075   // exists, then it is not added again.
1076   TfLiteStatus AddDequantize(int nn_input_index, int lite_tensor_index,
1077                              TfLiteType dequantized_type, int lite_node_index) {
1078     const int ann_index =
1079         operand_mapping_->lite_index_to_ann(lite_tensor_index);
1080     int dequantized_ann_index =
1081         dequantize_mapping_->DequantizedAnnIndex(ann_index, dequantized_type);
1082 
1083     if (dequantized_ann_index == -1) {
1084       // The dequantized version does not exist yet, so it has to be added: a new
1085       // Dequantize operation is added, yielding a new tensor.
1086       const TfLiteTensor& tensor = context_->tensors[lite_tensor_index];
1087       ANeuralNetworksOperandType operand_type{
1088           ANEURALNETWORKS_TENSOR_FLOAT32,
1089           static_cast<uint32_t>(tensor.dims->size),
1090           reinterpret_cast<uint32_t*>(tensor.dims->data), 0.f, 0};
1091       RETURN_TFLITE_ERROR_IF_NN_ERROR(
1092           context_,
1093           nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1094           "adding operand", nnapi_errno_);
1095       dequantized_ann_index = operand_mapping_->add_new_non_tensor_operand();
1096 
1097       // Add Dequantize operation.
1098       const uint32_t dequantize_input[1] = {static_cast<uint32_t>(ann_index)};
1099       const uint32_t dequantize_output[1] = {
1100           static_cast<uint32_t>(dequantized_ann_index)};
1101       TF_LITE_ENSURE_OK(
1102           context_, AddOperationToModel(ANEURALNETWORKS_DEQUANTIZE,
1103                                         /*input_count=*/1, dequantize_input,
1104                                         /*output_count=*/1, dequantize_output,
1105                                         lite_node_index));
1106       dequantize_mapping_->Add(ann_index, dequantized_type,
1107                                dequantized_ann_index);
1108     }
1109 
1110     // The input for the original operation is modified so that the operation
1111     // now uses the dequantized tensor as input.
1112     augmented_inputs_[nn_input_index] = dequantized_ann_index;
1113 
1114     return kTfLiteOk;
1115   }
1116 
1117   // Add a RESHAPE op which reshapes an NNAPI intermediate output to the
1118   // dimensions of the TFLite output tensor.
1119   TfLiteStatus AppendReshape(int nn_input_index, int lite_out_tensor_index,
1120                              int lite_node_index) {
1121     augmented_inputs_.push_back(nn_input_index);
1122     auto& output_tensor = context_->tensors[lite_out_tensor_index];
1123     TF_LITE_ENSURE_STATUS(
1124         AddVectorInt32Operand(output_tensor.dims->data,
1125                               static_cast<uint32_t>(output_tensor.dims->size)));
1126     TF_LITE_ENSURE_OK(context_,
1127                       AddTensorOutput(lite_out_tensor_index,
1128                                       NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1129     TF_LITE_ENSURE_STATUS(
1130         FinalizeAddOperation(ANEURALNETWORKS_RESHAPE, lite_node_index));
1131     return kTfLiteOk;
1132   }
1133 
1134   // Lower PACK into CONCAT + RESHAPE when possible
1135   TfLiteStatus TransformPackIntoSupportedOps(int lite_node_index,
1136                                              TfLiteNode* node,
1137                                              TfLiteRegistration* reg) {
1138     // Add input tensors for CONCAT, and calculate the dimensions for the
1139     // output.
1140     int concat_output_ann_index = -1;
1141     TfLitePackParams* builtin =
1142         reinterpret_cast<TfLitePackParams*>(node->builtin_data);
1143     auto& input_tensor = context_->tensors[node->inputs->data[0]];
1144     int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
1145                                  : builtin->axis;
1146     TF_LITE_ENSURE(context_, axis < input_tensor.dims->size);
1147     uint32_t concat_dim_size = 0;
1148     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
1149       const auto input_index = node->inputs->data[input_pos];
1150       concat_dim_size +=
1151           context_->tensors[node->inputs->data[input_pos]].dims->data[axis];
1152       TF_LITE_ENSURE_STATUS(
1153           AddTensorInput(input_index, /*hybrid_op=*/false,
1154                          NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
1155     }
1156     TF_LITE_ENSURE_STATUS(AddScalarInt32Operand(axis));
1157     std::vector<uint32_t> concat_output_shape(input_tensor.dims->size, 0);
1158     for (int i = 0; i < concat_output_shape.size(); i++) {
1159       if (i == axis) {
1160         concat_output_shape[i] = concat_dim_size;
1161       } else {
1162         concat_output_shape[i] = input_tensor.dims->data[i];
1163       }
1164     }
1165     TF_LITE_ENSURE_STATUS(AddIntermediateOutputTensor(
1166         input_tensor.type, concat_output_shape.size(),
1167         concat_output_shape.data(), input_tensor.params.scale,
1168         input_tensor.params.zero_point, &concat_output_ann_index));
1169     TF_LITE_ENSURE_STATUS(
1170         FinalizeAddOperation(ANEURALNETWORKS_CONCATENATION, lite_node_index));
1171 
1172     // Reshape the output tensor
1173     TF_LITE_ENSURE_STATUS(AppendReshape(
1174         concat_output_ann_index, node->outputs->data[0], lite_node_index));
1175     return kTfLiteOk;
1176   }
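// Shape example for the lowering above (illustrative): packing two [2, 3]
// inputs with axis = 0 becomes a CONCATENATION along axis 0 producing an
// intermediate [4, 3] tensor, followed by a RESHAPE to the TFLite output
// shape [2, 2, 3].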
1177 
1178   // Finish emitting the op (of type `type`) into the NN API.
1179   TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type,
1180                                     int lite_node_index) {
1181     // Actually add a NN API operation
1182     TF_LITE_ENSURE_OK(context_,
1183                       AddOperationToModel(
1184                           type, static_cast<uint32_t>(augmented_inputs_.size()),
1185                           augmented_inputs_.data(),
1186                           static_cast<uint32_t>(augmented_outputs_.size()),
1187                           augmented_outputs_.data(), lite_node_index));
1188     augmented_inputs_.clear();
1189     augmented_outputs_.clear();
1190     return kTfLiteOk;
1191   }
1192 
1193   TfLiteStatus AddSingleValueTensorAsScalarOperand(int tensor_index,
1194                                                    int nn_type) {
1195     const TfLiteTensor* tensor = &context_->tensors[tensor_index];
1196     TF_LITE_ENSURE_EQ(context_, NumElements(tensor), 1);
1197 
1198     ANeuralNetworksOperandType operand_type{.type = nn_type};
1199     RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1200         context_,
1201         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1202         "adding operand", tensor, nnapi_errno_);
1203     int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1204     if (ann_tensor_index != -1) {
1205       augmented_inputs_.push_back(ann_tensor_index);
1206       return kTfLiteOk;
1207     }
1208     // Allocate a new tensor index
1209     ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1210     augmented_inputs_.push_back(ann_tensor_index);
1211 
1212     const TfLiteType tensor_type = tensor->type;
1213     TfLiteType nn_type_equivalent;
1214     TF_LITE_ENSURE_OK(context_, GetEquivalentToANNType(context_, nn_type,
1215                                                        &nn_type_equivalent));
1216     if (tensor_type != nn_type_equivalent) {
1217       operand_mapping_->add_type_conversion(tensor_index, nn_type_equivalent);
1218     }
1219     return kTfLiteOk;
1220   }
1221 
1222   template <typename T>
1223   TfLiteStatus AddNewInputConstantTensor(
1224       int32_t nn_type, TfLiteType type, const TfLiteIntArray* dims,
1225       const std::vector<T>& tensor_value,
1226       const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1227     TF_LITE_ENSURE_OK(context_,
1228                       context_->AddTensors(context_, 1, tensor_index));
1229 
1230     TfLiteTensor* new_tensor = &context_->tensors[*tensor_index];
1231     new_tensor->type = type;
1232     new_tensor->allocation_type = kTfLiteDynamic;
1233     new_tensor->params = quant_params;
1234 
1235     // Not removing the new tensor in case of resizing errors since it will
1236     // be cleared by the context
1237     TF_LITE_ENSURE_OK(
1238         context_,
1239         context_->ResizeTensor(
1240             context_, new_tensor,
1241             // Resize Tensor takes ownership of the dims array passed as param
1242             TfLiteIntArrayCopy(dims)));
1243 
1244     memcpy(new_tensor->data.raw,
1245            reinterpret_cast<const char*>(tensor_value.data()),
1246            tensor_value.size() * sizeof(T));
1247 
1248     const uint32_t tensor_rank = static_cast<uint32_t>(dims->size);
1249     const uint32_t* tensor_dims = reinterpret_cast<const uint32_t*>(dims->data);
1250     ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1251                                             quant_params.scale,
1252                                             quant_params.zero_point};
1253 
1254     const int ann_tensor_index =
1255         operand_mapping_->add_delegate_generated_input_ann_tensors_operand();
1256 
1257     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1258         context_,
1259         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1260         "adding operand", nnapi_errno_);
1261 
1262     augmented_inputs_.push_back(ann_tensor_index);
1263 
1264     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1265         context_,
1266         nnapi_->ANeuralNetworksModel_setOperandValue(
1267             nn_model_, ann_tensor_index, new_tensor->data.raw,
1268             new_tensor->bytes),
1269         "setting new operand value", nnapi_errno_);
1270 
1271     return kTfLiteOk;
1272   }
1273 
1274   template <typename T>
1275   TfLiteStatus AddNewInputConstantTensor(
1276       int32_t nn_type, TfLiteType type, std::initializer_list<int> dims,
1277       const std::vector<T>& tensor_value,
1278       const TfLiteQuantizationParams& quant_params, int* tensor_index) {
1279     TfLiteIntArray* dim_array = TfLiteIntArrayCreate(dims.size());
1280     dim_array->size = dims.size();
1281     std::copy(dims.begin(), dims.end(), dim_array->data);
1282 
1283     const auto result = AddNewInputConstantTensor(
1284         nn_type, type, dim_array, tensor_value, quant_params, tensor_index);
1285     TfLiteIntArrayFree(dim_array);
1286     return result;
1287   }
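  // Illustrative usage (a hypothetical sketch): add a constant one-element
  // int32 tensor (e.g. an axis parameter) as an extra NNAPI input of the
  // current operation; empty quantization params are fine for a
  // non-quantized tensor:
  //
  //   int axis_tensor_index = -1;
  //   TF_LITE_ENSURE_STATUS(AddNewInputConstantTensor<int32_t>(
  //       ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {1},
  //       std::vector<int32_t>{/*axis=*/3}, TfLiteQuantizationParams{},
  //       &axis_tensor_index));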
1288 
1289   TfLiteStatus AddIntermediateOutputTensor(TfLiteType tfl_type,
1290                                            uint32_t dimension_count,
1291                                            const uint32_t* dimension_data,
1292                                            float scale, int32_t zero_point,
1293                                            int* ann_index_out) {
1294     int32_t nn_type;
1295     switch (tfl_type) {
1296       case kTfLiteFloat32:
1297         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1298         break;
1299       case kTfLiteInt8:
1300         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1301         break;
1302       case kTfLiteUInt8:
1303         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1304         break;
1305       default:
1306         return kTfLiteError;
1307     }
1308     TF_LITE_ENSURE_STATUS(
1309         AddAdditionalOutputTensor(dimension_count, dimension_data, nn_type,
1310                                   scale, zero_point, ann_index_out));
1311     return kTfLiteOk;
1312   }
1313 
1314   void ClearInputOuputLists() {
1315     augmented_inputs_.clear();
1316     augmented_outputs_.clear();
1317   }
1318 
1319  private:
1320   // Returns a TF Lite type which has the same memory representation as a
1321   // provided NN API type.
1322   TfLiteStatus GetEquivalentToANNType(TfLiteContext* context, int nn_type,
1323                                       TfLiteType* type) {
1324     switch (nn_type) {
1325       case ANEURALNETWORKS_INT32:
1326         *type = kTfLiteInt32;
1327         return kTfLiteOk;
1328       case ANEURALNETWORKS_FLOAT32:
1329         *type = kTfLiteFloat32;
1330         return kTfLiteOk;
1331       default:
1332         context->ReportError(context,
1333                              "NN API Delegate: Can't get an equivalent TF Lite "
1334                              "type for provided NN API type: %d.\n",
1335                              nn_type);
1336         return kTfLiteError;
1337     }
1338   }
1339 
1340   template <typename T>
1341   TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
1342     ANeuralNetworksOperandType operand_type{.type = nn_type};
1343     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1344         context_,
1345         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1346         "adding operand", nnapi_errno_);
1347     const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1348     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1349         context_,
1350         nnapi_->ANeuralNetworksModel_setOperandValue(nn_model_, ann_index,
1351                                                      &value, sizeof(T)),
1352         "setting new operand value", nnapi_errno_);
1353     augmented_inputs_.push_back(ann_index);
1354     return kTfLiteOk;
1355   }
1356 
1357   template <typename T>
1358   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1359                                 int32_t nn_type, float scale,
1360                                 int32_t zero_point) {
1361     ANeuralNetworksOperandType operand_type{.type = nn_type,
1362                                             .dimensionCount = 1,
1363                                             .dimensions = &num_values,
1364                                             .scale = scale,
1365                                             .zeroPoint = zero_point};
1366 
1367     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1368         context_,
1369         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1370         "adding operand", nnapi_errno_);
1371 
1372     const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1373     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1374         context_,
1375         nnapi_->ANeuralNetworksModel_setOperandValue(
1376             nn_model_, ann_index, values, sizeof(T) * num_values),
1377         "setting new operand value", nnapi_errno_);
1378     augmented_inputs_.push_back(ann_index);
1379     return kTfLiteOk;
1380   }
1381 
1382   template <typename T>
1383   TfLiteStatus AddVectorOperand(const T* values, uint32_t num_values,
1384                                 int32_t nn_type) {
1385     return AddVectorOperand(values, num_values, nn_type, /*scale=*/0.f,
1386                             /*zero_point=*/0);
1387   }
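  // Illustrative usage (a hypothetical sketch): non-tensor parameters of an
  // op are passed to NNAPI as additional operands, e.g. a boolean flag and a
  // small 1-D int32 vector:
  //
  //   TF_LITE_ENSURE_STATUS(
  //       AddScalarOperand<bool>(/*value=*/false, ANEURALNETWORKS_BOOL));
  //   const int32_t block_shape[2] = {2, 2};
  //   TF_LITE_ENSURE_STATUS(AddVectorOperand<int32_t>(
  //       block_shape, /*num_values=*/2, ANEURALNETWORKS_TENSOR_INT32));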
1388 
1389   TfLiteStatus AddFloat32OutputTensor(uint32_t dimension_count,
1390                                       const uint32_t* dimension_data,
1391                                       int* ann_index_out) {
1392     return AddAdditionalOutputTensor(
1393         dimension_count, dimension_data, ANEURALNETWORKS_TENSOR_FLOAT32,
1394         /*scale=*/0.f, /*zero_point=*/0, ann_index_out);
1395   }
1396 
1397   TfLiteStatus AddAdditionalOutputTensor(uint32_t dimension_count,
1398                                          const uint32_t* dimension_data,
1399                                          int32_t nn_type, float scale,
1400                                          int32_t zero_point,
1401                                          int* ann_index_out) {
1402     ANeuralNetworksOperandType operand_type{
1403         .type = nn_type,
1404         .dimensionCount = dimension_count,
1405         .dimensions = dimension_data,
1406         .scale = scale,
1407         .zeroPoint = zero_point,
1408     };
1409     RETURN_TFLITE_ERROR_IF_NN_ERROR(
1410         context_,
1411         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1412         "adding operand", nnapi_errno_);
1413     const int ann_index = operand_mapping_->add_new_non_tensor_operand();
1414     augmented_outputs_.push_back(ann_index);
1415     if (ann_index_out) *ann_index_out = ann_index;
1416     return kTfLiteOk;
1417   }
1418 
1419   // Adds a new NN API tensor that shadows the TF Lite tensor `tensor_index`.
1420   // This returns the NN API tensor index corresponding to the created tensor.
1421   // If another caller previously created a NN API tensor for `tensor_index`
1422   // then the existing one is returned.
1423   TfLiteStatus AddTensor(int tensor_index, bool hybrid_op,
1424                          std::vector<uint32_t>* indices, int tensor_flags = 0) {
1425     const bool scalar_as_tensor =
1426         tensor_flags & NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
1427     const bool need_int8_conversion =
1428         tensor_flags & NN_TENSOR_FLAG_INT8_CONVERSION;
1429     const bool use_int8_asymm_signed =
1430         tensor_flags & NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
1431     const bool force_per_channel =
1432         tensor_flags & NN_TENSOR_FLAG_FORCE_PER_CHANNEL;
1433     const bool need_half2float_conversion =
1434         tensor_flags & NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
1435 
1436     int ann_tensor_index = operand_mapping_->lite_index_to_ann(tensor_index);
1437     if (ann_tensor_index != -1) {
1438       indices->push_back(ann_tensor_index);
1439       return kTfLiteOk;
1440     }
1441     // Allocate a new tensor index
1442     ann_tensor_index = operand_mapping_->add_new_ann_tensor_index(tensor_index);
1443 
1444     // Parameters needed for new type.
1445     int32_t nn_type = 0;
1446     float scale = 0.0f;
1447     int32_t zeroPoint = 0;
1448     ANeuralNetworksSymmPerChannelQuantParams ann_perchannel_params;
1449     TfLiteTensor* tensor = &context_->tensors[tensor_index];
1450     TfLiteType tensor_type = tensor->type;
1451     if (hybrid_op && (tensor_type == kTfLiteUInt8)) {
1452       // For legacy reasons, UINT8 weights in hybrid operators are actually INT8
1453       // values and should be interpreted as such.
1454       tensor_type = kTfLiteInt8;
1455     }
1456     switch (tensor_type) {
1457       case kTfLiteNoType:
1458         // Tensors added during initialization of Ops don't have a type yet and
1459         // should not be registered with the NNAPI.
1460         indices->push_back(-1);
1461         return kTfLiteOk;
1462       case kTfLiteFloat32:
1463         nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1464         break;
1465       case kTfLiteFloat16:
1466         nn_type = ANEURALNETWORKS_TENSOR_FLOAT16;
1467         if (need_half2float_conversion) {
1468           nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
1469           operand_mapping_->add_type_conversion(tensor_index, kTfLiteFloat32);
1470         }
1471         break;
1472       case kTfLiteUInt8:
1473         nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1474         scale = tensor->params.scale;
1475         zeroPoint = tensor->params.zero_point;
1476         if (scale == 0) {
1477           // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with zero scale is not valid in
1478           // NNAPI.
1479           scale = 1;
1480         }
1481         break;
1482       case kTfLiteInt8:
1483         // If explicit int8 conversion is needed, we still need
1484         // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM type.
1485         if (use_int8_asymm_signed) {
1486           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED;
1487         } else if (need_int8_conversion) {
1488           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
1489         } else {
1490           nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM;
1491         }
1492         scale = tensor->params.scale;
1493         zeroPoint = tensor->params.zero_point;
1494         if (tensor->quantization.type == kTfLiteAffineQuantization) {
1495           TfLiteAffineQuantization* quantization_params =
1496               static_cast<TfLiteAffineQuantization*>(
1497                   tensor->quantization.params);
1498           if (quantization_params->scale->size > 1 || force_per_channel) {
1499             // Set up per-channel quantization.
1500             ann_perchannel_params = {
1501                 .channelDim = static_cast<uint32_t>(
1502                     quantization_params->quantized_dimension),
1503                 .scaleCount =
1504                     static_cast<uint32_t>(quantization_params->scale->size),
1505                 .scales = quantization_params->scale->data,
1506             };
1507             nn_type = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL;
1508             scale = 0.0f;
1509             zeroPoint = 0;
1510           } else if (quantization_params->scale->size == 1) {
1511             scale = quantization_params->scale->data[0];
1512             zeroPoint = quantization_params->zero_point->data[0];
1513           }
1514         }
1515         if (nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1516           if (need_int8_conversion) {
1517             zeroPoint += 128;
1518             operand_mapping_->add_type_conversion(tensor_index, kTfLiteUInt8);
1519           }
1520           if (scale == 0) {
1521             // QUANT8 tensors with zero scale are not valid in NNAPI.
1522             scale = 1;
1523           }
1524         }
1525         break;
1526       case kTfLiteInt32:
1527         nn_type = ANEURALNETWORKS_TENSOR_INT32;
1528         scale = tensor->params.scale;
1529         zeroPoint = tensor->params.zero_point;
1530         break;
1531       case kTfLiteBool:
1532         nn_type = ANEURALNETWORKS_TENSOR_BOOL8;
1533         break;
1534       case kTfLiteInt16:
1535         nn_type = ANEURALNETWORKS_TENSOR_QUANT16_SYMM;
1536         scale = tensor->params.scale;
1537         zeroPoint = tensor->params.zero_point;
1538         break;
1539       default:
1540         context_->ReportError(
1541             context_, "Failed to add NN API tensor: type %s is not supported.",
1542             TfLiteTypeGetName(tensor_type));
1543         return kTfLiteError;
1544     }
1545     bool has_unspecified_dimensions = HasUnspecifiedDimension(tensor);
1546     uint32_t tensor_rank = static_cast<uint32_t>(tensor->dims->size);
1547     std::vector<uint32_t> dims_unspecified(tensor_rank, 0);
1548     if (has_unspecified_dimensions) {
1549       for (int i = 0; i < tensor->dims_signature->size; i++) {
1550         dims_unspecified[i] = tensor->dims_signature->data[i] == -1
1551                                   ? 0
1552                                   : tensor->dims_signature->data[i];
1553       }
1554     }
1555     uint32_t* tensor_dims =
1556         has_unspecified_dimensions && allow_dynamic_dimensions_
1557             ? dims_unspecified.data()
1558             : reinterpret_cast<uint32_t*>(tensor->dims->data);
1559     if (scalar_as_tensor && tensor_rank == 0) {
1560       // Use rank 1, shape {1} operand for TFLite scalar tensors.
1561       tensor_rank = 1;
1562       tensor_dims = &tensor_rank;
1563     }
1564     if (tensor_rank == 0) {
1565       // If tensor_rank is 0, the dimensions pointer must be nullptr.
1566       tensor_dims = nullptr;
1567     }
1568 
1569     ANeuralNetworksOperandType operand_type{nn_type, tensor_rank, tensor_dims,
1570                                             scale, zeroPoint};
1571     RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1572         context_,
1573         nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type),
1574         "adding operand", tensor, nnapi_errno_);
1575 
1576     if (nn_type == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1577       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1578           context_,
1579           nnapi_->ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
1580               nn_model_, ann_tensor_index, &ann_perchannel_params),
1581           "setting new operand per channel quantization params", tensor,
1582           nnapi_errno_);
1583     }
1584     if (tensor->allocation_type == kTfLiteMmapRo) {
1585       if (IsQuantized(tensor_type) && need_int8_conversion &&
1586           nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
1587         // We need to add a tensor and convert the weights into uint8.
1588         // Currently this is only needed for fully_connected. The new_tensor is
1589         // needed for lifetime management for the converted weights.
1590         int new_tensor_index = -1;
1591         TF_LITE_ENSURE_OK(context_,
1592                           context_->AddTensors(context_, 1, &new_tensor_index));
1593         TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1594         new_tensor->type = kTfLiteUInt8;
1595         new_tensor->allocation_type = kTfLiteDynamic;
1596         new_tensor->params.scale = scale;
1597         new_tensor->params.zero_point = zeroPoint;
1598         // Not removing the new tensor in case of resizing errors since it will
1599         // be cleared by the context
1600         TF_LITE_ENSURE_OK(
1601             context_, context_->ResizeTensor(context_, new_tensor,
1602                                              // Resize Tensor takes ownership of
1603                                              // the dims array passed as param
1604                                              TfLiteIntArrayCopy(tensor->dims)));
1605         // Convert each int8 value into the corresponding uint8 value.
1606         const auto num_elements = NumElements(tensor);
1607         for (int i = 0; i < num_elements; ++i) {
1608           new_tensor->data.uint8[i] = static_cast<uint8_t>(
1609               static_cast<int32_t>(tensor->data.int8[i]) + 128);
1610         }
1611         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1612             context_,
1613             nnapi_->ANeuralNetworksModel_setOperandValue(
1614                 nn_model_, ann_tensor_index, new_tensor->data.raw,
1615                 new_tensor->bytes),
1616             "setting new operand value", tensor, nnapi_errno_);
1617       } else if (tensor_type == kTfLiteFloat16 && need_half2float_conversion) {
1618         // We need to convert the constant fp16 weights to fp32. The new_tensor
1619         // is needed for lifetime management for the converted weights.
1620         int new_tensor_index = -1;
1621         TF_LITE_ENSURE_OK(context_,
1622                           context_->AddTensors(context_, 1, &new_tensor_index));
1623         TfLiteTensor* new_tensor = &context_->tensors[new_tensor_index];
1624         new_tensor->type = kTfLiteFloat32;
1625         new_tensor->allocation_type = kTfLiteDynamic;
1626         // Not removing the new tensor in case of resizing errors since it will
1627         // be cleared by the context
1628         TF_LITE_ENSURE_OK(
1629             context_, context_->ResizeTensor(context_, new_tensor,
1630                                              // Resize Tensor takes ownership of
1631                                              // the dims array passed as param
1632                                              TfLiteIntArrayCopy(tensor->dims)));
1633         // Convert each fp16 value into the corresponding fp32 value.
1634         const auto num_elements = NumElements(tensor);
1635         for (int i = 0; i < num_elements; ++i) {
1636           new_tensor->data.f[i] = fp16_ieee_to_fp32_value(
1637               reinterpret_cast<uint16_t*>(tensor->data.data)[i]);
1638         }
1639         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1640             context_,
1641             nnapi_->ANeuralNetworksModel_setOperandValue(
1642                 nn_model_, ann_tensor_index, new_tensor->data.data,
1643                 new_tensor->bytes),
1644             "setting new operand value", tensor, nnapi_errno_);
1645 #ifdef TFLITE_NNAPI_ALLOW_MMAP_SHARING
1646       } else if (tensor->allocation &&
1647                  static_cast<const Allocation*>(tensor->allocation)->type() ==
1648                      Allocation::Type::kMMap) {
1649         const MMAPAllocation* mmap_alloc =
1650             static_cast<const MMAPAllocation*>(tensor->allocation);
1651         if (allocation_memory_mapping_->count(mmap_alloc) == 0) {
1652           ANeuralNetworksMemory* ann_memory_handle = nullptr;
1653           nnapi_->ANeuralNetworksMemory_createFromFd(
1654               mmap_alloc->bytes(), PROT_READ, mmap_alloc->fd(), 0,
1655               &ann_memory_handle);
1656           allocation_memory_mapping_->insert(
1657               std::make_pair(mmap_alloc, ann_memory_handle));
1658         }
1659         ANeuralNetworksMemory* ann_memory_handle =
1660             allocation_memory_mapping_->at(mmap_alloc);
1661         // Compute the offset to the base pointer of the MMAPAllocation.
1662         auto offset = reinterpret_cast<const uint8_t*>(tensor->data.raw) -
1663                       reinterpret_cast<const uint8_t*>(mmap_alloc->base());
1664         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1665             context_,
1666             nnapi_->ANeuralNetworksModel_setOperandValueFromMemory(
1667                 nn_model_, ann_tensor_index, ann_memory_handle, offset,
1668                 tensor->bytes),
1669             "setting new operand value from memory", tensor, nnapi_errno_);
1670 #endif
1671       } else {
1672         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
1673             context_,
1674             nnapi_->ANeuralNetworksModel_setOperandValue(
1675                 nn_model_, ann_tensor_index, tensor->data.data, tensor->bytes),
1676             "setting new operand value", tensor, nnapi_errno_);
1677       }
1678     }
1679     indices->push_back(ann_tensor_index);
1680     return kTfLiteOk;
1681   }
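  // Illustrative note on AddTensor above (hypothetical indices): a second
  // call for the same TFLite tensor reuses the NNAPI operand created by the
  // first call, so both calls append the same ANN operand index:
  //
  //   std::vector<uint32_t> input_indices;
  //   AddTensor(/*tensor_index=*/5, /*hybrid_op=*/false, &input_indices);
  //   AddTensor(/*tensor_index=*/5, /*hybrid_op=*/false, &input_indices);
  //   // input_indices now contains the same operand index twice.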
1682 
1683   // Access to NNAPI.
1684   const NnApi* const nnapi_;
1685 
1686   // TfLiteContext for error handling.
1687   TfLiteContext* const context_;
1688 
1689   // Tracks relationship between indices.
1690   OperandMapping* const operand_mapping_;
1691 
1692   // Keeps mapping of ANN quantized tensor and float data type to equivalent
1693   // dequantized ANN tensor. For example, tensor #4 (UINT8) + FLOAT32 could map
1694   // to tensor #10 (FLOAT32) because a DEQUANTIZE operator was added to convert
1695   // tensor #4 to a FLOAT32 tensor.
1696   DequantizeMapping* const dequantize_mapping_;
1697 
1698   std::map<const MMAPAllocation*, ANeuralNetworksMemory*>* const
1699       allocation_memory_mapping_;
1700 
1701   // Tracks for every operation in the NNAPI model the source TfLite model
1702   // node index.
1703   std::vector<int>* const nnapi_to_tflite_op_mapping_;
1704 
1705   // The NNAPI model.
1706   ANeuralNetworksModel* const nn_model_;
1707 
1708   // Inputs and outputs for the current op. These are augmented in the sense
1709   // that NN API uses operands for all arguments, not just tensors, unlike
1710   // TensorFlow Lite.
1711   std::vector<uint32_t> augmented_inputs_;
1712   std::vector<uint32_t> augmented_outputs_;
1713 
1714   // Return status code of the latest NNAPI call.
1715   int* nnapi_errno_;
1716 
1717   // Whether to allow dynamic batch size without re-compilation.
1718   bool allow_dynamic_dimensions_;
1719 };  // class NNAPIOpBuilder
1720 
1721 namespace {
1722 struct OpValidationContext {
1723   bool is_valid;
1724   std::vector<NNAPIValidationFailure>* validation_failures;
1725 };
1726 
1727 #define EXPECT_INPUT_TYPE_IN(actual_type, ...)                    \
1728   ExpectTypeIn(actual_type, {__VA_ARGS__},                        \
1729                NNAPIValidationFailureType::kUnsupportedInputType, \
1730                "Input type not in expected list " #__VA_ARGS__, &val_ctx)
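// Illustrative expansion (a sketch with hypothetical arguments): used inside
// an op validation block that declares a local `val_ctx`, a check such as
//
//   EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8);
//
// expands to
//
//   ExpectTypeIn(input_type, {kTfLiteFloat32, kTfLiteUInt8},
//                NNAPIValidationFailureType::kUnsupportedInputType,
//                "Input type not in expected list kTfLiteFloat32, "
//                "kTfLiteUInt8", &val_ctx);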
1731 
1732 inline void AddValidationFailure(NNAPIValidationFailureType failure_type,
1733                                  const char* message,
1734                                  OpValidationContext* val_ctx) {
1735   val_ctx->is_valid = false;
1736 
1737 #ifdef NNAPI_VERBOSE_VALIDATION
1738   if (val_ctx->validation_failures) {
1739     val_ctx->validation_failures->push_back({failure_type, message});
1740   }
1741 #endif
1742 }
1743 
1744 template <typename... Args>
1745 inline void AddValidationFailureFmt(OpValidationContext* val_ctx,
1746                                     NNAPIValidationFailureType failure_type,
1747                                     const char* message_fmt, Args... args) {
1748   val_ctx->is_valid = false;
1749 #ifdef NNAPI_VERBOSE_VALIDATION
1750   if (val_ctx->validation_failures) {
1751     size_t req_buf_size = snprintf(nullptr, 0, message_fmt, args...) + 1;
1752     std::unique_ptr<char[]> tmp_buf(new char[req_buf_size]);
1753     snprintf(tmp_buf.get(), req_buf_size, message_fmt, args...);
1754 
1755     val_ctx->validation_failures->push_back({failure_type, tmp_buf.get()});
1756   }
1757 #endif
1758 }
1759 
1760 inline bool Expect(bool condition, NNAPIValidationFailureType failure_type,
1761                    const char* message, OpValidationContext* val_ctx) {
1762   if (!condition) {
1763     AddValidationFailure(failure_type, message, val_ctx);
1764     return false;
1765   }
1766   return true;
1767 }
1768 
1769 template <typename... Args>
1770 inline bool ExpectFmt(bool condition, OpValidationContext* val_ctx,
1771                       NNAPIValidationFailureType failure_type,
1772                       const char* message_fmt, Args... args) {
1773   if (!condition) {
1774     AddValidationFailureFmt(val_ctx, failure_type, message_fmt, args...);
1775     return false;
1776   }
1777   return true;
1778 }
1779 
1780 inline bool ExpectTypeIn(TfLiteType actual_type,
1781                          std::initializer_list<TfLiteType> allowed_types,
1782                          NNAPIValidationFailureType failure_type,
1783                          const char* msg, OpValidationContext* val_ctx) {
1784   return Expect(std::find(allowed_types.begin(), allowed_types.end(),
1785                           actual_type) != allowed_types.end(),
1786                 failure_type, msg, val_ctx);
1787 }
1788 
1789 inline bool ExpectMinAndroidSdkVersion(int curr_version, int min_version,
1790                                        OpValidationContext* val_ctx) {
1791   return ExpectFmt(curr_version >= min_version, val_ctx,
1792                    NNAPIValidationFailureType::kUnsupportedAndroidVersion,
1793                    "Android sdk version less than %d", min_version);
1794 }
1795 
1796 inline bool ExpectMaxOpVersion(int curr_version, int max_version,
1797                                OpValidationContext* val_ctx) {
1798   return ExpectFmt(curr_version <= max_version, val_ctx,
1799                    NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1800                    "OP Version higher than %d", max_version);
1801 }
1802 
1803 inline bool ExpectOpVersion(int curr_version, int max_version,
1804                             OpValidationContext* val_ctx) {
1805   return ExpectFmt(curr_version <= max_version, val_ctx,
1806                    NNAPIValidationFailureType::kUnsupportedOperatorVersion,
1807                    "OP Version different from %d", max_version);
1808 }
1809 
1810 inline bool ExpectIsFloatOperator(const TfLiteContext* context,
1811                                   const TfLiteNode* node,
1812                                   OpValidationContext* val_ctx) {
1813   const auto input_type = context->tensors[node->inputs->data[0]].type;
1814   return Expect(IsFloat(input_type),
1815                 NNAPIValidationFailureType::kUnsupportedInputType,
1816                 "Input should be Float", val_ctx);
1817 }
1818 
1819 bool ExpectIsFloatOrUint8Operator(const TfLiteContext* context,
1820                                   const TfLiteNode* node,
1821                                   OpValidationContext* val_ctx) {
1822   const auto input_type = context->tensors[node->inputs->data[0]].type;
1823   return Expect(IsFloatOrUInt8(input_type),
1824                 NNAPIValidationFailureType::kUnsupportedInputType,
1825                 "Input should be Float or UINT8", val_ctx);
1826 }
1827 
1828 bool ExpectIsFloatOrQuant8Operator(const TfLiteContext* context,
1829                                    const TfLiteNode* node,
1830                                    OpValidationContext* val_ctx) {
1831   const auto input_type = context->tensors[node->inputs->data[0]].type;
1832   return Expect(IsFloatOrQuantized(input_type),
1833                 NNAPIValidationFailureType::kUnsupportedInputType,
1834                 "Input should be Float or Quant8", val_ctx);
1835 }
1836 
1837 bool ExpectIsFloatOrInt32Operator(const TfLiteContext* context,
1838                                   const TfLiteNode* node,
1839                                   OpValidationContext* val_ctx) {
1840   const auto input_type = context->tensors[node->inputs->data[0]].type;
1841   return Expect(IsFloatOrInt32(input_type),
1842                 NNAPIValidationFailureType::kUnsupportedInputType,
1843                 "Input should be Float or Int32", val_ctx);
1844 }
1845 
1846 bool ExpectIsFloatQuant8OrInt32Operator(const TfLiteContext* context,
1847                                         const TfLiteNode* node,
1848                                         OpValidationContext* val_ctx) {
1849   const auto input_type = context->tensors[node->inputs->data[0]].type;
1850   return Expect(IsFloatQuantizedOrInt32(input_type),
1851                 NNAPIValidationFailureType::kUnsupportedInputType,
1852                 "Input should be Float, Quant8, or Int32", val_ctx);
1853 }
1854 
1855 // When using NN API version 1.0 or 1.1, the condition below must be true for
1856 // quantized versions of the following ops:
1857 // * CONV_2D
1858 // * DEPTHWISE_CONV_2D
1859 // * FULLY_CONNECTED (where filter actually stands for weights)
1860 // The condition is relaxed and no longer required since version 1.2.
1861 bool ExpectIsRestrictedScalesCompliant(const TfLiteContext* context,
1862                                        const TfLiteNode* node,
1863                                        OpValidationContext* val_ctx) {
1864   const int input_id = node->inputs->data[0];
1865   const int filter_id = node->inputs->data[1];
1866   const int output_id = node->outputs->data[0];
1867   const float input_scale = context->tensors[input_id].params.scale;
1868   const float filter_scale = context->tensors[filter_id].params.scale;
1869   const float output_scale = context->tensors[output_id].params.scale;
1870   return Expect(input_scale * filter_scale < output_scale,
1871                 NNAPIValidationFailureType::kNotRestrictedScaleCompliant,
1872                 "When using NN API version 1.0 or 1.1, input_scale * "
1873                 "filter_scale must be less than output_scale.",
1874                 val_ctx);
1875 }
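// Worked example for the check above (illustrative numbers only): with
// input_scale = 0.5, filter_scale = 0.25 and output_scale = 0.2, the product
// 0.5 * 0.25 = 0.125 is strictly less than 0.2, so a quantized CONV_2D passes
// this restriction on NNAPI 1.0/1.1; with output_scale = 0.1 it would fail.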
1876 
1877 }  // namespace
1878 
1879 // Returns true if the given node can be delegated to NNAPI for the given
1880 // builtin code, operator version and Android SDK version. Validation
1881 // failures are recorded in `map_failures` when verbose validation is enabled.
1882 bool NNAPIDelegateKernel::Validate(
1883     const TfLiteContext* context, int builtin_code, int version,
1884     int android_sdk_version, const TfLiteNode* node,
1885     bool is_accelerator_specified,
1886     std::vector<NNAPIValidationFailure>* map_failures) {
1887   OpValidationContext val_ctx{true, map_failures};
1888   switch (builtin_code) {
1889     case kTfLiteBuiltinAdd: {
1890       ExpectMaxOpVersion(version, 2, &val_ctx);
1891       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1892         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1893         if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1894           Expect(reinterpret_cast<TfLiteAddParams*>(node->builtin_data)
1895                          ->activation == kTfLiteActNone,
1896                  NNAPIValidationFailureType::kNoActivationExpected,
1897                  "No activation function supported", &val_ctx);
1898         }
1899       } else {
1900         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1901       }
1902     } break;
1903     case kTfLiteBuiltinArgMax:
1904     case kTfLiteBuiltinArgMin: {
1905       ExpectMaxOpVersion(version, 2, &val_ctx);
1906       // Those operators were introduced in NNAPI 1.2.
1907       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
1908                                  &val_ctx);
1909       const TfLiteType input_type =
1910           context->tensors[node->inputs->data[(0)]].type;
1911       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
1912                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
1913 
1914       const auto& axis_tensor = context->tensors[node->inputs->data[1]];
1915       if (axis_tensor.type == kTfLiteInt64) {
1916         Expect(
1917             axis_tensor.allocation_type == kTfLiteMmapRo &&
1918                 *axis_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
1919                 *axis_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
1920             NNAPIValidationFailureType::kUnsupportedInputType,
1921             "NNAPI only supports axis as int32. If the axis type is int64 and "
1922             "constant we can convert it to int32 if the value isn't too "
1923             "large.",
1924             &val_ctx);
1925       } else {
1926         Expect(axis_tensor.type == kTfLiteInt32,
1927                NNAPIValidationFailureType::kUnsupportedInputType,
1928                "Axis should be Int32", &val_ctx);
1929       }
1930       if (builtin_code == kTfLiteBuiltinArgMax) {
1931         auto builtin =
1932             reinterpret_cast<TfLiteArgMaxParams*>(node->builtin_data);
1933         Expect(builtin->output_type == kTfLiteInt32,
1934                NNAPIValidationFailureType::kUnsupportedOutputType,
1935                "NNAPI only supports int32 output.", &val_ctx);
1936       } else {
1937         auto builtin =
1938             reinterpret_cast<TfLiteArgMinParams*>(node->builtin_data);
1939         Expect(builtin->output_type == kTfLiteInt32,
1940                NNAPIValidationFailureType::kUnsupportedOutputType,
1941                "NNAPI only supports int32 output.", &val_ctx);
1942       }
1943     } break;
1944     case kTfLiteBuiltinMul: {
1945       if (is_accelerator_specified) {
1946         ExpectMaxOpVersion(version, 3, &val_ctx);
1947       } else {
1948         ExpectMaxOpVersion(version, 2, &val_ctx);
1949       }
1950       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
1951         ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
1952         if (IsInt32(context->tensors[node->inputs->data[0]].type)) {
1953           Expect(reinterpret_cast<TfLiteMulParams*>(node->builtin_data)
1954                          ->activation == kTfLiteActNone,
1955                  NNAPIValidationFailureType::kNoActivationExpected,
1956                  "No activation function supported", &val_ctx);
1957         }
1958       } else {
1959         ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1960       }
1961     } break;
1962     case kTfLiteBuiltinAveragePool2d: {
1963       ExpectMaxOpVersion(version, 2, &val_ctx);
1964       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1965       auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1966       // TODO(b/138756912): Large filter window would overflow on the
1967       // quantized reference CPU path.
1968       if (IsQuantized(context->tensors[node->inputs->data[0]].type)) {
1969         Expect(is_accelerator_specified ||
1970                    (builtin->filter_width * builtin->filter_height <= 256),
1971                NNAPIValidationFailureType::kUnsupportedOperandSize,
1972                "Large filter window would overflow on the reference CPU path",
1973                &val_ctx);
1974       }
1975     } break;
1976     case kTfLiteBuiltinMaxPool2d: {
1977       ExpectMaxOpVersion(version, 2, &val_ctx);
1978       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
1979     } break;
1980     case kTfLiteBuiltinL2Pool2d: {
1981       ExpectOpVersion(version, 1, &val_ctx);
1982       ExpectIsFloatOperator(context, node, &val_ctx);
1983 
1984       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1985         auto builtin = reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
1986         Expect(builtin->activation == kTfLiteActNone,
1987                NNAPIValidationFailureType::kUnsupportedOperandValue,
1988                "Before NNAPI 1.2 fused activation for l2_pool may not be "
1989                "supported.",
1990                &val_ctx);
1991       }
1992     } break;
1993     case kTfLiteBuiltinConv2d: {
1994       ExpectMaxOpVersion(version, 3, &val_ctx);
1995       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
1996         Expect(!IsHybridOperator(context, builtin_code, node),
1997                NNAPIValidationFailureType::kUnsupportedHybridOperator,
1998                "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
1999         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2000 
2001         const auto& filter_tensor = context->tensors[node->inputs->data[1]];
2002         if (filter_tensor.quantization.type == kTfLiteAffineQuantization) {
2003           TfLiteAffineQuantization* quantization_params =
2004               static_cast<TfLiteAffineQuantization*>(
2005                   filter_tensor.quantization.params);
2006           Expect(quantization_params->scale->size <= 1,
2007                  NNAPIValidationFailureType::kUnsupportedQuantizationType,
2008                  "Per-channel quantized convolution not supported before NNAPI "
2009                  "1.2.",
2010                  &val_ctx);
2011         }
2012       }
2013       const auto input_type = context->tensors[node->inputs->data[0]].type;
2014       if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2015           input_type == kTfLiteUInt8) {
2016         ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2017       }
2018       auto builtin = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
2019       // TODO(b/132950584): Add support for Conv2D with omitted bias.
2020       Expect(node->inputs->size == 3,
2021              NNAPIValidationFailureType::kMissingRequiredOperand,
2022              "Conv2D with omitted bias not supported", &val_ctx);
2023       if (builtin->dilation_width_factor != 1 ||
2024           builtin->dilation_height_factor != 1) {
2025         Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2026                NNAPIValidationFailureType::kUnsupportedOperandValue,
2027                "NNAPI supports dilated Conv2D since NNAPI 1.2.", &val_ctx);
2028       }
2029     } break;
2030     case kTfLiteBuiltinDepthwiseConv2d: {
2031       ExpectMaxOpVersion(version, 3, &val_ctx);
2032 
2033       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2034         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2035 
2036         const auto input_type = context->tensors[node->inputs->data[0]].type;
2037         if (input_type == kTfLiteUInt8) {
2038           ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2039         }
2040 
2041         auto builtin =
2042             reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
2043         Expect(builtin->dilation_width_factor == 1 &&
2044                    builtin->dilation_height_factor == 1,
2045                NNAPIValidationFailureType::kUnsupportedOperandValue,
2046                "dilation_width_factor and dilation_height_factor expected to "
2047                "be equal to 1",
2048                &val_ctx);
2049       }
2050     } break;
2051     case kTfLiteBuiltinFullyConnected: {
2052       ExpectMaxOpVersion(version, 5, &val_ctx);
2053       const auto output_type = context->tensors[node->outputs->data[0]].type;
2054       Expect(output_type != kTfLiteInt16,
2055              NNAPIValidationFailureType::kUnsupportedOutputType,
2056              "Unsupported output of type kTfLiteInt16", &val_ctx);
2057       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2058         Expect(!IsHybridOperator(context, builtin_code, node),
2059                NNAPIValidationFailureType::kUnsupportedHybridOperator,
2060                "Hybrid operators not supported before NNAPI 1.2", &val_ctx);
2061         ExpectIsFloatOrUint8Operator(context, node, &val_ctx);
2062       }
2063       const auto input_type = context->tensors[node->inputs->data[0]].type;
2064       if (android_sdk_version < kMinSdkVersionForNNAPI12 &&
2065           input_type == kTfLiteUInt8) {
2066         ExpectIsRestrictedScalesCompliant(context, node, &val_ctx);
2067       }
2068       auto builtin =
2069           reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data);
2070       if (builtin->keep_num_dims) {
2071         ExpectMinAndroidSdkVersion(android_sdk_version,
2072                                    kMinSdkVersionForNNAPI13, &val_ctx);
2073       }
2074     } break;
2075     case kTfLiteBuiltinHardSwish: {
2076       // Hardswish is supported; on pre-Q devices it is decomposed into
2077       // basic ops. Note that for some NNAPI accelerators the optimized
2078       // TFLite kernels may still be faster.
2079       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2080     } break;
2081     case kTfLiteBuiltinSoftmax: {
2082       ExpectOpVersion(version, 2, &val_ctx);
2083       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2084       const auto& output = context->tensors[node->outputs->data[0]];
2085       ExpectTypeIn(output.type, {kTfLiteFloat32, kTfLiteUInt8, kTfLiteInt8},
2086                    NNAPIValidationFailureType::kUnsupportedOutputType,
2087                    "Output type should be one of kTfLiteFloat32, kTfLiteUInt8, "
2088                    "kTfLiteInt8.",
2089                    &val_ctx);
2090       const auto& input = context->tensors[node->inputs->data[0]];
2091       const int input_rank = input.dims->size;
2092       Expect(input_rank <= 4,
2093              NNAPIValidationFailureType::kUnsupportedOperandRank,
2094              "Input rank should be <= 4", &val_ctx);
2095       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2096         Expect(
2097             input_rank == 2 || input_rank == 4,
2098             NNAPIValidationFailureType::kUnsupportedOperandRank,
2099             "Before API level 29 only 2D and 4D input tensors were supported.",
2100             &val_ctx);
2101       }
2102     } break;
2103     case kTfLiteBuiltinReshape: {
2104       ExpectOpVersion(version, 1, &val_ctx);
2105       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2106       if (node->inputs->size >= 2) {
2107         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2108                    kTfLiteMmapRo,
2109                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2110                "The shape input tensor must be constant.", &val_ctx);
2111       }
2112       if (node->inputs->size == 1) {
2113         // reject scalar reshaping
2114         auto* params =
2115             reinterpret_cast<TfLiteReshapeParams*>(node->builtin_data);
2116         int num_dimensions = params->num_dimensions;
2117         if (num_dimensions == 1 && params->shape[0] == 0) {
2118           // Legacy tflite models use a shape parameter of [0] to indicate
2119           // scalars.
2120           num_dimensions = 0;
2121         }
2122         Expect(num_dimensions > 0,
2123                NNAPIValidationFailureType::kUnsupportedOperandRank,
2124                "New shape rank should be > 0", &val_ctx);
2125       }
2126     } break;
2127     case kTfLiteBuiltinResizeBilinear: {
2128       ExpectMaxOpVersion(version, 3, &val_ctx);
2129       const auto& input = context->tensors[node->inputs->data[0]];
2130       const auto output_dims = context->tensors[node->outputs->data[0]].dims;
2131       Expect(input.dims->size == 4,
2132              NNAPIValidationFailureType::kUnsupportedOperandRank,
2133              "Input should have rank 4", &val_ctx);
2134       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2135       Expect(node->inputs->size >= 2,
2136              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2137              "Expected at least 2 inputs", &val_ctx);
2138       if (node->inputs->size >= 2) {
2139         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2140                    kTfLiteMmapRo,
2141                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2142                "The size input tensor must be constant.", &val_ctx);
2143       }
2144       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2145         Expect(output_dims->data[1] == output_dims->data[2],
2146                NNAPIValidationFailureType::kUnsupportedOperandValue,
2147                "Require width == height due to driver differences in NNAPI "
2148                "< 1.2",
2149                &val_ctx);
2150       }
2151       auto builtin =
2152           reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
2153       if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2154         Expect(!builtin->align_corners,
2155                NNAPIValidationFailureType::kUnsupportedOperandValue,
2156                "NNAPI does not support align_corners == true.", &val_ctx);
2157         Expect(!builtin->half_pixel_centers,
2158                NNAPIValidationFailureType::kUnsupportedOperandValue,
2159                "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2160       }
2161       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2162         Expect(input.type == kTfLiteFloat32,
2163                NNAPIValidationFailureType::kUnsupportedInputType,
2164                "NNAPI 1.0 & 1.1 only supports float input.", &val_ctx);
2165       }
2166     } break;
2167     case kTfLiteBuiltinResizeNearestNeighbor: {
2168       ExpectMaxOpVersion(version, 3, &val_ctx);
2169       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2170                                  &val_ctx);
2171       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2172       Expect(node->inputs->size >= 2,
2173              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2174              "Expected at least 2 inputs", &val_ctx);
2175       if (node->inputs->size >= 2) {
2176         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
2177                    kTfLiteMmapRo,
2178                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2179                "The size input tensor must be constant.", &val_ctx);
2180       }
2181       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
2182           node->builtin_data);
2183       if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
2184         Expect(!builtin->align_corners,
2185                NNAPIValidationFailureType::kUnsupportedOperandValue,
2186                "NNAPI does not support align_corners == true.", &val_ctx);
2187         Expect(!builtin->half_pixel_centers,
2188                NNAPIValidationFailureType::kUnsupportedOperandValue,
2189                "NNAPI does not support half_pixel_centers == true.", &val_ctx);
2190       }
2191     } break;
2192     case kTfLiteBuiltinSqueeze: {
2193       ExpectOpVersion(version, 1, &val_ctx);
2194       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2195                                  &val_ctx);
2196       auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(node->builtin_data);
2197       if (android_sdk_version == kMinSdkVersionForNNAPI11) {
2198         Expect(builtin->num_squeeze_dims != 0,
2199                NNAPIValidationFailureType::kUnsupportedOperandValue,
2200                "NNAPI 1.1 does not support null squeeze_dims properly.",
2201                &val_ctx);
2202       }
2203     } break;
2204     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
2205       ExpectMaxOpVersion(version, 2, &val_ctx);
2206       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2207                                  &val_ctx);
2208 
2209       Expect(!IsHybridOperator(context, builtin_code, node),
2210              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2211              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2212 
2213       Expect(node->inputs->size == 20 || node->inputs->size == 24,
2214              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2215              "Only operations with 20 or 24 inputs are supported", &val_ctx);
2216     } break;
2217     case kTfLiteBuiltinL2Normalization: {
2218       ExpectMaxOpVersion(version, 2, &val_ctx);
2219 
2220       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2221         ExpectIsFloatOperator(context, node, &val_ctx);
2222 
2223         const auto& input = context->tensors[node->inputs->data[0]];
2224         Expect(input.dims->size == 4,
2225                NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2226                "Expected input of rank 4", &val_ctx);
2227       }
2228       auto builtin = reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
2229       Expect(builtin->activation == kTfLiteActNone,
2230              NNAPIValidationFailureType::kNoActivationExpected,
2231              "Expected no activation", &val_ctx);
2232     } break;
2233     case kTfLiteBuiltinLocalResponseNormalization: {
2234       ExpectOpVersion(version, 1, &val_ctx);
2235     } break;
2236     case kTfLiteBuiltinLshProjection: {
2237       ExpectOpVersion(version, 1, &val_ctx);
2238 
2239       if (reinterpret_cast<TfLiteLSHProjectionParams*>(node->builtin_data)
2240               ->type == kTfLiteLshProjectionSparse) {
2241         // NNAPI does not support sparse projection correctly pre-Q
2242         // (b/111751836).
2243         Expect(android_sdk_version >= kMinSdkVersionForNNAPI12,
2244                NNAPIValidationFailureType::kUnsupportedInputType,
2245                "NNAPI does not support sparse projection correctly pre-Q",
2246                &val_ctx);
2247         Expect(node->inputs->size == 2,
2248                NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2249                "NNAPI does not support weights for sparse projection.",
2250                &val_ctx);
2251       }
2252     } break;
2253     case kTfLiteBuiltinConcatenation: {
2254       ExpectMaxOpVersion(version, 2, &val_ctx);
2255       Expect(reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data)
2256                      ->activation == kTfLiteActNone,
2257              NNAPIValidationFailureType::kNoActivationExpected,
2258              "No activation function supported", &val_ctx);
2259       Expect(context->tensors[node->inputs->data[0]].dims->size <= 4,
2260              NNAPIValidationFailureType::kUnsupportedOperandRank,
2261              "Input rank should be at most 4", &val_ctx);
2262 
2263       const auto& input_type = context->tensors[node->inputs->data[0]].type;
2264       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat16, kTfLiteFloat32,
2265                            kTfLiteUInt8, kTfLiteInt8);
2266 
2267       if (input_type == kTfLiteUInt8 &&
2268           android_sdk_version < kMinSdkVersionForNNAPI12) {
2269         auto first_param = context->tensors[node->inputs->data[0]].params;
2270         for (int i = 1; i < node->inputs->size; i++) {
2271           auto curr_param = context->tensors[node->inputs->data[i]].params;
2272           if (!Expect(curr_param.scale == first_param.scale &&
2273                           curr_param.zero_point == first_param.zero_point,
2274                       NNAPIValidationFailureType::kUnsupportedOperandValue,
2275                       "NNAPI 1.0 and 1.1 only support concatenating "
2276                       "quantized tensors with the same scale and zero point.",
2277                       &val_ctx)) {
2278             break;
2279           }
2280         }
2281       }
2282     } break;
2283     case kTfLiteBuiltinDequantize: {
2284       // Allow dequantizing fp16->fp32.
2285       if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2286           context->tensors[node->inputs->data[0]].type == kTfLiteFloat16 &&
2287           context->tensors[node->inputs->data[0]].allocation_type !=
2288               kTfLiteMmapRo) {
2289         return true;
2290       }
2291       Expect(version == 1 || version == 2,
2292              NNAPIValidationFailureType::kUnsupportedOperatorVersion,
2293              "Supported op versions are 1 and 2 only", &val_ctx);
2294 
2295       const auto& input = context->tensors[node->inputs->data[0]];
2296       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2297         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8);
2298       } else {
2299         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteUInt8, kTfLiteInt8);
2300 
2301         if (android_sdk_version == kMinSdkVersionForNNAPI12 &&
2302             input.type == kTfLiteInt8) {
2303           const auto zero_point = input.params.zero_point;
2304           Expect(zero_point == 0,
2305                  NNAPIValidationFailureType::kUnsupportedInputType,
2306                  "NN API supports int8 type since version 1.2 but only for "
2307                  "symmetric quantization.",
2308                  &val_ctx);
2309         }
2310       }
2311     } break;
2312     case kTfLiteBuiltinDensify: {
2313       // Allow densifying sparse weights.
2314       if (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2315           context->tensors[node->inputs->data[0]].allocation_type ==
2316               kTfLiteMmapRo) {
2317         return true;
2318       }
2319       return false;
2320     } break;
2321     case kTfLiteBuiltinFloor: {
2322       ExpectOpVersion(version, 1, &val_ctx);
2323     } break;
2324     case kTfLiteBuiltinRelu:
2325     case kTfLiteBuiltinReluN1To1:
2326     case kTfLiteBuiltinRelu6:
2327     case kTfLiteBuiltinLogistic: {
2328       ExpectMaxOpVersion(version, 2, &val_ctx);
2329       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2330     } break;
2331     case kTfLiteBuiltinTanh: {
2332       ExpectMaxOpVersion(version, 2, &val_ctx);
2333       const TfLiteType input_type =
2334           context->tensors[node->inputs->data[0]].type;
2335       Expect(IsFloat(input_type) ||
2336                  (IsQuantized(input_type) &&
2337                   android_sdk_version >= kMinSdkVersionForNNAPI12),
2338              NNAPIValidationFailureType::kUnsupportedInputType,
2339              "NNAPI only supports float tanh (quantized needs NNAPI 1.2+).", &val_ctx);
2340     } break;
2341     case kTfLiteBuiltinSub: {
2342       ExpectMaxOpVersion(version, 3, &val_ctx);
2343       const TfLiteType input_type =
2344           context->tensors[node->inputs->data[0]].type;
2345       Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
2346               IsFloat(input_type)) ||
2347                  (android_sdk_version >= kMinSdkVersionForNNAPI12 &&
2348                   IsQuantized(input_type)) ||
2349                  (android_sdk_version >= kMinSdkVersionForNNAPI13 &&
2350                   IsInt32(input_type)),
2351              NNAPIValidationFailureType::kUnsupportedInputType,
2352              "NNAPI supports float (1.1+), quantized (1.2+) or int32 (1.3+) sub.", &val_ctx);
2353       if (IsInt32(input_type)) {
2354         Expect(reinterpret_cast<TfLiteSubParams*>(node->builtin_data)
2355                        ->activation == kTfLiteActNone,
2356                NNAPIValidationFailureType::kNoActivationExpected,
2357                "No activation function supported", &val_ctx);
2358       }
2359       const int input0_rank =
2360           context->tensors[node->inputs->data[0]].dims->size;
2361       const int input1_rank =
2362           context->tensors[node->inputs->data[1]].dims->size;
2363       Expect(input0_rank <= 4 && input1_rank <= 4,
2364              NNAPIValidationFailureType::kUnsupportedOperandRank,
2365              "Input rank must be <= 4", &val_ctx);
2366     } break;
2367     case kTfLiteBuiltinDiv: {
2368       ExpectOpVersion(version, 1, &val_ctx);
2369       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2370                                  &val_ctx);
2371       Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2372              NNAPIValidationFailureType::kUnsupportedInputType,
2373              "NNAPI only supports float div.", &val_ctx);
2374     } break;
2375     case kTfLiteBuiltinPad:
2376     case kTfLiteBuiltinPadv2: {
2377       ExpectMaxOpVersion(version, 2, &val_ctx);
2378       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
2379       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2380                                  &val_ctx);
2381 
2382       const TfLiteIntArrayView input_shape(
2383           context->tensors[node->inputs->data[0]].dims);
2384       Expect(!HasZeroes(input_shape),
2385              NNAPIValidationFailureType::kUnsupportedOperandValue,
2386              "NN API pad ops do not support input tensors with no elements",
2387              &val_ctx);
2388 
2389       Expect(node->inputs->size >= 2,
2390              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2391              "Expecting at least 2 inputs", &val_ctx);
2392 
2393       if (node->inputs->size == 3) {
2394         // This is going to be mapped to PadV2.
2395         Expect(
2396             android_sdk_version >= kMinSdkVersionForNNAPI12,
2397             NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2398             "Specification of the padding value is supported from NNAPI 1.2.",
2399             &val_ctx);
2400       } else {  // This is going to be mapped to Pad.
2401         if (android_sdk_version < kMinSdkVersionForNNAPI12) {
2402           Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2403                  NNAPIValidationFailureType::kUnsupportedInputType,
2404                  "Only Float32 inputs are supported before NNAPI 1.2",
2405                  &val_ctx);
2406         }
2407       }
2408     } break;
2409     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
2410       ExpectOpVersion(version, 1, &val_ctx);
2411       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2412                                  &val_ctx);
2413       Expect(!IsHybridOperator(context, builtin_code, node),
2414              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2415              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2416     } break;
2417     case kTfLiteBuiltinSpaceToBatchNd: {
2418       ExpectMaxOpVersion(version, 2, &val_ctx);
2419       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2420                                  &val_ctx);
2421     } break;
2422     case kTfLiteBuiltinBatchToSpaceNd: {
2423       ExpectMaxOpVersion(version, 2, &val_ctx);
2424       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2425                                  &val_ctx);
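           // NNAPI's BATCH_TO_SPACE_ND has no crops operand, so the crops input
           // must be a 2x2 int32 tensor (16 bytes) containing only zeros.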
2426       auto crops = context->tensors[node->inputs->data[2]];
2427       auto crops_data = crops.data.i32;
2428       Expect(crops_data && crops.bytes == 16 && crops_data[0] == 0 &&
2429                  crops_data[1] == 0 && crops_data[2] == 0 && crops_data[3] == 0,
2430              NNAPIValidationFailureType::kUnsupportedOperandValue,
2431              "All crops should be 0.", &val_ctx);
2432     } break;
2433     case kTfLiteBuiltinStridedSlice: {
2434       ExpectMaxOpVersion(version, 2, &val_ctx);
2435       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2436                                  &val_ctx);
2437     } break;
2438     case kTfLiteBuiltinTranspose: {
2439       ExpectMaxOpVersion(version, 2, &val_ctx);
2440       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2441                                  &val_ctx);
2442       // Note that the permutation input tensor value dictates the output
2443       // dimensions.
2444       // TODO(b/110888333): Support dynamically-sized tensors in delegates.
2445       Expect((node->inputs->size > 1) &&
2446                  (context->tensors[node->inputs->data[1]].allocation_type ==
2447                   kTfLiteMmapRo),
2448              NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2449              "Dynamically-sized tensors not supported.", &val_ctx);
2450     } break;
2451     case kTfLiteBuiltinAbs:
2452     case kTfLiteBuiltinExp:
2453     case kTfLiteBuiltinLog:
2454     case kTfLiteBuiltinRsqrt:
2455     case kTfLiteBuiltinPow: {
2456       ExpectOpVersion(version, 1, &val_ctx);
2457       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2458                                  &val_ctx);
2459       ExpectIsFloatOperator(context, node, &val_ctx);
2460     } break;
2461     case kTfLiteBuiltinSlice: {
2462       ExpectMaxOpVersion(version, 2, &val_ctx);
2463       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2464                                  &val_ctx);
2465       const auto input_type = context->tensors[node->inputs->data[0]].type;
2466       const auto begin_type = context->tensors[node->inputs->data[1]].type;
2467       const auto size_type = context->tensors[node->inputs->data[2]].type;
2468       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2469                            kTfLiteUInt8, kTfLiteInt8);
2470       Expect(begin_type == kTfLiteInt32,
2471              NNAPIValidationFailureType::kUnsupportedInputType,
2472              "Begin type should be Int32", &val_ctx);
2473       Expect(size_type == kTfLiteInt32,
2474              NNAPIValidationFailureType::kUnsupportedInputType,
2475              "Size type should be Int32", &val_ctx);
2476     } break;
2477     case kTfLiteBuiltinSin: {
2478       ExpectOpVersion(version, 1, &val_ctx);
2479       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2480                                  &val_ctx);
2481       ExpectIsFloatOperator(context, node, &val_ctx);
2482     } break;
2483     case kTfLiteBuiltinTransposeConv: {
2484       ExpectMaxOpVersion(version, 3, &val_ctx);
2485       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2486                                  &val_ctx);
2487       Expect((node->inputs->size > 1) &&
2488                  (context->tensors[node->inputs->data[0]].allocation_type ==
2489                   kTfLiteMmapRo) &&
2490                  (context->tensors[node->inputs->data[1]].allocation_type ==
2491                   kTfLiteMmapRo),
2492              NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2493              "Dynamically-sized tensors not supported.", &val_ctx);
2494     } break;
2495     case kTfLiteBuiltinSqrt: {
2496       ExpectOpVersion(version, 1, &val_ctx);
2497       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2498                                  &val_ctx);
2499       ExpectIsFloatOperator(context, node, &val_ctx);
2500     } break;
2501     case kTfLiteBuiltinRnn: {
2502       ExpectOpVersion(version, 1, &val_ctx);
2503       Expect(node->inputs->size == 5,
2504              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2505              "Expected 5 inputs", &val_ctx);
2506       if (node->inputs->size >= 2) {
2507         Expect(
2508             context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2509                 kTfLiteFloat32,
2510             NNAPIValidationFailureType::kUnsupportedInputType,
2511             "NNAPI only supports float32 weights.", &val_ctx);
2512       }
2513     } break;
2514     case kTfLiteBuiltinSpaceToDepth: {
2515       ExpectMaxOpVersion(version, 2, &val_ctx);
2516       const TfLiteType input_type =
2517           context->tensors[node->inputs->data[0]].type;
2518       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2519                            kTfLiteInt8);
2520     } break;
2521     case kTfLiteBuiltinSvdf: {
2522       ExpectOpVersion(version, 1, &val_ctx);
2523       Expect(node->inputs->size == 5,
2524              NNAPIValidationFailureType::kUnsupportedOperatorVariant,
2525              "Expected 5 inputs", &val_ctx);
2526       if (node->inputs->size >= 2) {
2527         Expect(
2528             context->tensors[node->inputs->data[/*kWeightsTensor*/ 1]].type ==
2529                 kTfLiteFloat32,
2530             NNAPIValidationFailureType::kUnsupportedInputType,
2531             "NNAPI only supports float32 weights.", &val_ctx);
2532       }
2533       Expect(android_sdk_version >= kMinSdkVersionForNNAPI11,
2534              NNAPIValidationFailureType::kUnsupportedOperandRank,
2535              "SVDF does not support rank > 1 on NNAPI 1.0.", &val_ctx);
2536       Expect(context->tensors[node->inputs->data[/*kWeightsFeatureTensor*/ 1]]
2537                      .type == kTfLiteFloat32,
2538              NNAPIValidationFailureType::kUnsupportedInputType,
2539              "Weights should be Float32", &val_ctx);
2540     } break;
2541     case kTfLiteBuiltinLstm: {
2542       ExpectMaxOpVersion(version, 3, &val_ctx);
2543       Expect(
2544           android_sdk_version >= kMinSdkVersionForNNAPI11,
2545           NNAPIValidationFailureType::kUnsupportedAndroidVersion,
2546           "NNAPI 1.0 has a bug with optional tensors that affects LSTM.",
2547           &val_ctx);
2548       Expect(android_sdk_version >= kMinSdkVersionForNNAPI12 ||
2549                  !IsHybridOperator(context, builtin_code, node),
2550              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2551              "Hybrid operators not supported before NNAPI 1.2.", &val_ctx);
2552 
2553       const auto weight_input_index =
2554           isLstmBasicKernel(node) ? 2 /*  basic::kInputWeights */
2555                                   : 4 /* full::kInputToOutputWeightsTensor */;
2556 
2557       const TfLiteType weight_type =
2558           context->tensors[node->inputs->data[weight_input_index]].type;
2559 
2560       if (isLstmBasicKernel(node)) {
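             // The basic quantized LSTM cell maps to NNAPI's QUANTIZED_16BIT_LSTM,
             // which expects fixed quantization: input/output scale 1/128 with
             // zero point 128, and cell state scale 16/32768 with zero point 0.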
2561         Expect(weight_type == kTfLiteUInt8,
2562                NNAPIValidationFailureType::kUnsupportedInputType,
2563                "Basic LSTM Kernels support only UINT8 weights", &val_ctx);
2564 
2565         const auto input_quantization_params =
2566             context->tensors[node->inputs->data[0]].params;
2567         Expect(input_quantization_params.scale == 1. / 128. &&
2568                    input_quantization_params.zero_point == 128,
2569                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2570                "Invalid input quantization", &val_ctx);
2571 
2572         const auto output_quantization_params =
2573             context->tensors[node->outputs->data[0]].params;
2574         Expect(output_quantization_params.scale == 1. / 128. &&
2575                    output_quantization_params.zero_point == 128,
2576                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2577                "Invalid output quantization", &val_ctx);
2578 
2579         const auto cell_state_quantization_params =
2580             context->tensors[node->outputs->data[1]].params;
2581         Expect(cell_state_quantization_params.scale == 16. / 32768. ||
2582                    cell_state_quantization_params.zero_point == 0,
2583                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2584                "Invalid cell state quantization", &val_ctx);
2585 
2586         auto is_const_tensor = [&node, &context](int tensor_idx) {
2587           return context->tensors[node->inputs->data[tensor_idx]]
2588                      .allocation_type == kTfLiteMmapRo;
2589         };
2590 
2591         Expect(is_const_tensor(2 /* kInputWeights */),
2592                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2593                "Weights tensor should be constant", &val_ctx);
2594         Expect(is_const_tensor(3 /* kInputBiases */),
2595                NNAPIValidationFailureType::kInputTensorShouldHaveConstantShape,
2596                "Biases tensor should be constant", &val_ctx);
2597 
2598         return val_ctx.is_valid;
2599       } else {
2600         if (node->inputs->size == 24) {
2601           ExpectMinAndroidSdkVersion(android_sdk_version,
2602                                      kMinSdkVersionForNNAPI12, &val_ctx);
2603         }
2604 
2605         if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2606           Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8 ||
2607                      weight_type == kTfLiteInt8,
2608                  NNAPIValidationFailureType::kUnsupportedInputType,
2609                  "Weight has to be Float32, UINT8 or INT8", &val_ctx);
2610         } else {
2611           Expect(weight_type == kTfLiteFloat32 || weight_type == kTfLiteUInt8,
2612                  NNAPIValidationFailureType::kUnsupportedInputType,
2613                  "Weight has to be Float32 or UINT8", &val_ctx);
2614         }
2615       }
2616     } break;
2617     case kTfLiteBuiltinMean: {
2618       ExpectMaxOpVersion(version, 2, &val_ctx);
2619       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2620                                  &val_ctx);
2621       if (android_sdk_version >= kMinSdkVersionForNNAPI12) {
2622         Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 ||
2623                    IsQuantized(context->tensors[node->inputs->data[0]].type),
2624                NNAPIValidationFailureType::kUnsupportedInputType,
2625                "Expected Float32 or Quantized input", &val_ctx);
2626       } else {
2627         Expect(context->tensors[node->inputs->data[0]].type == kTfLiteFloat32,
2628                NNAPIValidationFailureType::kUnsupportedInputType,
2629                "Expected Float32 input", &val_ctx);
2630       }
2631       Expect(context->tensors[node->outputs->data[0]].dims->size > 0,
2632              NNAPIValidationFailureType::kUnsupportedOutputType,
2633              "NNAPI does not support generating a scalar as output for MEAN.",
2634              &val_ctx);
2635 
2636       auto input_param = context->tensors[node->inputs->data[0]].params;
2637       auto output_param = context->tensors[node->outputs->data[0]].params;
2638       Expect(input_param.scale == output_param.scale &&
2639                  input_param.zero_point == output_param.zero_point,
2640              NNAPIValidationFailureType::kUnsupportedOutputType,
2641              "NNAPI requires that the input and output have the same "
2642              "quantization parameters.",
2643              &val_ctx);
2644     } break;
2645     case kTfLiteBuiltinEmbeddingLookup: {
2646       ExpectOpVersion(version, 1, &val_ctx);
2647       Expect(context->tensors[node->inputs->data[1]].type == kTfLiteFloat32,
2648              NNAPIValidationFailureType::kUnsupportedInputType,
2649              "NNAPI only supports float32 values.", &val_ctx);
2650     } break;
2651     case kTfLiteBuiltinHashtableLookup: {
2652       ExpectOpVersion(version, 1, &val_ctx);
2653       Expect(context->tensors[node->outputs->data[0]].type == kTfLiteFloat32,
2654              NNAPIValidationFailureType::kUnsupportedOutputType,
2655              "NNAPI only supports float32 output.", &val_ctx);
2656     } break;
2657     case kTfLiteBuiltinMaximum:
2658     case kTfLiteBuiltinMinimum: {
2659       ExpectMaxOpVersion(version, 3, &val_ctx);
2660       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2661                                  &val_ctx);
2662       const auto input_type = context->tensors[node->inputs->data[0]].type;
2663       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2664                            kTfLiteInt8, kTfLiteInt32);
2665       const TfLiteTensor& operand0 = context->tensors[node->inputs->data[0]];
2666       if (operand0.dims->size == 0) {
2667         Expect(operand0.allocation_type == kTfLiteMmapRo,
2668                NNAPIValidationFailureType::kUnsupportedInputType,
2669                "Scalar operand should be constant", &val_ctx);
2670       }
2671       const TfLiteTensor& operand1 = context->tensors[node->inputs->data[1]];
2672       if (operand1.dims->size == 0) {
2673         Expect(operand1.allocation_type == kTfLiteMmapRo,
2674                NNAPIValidationFailureType::kUnsupportedInputType,
2675                "Scalar operand should be constant", &val_ctx);
2676       }
2677     } break;
2678     case kTfLiteBuiltinCast: {
2679       ExpectOpVersion(version, 1, &val_ctx);
2680       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2681                                  &val_ctx);
2682       const TfLiteType input_type =
2683           context->tensors[node->inputs->data[0]].type;
2684       const TfLiteType output_type =
2685           context->tensors[node->outputs->data[0]].type;
2686       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2687         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2688                              kTfLiteUInt8, kTfLiteInt8);
2689 
2690         ExpectTypeIn(
2691             output_type,
2692             {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8},
2693             NNAPIValidationFailureType::kUnsupportedOutputType,
2694             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2695             "kTfLiteUInt8, kTfLiteInt8.",
2696             &val_ctx);
2697       } else {
2698         EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2699                              kTfLiteUInt8);
2700 
2701         ExpectTypeIn(
2702             output_type, {kTfLiteFloat32, kTfLiteInt32, kTfLiteUInt8},
2703             NNAPIValidationFailureType::kUnsupportedOutputType,
2704             "Output type should be one of kTfLiteFloat32, kTfLiteInt32, "
2705             "kTfLiteUInt8.",
2706             &val_ctx);
2707       }
2708     } break;
2709     case kTfLiteBuiltinLeakyRelu:
2710     case kTfLiteBuiltinPrelu: {
2711       ExpectOpVersion(version, 1, &val_ctx);
2712       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2713                                  &val_ctx);
2714       const auto input_type = context->tensors[node->inputs->data[0]].type;
2715       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2716                            kTfLiteInt8);
2717     } break;
2718     case kTfLiteBuiltinTile: {
2719       ExpectOpVersion(version, 1, &val_ctx);
2720       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2721                                  &val_ctx);
2722       const auto input_type = context->tensors[node->inputs->data[0]].type;
2723       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8,
2724                            kTfLiteUInt8, kTfLiteInt32);
2725       const auto multipliers_type =
2726           context->tensors[node->inputs->data[1]].type;
2727       Expect(multipliers_type == kTfLiteInt32,
2728              NNAPIValidationFailureType::kUnsupportedInputType,
2729              "Multipliers should be Int32", &val_ctx);
2730     } break;
2731     case kTfLiteBuiltinLogicalOr:
2732     case kTfLiteBuiltinLogicalAnd:
2733     case kTfLiteBuiltinLogicalNot: {
2734       ExpectOpVersion(version, 1, &val_ctx);
2735       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2736                                  &val_ctx);
2737       const auto input_type = context->tensors[node->inputs->data[0]].type;
2738       Expect(input_type == kTfLiteBool,
2739              NNAPIValidationFailureType::kUnsupportedInputType,
2740              "Input should be bool", &val_ctx);
2741     } break;
2742     case kTfLiteBuiltinLess:
2743     case kTfLiteBuiltinLessEqual:
2744     case kTfLiteBuiltinGreater:
2745     case kTfLiteBuiltinGreaterEqual:
2746     case kTfLiteBuiltinEqual:
2747     case kTfLiteBuiltinNotEqual: {
2748       ExpectMaxOpVersion(version, 2, &val_ctx);
2749       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2750                                  &val_ctx);
2751       const auto input_type = context->tensors[node->inputs->data[0]].type;
2752       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2753                            kTfLiteInt8, kTfLiteBool, kTfLiteInt32);
2754     } break;
2755     case kTfLiteBuiltinNeg: {
2756       ExpectMaxOpVersion(version, 2, &val_ctx);
2757       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2758                                  &val_ctx);
2759       const auto input_type = context->tensors[node->inputs->data[0]].type;
2760       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32);
2761     } break;
2762     case kTfLiteBuiltinTopkV2: {
2763       ExpectMaxOpVersion(version, 2, &val_ctx);
2764       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2765                                  &val_ctx);
2766       const auto& input_type = context->tensors[node->inputs->data[0]].type;
2767       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt32,
2768                            kTfLiteUInt8, kTfLiteInt8);
2769       const auto& k_param = context->tensors[node->inputs->data[1]];
2770       Expect(k_param.type == kTfLiteInt32 &&
2771                  k_param.allocation_type == kTfLiteMmapRo,
2772              NNAPIValidationFailureType::kUnsupportedInputType,
2773              "K param should be a constant of type Int32", &val_ctx);
2774     } break;
2775     case kTfLiteBuiltinSelect: {
2776       ExpectMaxOpVersion(version, 2, &val_ctx);
2777       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI11,
2778                                  &val_ctx);
2779       const auto value_type = context->tensors[node->inputs->data[1]].type;
2780       EXPECT_INPUT_TYPE_IN(value_type, kTfLiteFloat32, kTfLiteInt32,
2781                            kTfLiteUInt8, kTfLiteInt8);
2782       TfLiteIntArray* condition_shape =
2783           context->tensors[node->inputs->data[0]].dims;
2784       TfLiteIntArray* input_shape =
2785           context->tensors[node->inputs->data[1]].dims;
2786       Expect(TfLiteIntArrayEqual(condition_shape, input_shape),
2787              NNAPIValidationFailureType::kUnsupportedOperandValue,
2788              "Condition and input tensors should have the same shape",
2789              &val_ctx);
2790     } break;
2791     case kTfLiteBuiltinGather: {
2792       ExpectOpVersion(version, 2, &val_ctx);
2793       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2794                                  &val_ctx);
2795       const auto input_type = context->tensors[node->inputs->data[0]].type;
2796       const auto& positions = context->tensors[node->inputs->data[1]];
2797 
2798       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2799                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2800 
2801       Expect(positions.type == kTfLiteInt32,
2802              NNAPIValidationFailureType::kUnsupportedInputType,
2803              "Positions type should be kTfLiteInt32", &val_ctx);
2804       Expect(positions.dims->size != 0,
2805              NNAPIValidationFailureType::kUnsupportedOperandRank,
2806              "0-dimension args are not supported by NNAPI.", &val_ctx);
2807     } break;
2808     case kTfLiteBuiltinBidirectionalSequenceLstm: {
2809       ExpectOpVersion(version, 1, &val_ctx);
2810       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2811                                  &val_ctx);
2812       Expect(!IsHybridOperator(context, builtin_code, node),
2813              NNAPIValidationFailureType::kUnsupportedHybridOperator,
2814              "Hybrid version of this op is not supported by NN API.", &val_ctx);
2815     } break;
2816     case kTfLiteBuiltinExpandDims: {
2817       ExpectOpVersion(version, 1, &val_ctx);
2818       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2819                                  &val_ctx);
2820       const auto input_type = context->tensors[node->inputs->data[0]].type;
2821       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteFloat16,
2822                            kTfLiteInt32, kTfLiteUInt8, kTfLiteInt8);
2823       const auto axis = context->tensors[node->inputs->data[1]];
2824       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2825              NNAPIValidationFailureType::kUnsupportedInputType,
2826              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2827     } break;
2828     case kTfLiteBuiltinSplit: {
2829       ExpectOpVersion(version, 3, &val_ctx);
2830       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2831                                  &val_ctx);
2832       // Tensor indices: split_dim: 0, value: 1
2833       const TfLiteTensor& input = context->tensors[node->inputs->data[1]];
2834       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
2835         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2836                              kTfLiteInt8, kTfLiteInt32);
2837       } else {
2838         EXPECT_INPUT_TYPE_IN(input.type, kTfLiteFloat32, kTfLiteUInt8,
2839                              kTfLiteInt32);
2840       }
2841       const TfLiteTensor& axis = context->tensors[node->inputs->data[0]];
2842       Expect(axis.type == kTfLiteInt32 && axis.allocation_type == kTfLiteMmapRo,
2843              NNAPIValidationFailureType::kUnsupportedInputType,
2844              "NNAPI only supports constant int32 axis tensor.", &val_ctx);
2845     } break;
2846     case kTfLiteBuiltinLogSoftmax: {
2847       ExpectOpVersion(version, 1, &val_ctx);
2848       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2849                                  &val_ctx);
2850       const auto input_type = context->tensors[node->inputs->data[0]].type;
2851       Expect(input_type == kTfLiteFloat32,
2852              NNAPIValidationFailureType::kUnsupportedInputType,
2853              "Input should be Float32.", &val_ctx);
2854     } break;
2855     case kTfLiteBuiltinQuantize: {
2856       ExpectMaxOpVersion(version, 2, &val_ctx);
2857       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2858                                  &val_ctx);
2859       const auto value_type = context->tensors[node->inputs->data[0]].type;
2860       Expect(value_type == kTfLiteFloat32 || IsQuantized(value_type),
2861              NNAPIValidationFailureType::kUnsupportedInputType,
2862              "Value should be quantized or Float32.", &val_ctx);
2863       if (IsQuantized(value_type)) {
2864         const auto quantization_params =
2865             context->tensors[node->inputs->data[0]].params;
2866         Expect(quantization_params.scale > 0.f,
2867                NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2868                "Quantization scale should be > 0.", &val_ctx);
2869       }
2870       const auto output_type = context->tensors[node->outputs->data[0]].type;
2871       if (android_sdk_version < kMinSdkVersionForNNAPI13) {
2872         Expect(output_type == kTfLiteUInt8,
2873                NNAPIValidationFailureType::kUnsupportedOutputType,
2874                "Output should be kTfLiteUInt8.", &val_ctx);
2875       } else {
2876         ExpectTypeIn(output_type, {kTfLiteUInt8, kTfLiteInt8},
2877                      NNAPIValidationFailureType::kUnsupportedOutputType,
2878                      "Output should be kTfLiteUInt8 or kTfLiteInt8.", &val_ctx);
2879       }
2880       const auto quantization_params =
2881           context->tensors[node->outputs->data[0]].params;
2882       Expect(quantization_params.scale > 0.f,
2883              NNAPIValidationFailureType::kUnsupportedQuantizationParameters,
2884              "Quantization scale should be > 0.", &val_ctx);
2885     } break;
2886     case kTfLiteBuiltinReduceAny: {
2887       ExpectOpVersion(version, 2, &val_ctx);
2888       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2889                                  &val_ctx);
2890       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2891              NNAPIValidationFailureType::kUnsupportedOutputType,
2892              "NNAPI does not support generating a scalar as output.", &val_ctx);
2893     } break;
2894     case kTfLiteBuiltinReduceMin:
2895     case kTfLiteBuiltinReduceMax: {
2896       ExpectMaxOpVersion(version, 2, &val_ctx);
2897       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2898                                  &val_ctx);
2899       const auto input_tensor = context->tensors[node->inputs->data[0]];
2900       const auto input_type = input_tensor.type;
2901       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2902                            kTfLiteInt8);
2903       Expect(input_tensor.dims->size != 0,
2904              NNAPIValidationFailureType::kUnsupportedOutputType,
2905              "NNAPI does not support generating a scalar as output.", &val_ctx);
2906     } break;
2907     case kTfLiteBuiltinDepthToSpace: {
2908       const TfLiteType input_type =
2909           context->tensors[node->inputs->data[0]].type;
2910       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteUInt8,
2911                            kTfLiteInt8);
2912     } break;
2913     case kTfLiteBuiltinReduceProd:
2914     case kTfLiteBuiltinSum: {
2915       ExpectOpVersion(version, 1, &val_ctx);
2916       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI12,
2917                                  &val_ctx);
2918       Expect(context->tensors[node->outputs->data[0]].dims->size != 0,
2919              NNAPIValidationFailureType::kUnsupportedOutputType,
2920              "NNAPI does not support generating a scalar as output", &val_ctx);
2921       const auto input_type = context->tensors[node->inputs->data[0]].type;
2922       Expect(input_type == kTfLiteFloat32,
2923              NNAPIValidationFailureType::kUnsupportedInputType,
2924              "NNAPI only supports floating point input.", &val_ctx);
2925     } break;
2926     case kTfLiteBuiltinElu: {
2927       ExpectOpVersion(version, 1, &val_ctx);
2928       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2929                                  &val_ctx);
2930       const auto input_type = context->tensors[node->inputs->data[0]].type;
2931       Expect(input_type == kTfLiteFloat32,
2932              NNAPIValidationFailureType::kUnsupportedInputType,
2933              "NNAPI only supports floating point input.", &val_ctx);
2934     } break;
2935     case kTfLiteBuiltinFill: {
2936       ExpectOpVersion(version, 1, &val_ctx);
2937       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2938                                  &val_ctx);
2939       const auto& dims_tensor = context->tensors[node->inputs->data[0]];
2940       Expect(IsConstantTensor(&dims_tensor),
2941              NNAPIValidationFailureType::kUnsupportedInputType,
2942              "NNAPI doesn't support dynamic dimensions tensor.", &val_ctx);
2943       EXPECT_INPUT_TYPE_IN(dims_tensor.type, kTfLiteInt32, kTfLiteInt64);
2944       if (IsConstantTensor(&dims_tensor)) {
2945         Expect(dims_tensor.dims->data[0] != 0,
2946                NNAPIValidationFailureType::kUnsupportedOperandValue,
2947                "NNAPI doesn't support generating scalars from FILL", &val_ctx);
2948         if (dims_tensor.type == kTfLiteInt64) {
2949           bool fit_in_int32 =
2950               std::all_of(dims_tensor.data.i64,
2951                           dims_tensor.data.i64 + dims_tensor.dims->data[0],
2952                           [](int64_t dim) {
2953                             return std::numeric_limits<int32_t>::min() <= dim &&
2954                                    dim <= std::numeric_limits<int32_t>::max();
2955                           });
2956           Expect(fit_in_int32,
2957                  NNAPIValidationFailureType::kUnsupportedOperandValue,
2958                  "NNAPI only supports an int32 dimensions tensor. Constant "
2959                  "int64 dimensions can be converted to int32 only if every "
2960                  "value fits in the int32 range.",
2961                  &val_ctx);
2962         }
2963       }
2964       const auto& value_tensor = context->tensors[node->inputs->data[1]];
2965       EXPECT_INPUT_TYPE_IN(value_tensor.type, kTfLiteFloat32, kTfLiteInt32,
2966                            kTfLiteInt64);
2967       if (value_tensor.type == kTfLiteInt64 &&
2968           IsConstantTensor(&value_tensor)) {
2969         Expect(
2970             *value_tensor.data.i64 <= std::numeric_limits<int32_t>::max() &&
2971                 *value_tensor.data.i64 >= std::numeric_limits<int32_t>::min(),
2972             NNAPIValidationFailureType::kUnsupportedInputType,
2973             "NNAPI only supports int32 input. If the input type is int64 "
2974             "and the tensor is constant, it can be converted to int32 as "
2975             "long as the value fits in the int32 range.",
2976             &val_ctx);
2977       }
2978     } break;
2979     case kTfLiteBuiltinPack: {
2980       ExpectOpVersion(version, 2, &val_ctx);
2981       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
2982                                  &val_ctx);
2983       const auto input_type = context->tensors[node->inputs->data[0]].type;
2984       EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32,
2985                            kTfLiteInt8);
2986       auto builtin = reinterpret_cast<TfLitePackParams*>(node->builtin_data);
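           // PACK's output rank is the input rank + 1, so both -1 and the input
           // rank denote the last axis; NNAPI does not support packing along it.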
2987       Expect(builtin->axis != -1 &&
2988                  builtin->axis !=
2989                      context->tensors[node->inputs->data[0]].dims->size,
2990              NNAPIValidationFailureType::kUnsupportedOperandValue,
2991              "NNAPI does not support axis being the last dimension", &val_ctx);
2992     } break;
2993     default:
2994       // All other operators are not mapped.
2995       AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
2996                            "Unsupported operation type.", &val_ctx);
2997   }
2998   return val_ctx.is_valid;
2999 }  // NOLINT(readability/fn_size)
3000 
3001 TfLiteStatus NNAPIDelegateKernel::Map(
3002     TfLiteContext* context, int builtin_code, int version,
3003     int android_sdk_version, const NNAPIOpMappingArgs& mapping_args,
3004     ANeuralNetworksOperationType* nn_op_type) {
3005   auto add_zero_bias = [mapping_args](int input_id, int filter_id,
3006                                       int num_elements) -> void {
3007     // NNAPI requires a bias tensor, so we allocate a new tensor to fill
3008     // it with zeroes. It is deleted with other tensors in the context
3009     // during subgraph destructor call.
3010     int bias_index = -1;
3011     mapping_args.context->AddTensors(mapping_args.context, 1, &bias_index);
3012     TfLiteTensor* bias_tensor = &mapping_args.context->tensors[bias_index];
3013     const auto input_type = mapping_args.context->tensors[input_id].type;
3014     if (input_type == kTfLiteFloat32) {
3015       bias_tensor->type = kTfLiteFloat32;
3016     } else {
3017       bias_tensor->type = kTfLiteInt32;
3018     }
3019     // Create an array with a required bias shape and resize the bias
3020     // tensor.
3021     TfLiteIntArray* bias_shape = TfLiteIntArrayCreate(1);
3022     bias_shape->data[0] = num_elements;
3023     bias_tensor->allocation_type = kTfLiteDynamic;
3024     mapping_args.context->ResizeTensor(mapping_args.context, bias_tensor,
3025                                        bias_shape);
3026     // Set tensor's values to zeroes and add it using AddVector*, so
3027     // that the values are copied to NNAPI. We don't use the AddTensor
3028     // function because it doesn't copy values and the tensor we just
3029     // created is not in the node->inputs.
3030     if (input_type == kTfLiteFloat32) {
3031       memset(bias_tensor->data.f, 0, num_elements * sizeof(float));
3032       mapping_args.builder->AddVectorFloat32Operand(bias_tensor->data.f,
3033                                                     num_elements);
3034     } else {
3035       memset(bias_tensor->data.i32, 0, num_elements * sizeof(int));
3036       const TfLiteTensor& input_tensor =
3037           mapping_args.context->tensors[input_id];
3038       const TfLiteTensor& filter_tensor =
3039           mapping_args.context->tensors[filter_id];
3040       // NNAPI requires bias scale to be a product of an input scale and
3041       // a filter scale.
3042       bias_tensor->params.scale =
3043           input_tensor.params.scale * filter_tensor.params.scale;
3044       mapping_args.builder->AddVectorInt32Operand(
3045           bias_tensor->data.i32, num_elements, bias_tensor->params.scale,
3046           /*zero_point=*/0);
3047     }
3048   };
3049   switch (builtin_code) {
3050     case kTfLiteBuiltinAdd: {
3051       auto builtin =
3052           reinterpret_cast<TfLiteAddParams*>(mapping_args.node->builtin_data);
3053       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3054       *nn_op_type = ANEURALNETWORKS_ADD;
3055     } break;
3056     case kTfLiteBuiltinArgMax: {
3057       *nn_op_type = ANEURALNETWORKS_ARGMAX;
3058     } break;
3059     case kTfLiteBuiltinArgMin: {
3060       *nn_op_type = ANEURALNETWORKS_ARGMIN;
3061     } break;
3062     case kTfLiteBuiltinMul: {
3063       auto builtin =
3064           reinterpret_cast<TfLiteMulParams*>(mapping_args.node->builtin_data);
3065       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3066       *nn_op_type = ANEURALNETWORKS_MUL;
3067     } break;
3068     case kTfLiteBuiltinAveragePool2d: {
3069       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3070       *nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
3071     } break;
3072     case kTfLiteBuiltinMaxPool2d: {
3073       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3074       *nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
3075     } break;
3076     case kTfLiteBuiltinL2Pool2d: {
3077       mapping_args.builder->AddPoolingParams(mapping_args.node->builtin_data);
3078       *nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
3079     } break;
3080     case kTfLiteBuiltinConv2d: {
3081       auto builtin =
3082           reinterpret_cast<TfLiteConvParams*>(mapping_args.node->builtin_data);
3083       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3084       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3085       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3086       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3087       // NNAPI supports dilated Conv2D since NNAPI 1.2.
3088       if (builtin->dilation_width_factor != 1 ||
3089           builtin->dilation_height_factor != 1) {
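             // The extended CONV_2D signature takes a data-layout flag (false =
             // NHWC) followed by the dilation factors.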
3090         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
3091         mapping_args.builder->AddScalarInt32Operand(
3092             builtin->dilation_width_factor);
3093         mapping_args.builder->AddScalarInt32Operand(
3094             builtin->dilation_height_factor);
3095       }
3096       *nn_op_type = ANEURALNETWORKS_CONV_2D;
3097     } break;
3098     case kTfLiteBuiltinDepthwiseConv2d: {
3099       auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
3100           mapping_args.node->builtin_data);
3101       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3102       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3103       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3104       mapping_args.builder->AddScalarInt32Operand(builtin->depth_multiplier);
3105       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3106       if (builtin->dilation_width_factor != 1 ||
3107           builtin->dilation_height_factor != 1) {
3108         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format.
3109         mapping_args.builder->AddScalarInt32Operand(
3110             builtin->dilation_width_factor);
3111         mapping_args.builder->AddScalarInt32Operand(
3112             builtin->dilation_height_factor);
3113       }
3114       *nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
3115     } break;
3116     case kTfLiteBuiltinFullyConnected: {
3117       const bool is_bias_present =
3118           mapping_args.node->inputs->size == 3 &&
3119           mapping_args.node->inputs->data[2] != kTfLiteOptionalTensor;
3120       if (!is_bias_present) {
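             // NNAPI requires a bias operand for FULLY_CONNECTED, so synthesize an
             // all-zero bias with one element per output unit (filter rows).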
3121         const int input_tensor_id =
3122             mapping_args.node->inputs->data[/*kInputTensor*/ 0];
3123         const int filter_tensor_id =
3124             mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3125         const int num_units =
3126             mapping_args.context->tensors[filter_tensor_id].dims->data[0];
3127         add_zero_bias(input_tensor_id, filter_tensor_id, num_units);
3128       }
3129       auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
3130           mapping_args.node->builtin_data);
3131       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3132       *nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
3133     } break;
3134     case kTfLiteBuiltinHardSwish: {
3135       *nn_op_type = ANEURALNETWORKS_HARD_SWISH;
3136     } break;
3137     case kTfLiteBuiltinSoftmax: {
3138       auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(
3139           mapping_args.node->builtin_data);
3140       mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3141       // The optional scalar specifying the axis the softmax is computed over
3142       // is not added; NNAPI defaults it to -1 (the last axis).
3143       *nn_op_type = ANEURALNETWORKS_SOFTMAX;
3144     } break;
3145     case kTfLiteBuiltinReshape: {
3146       if (mapping_args.node->inputs->size == 1) {
3147         // If there is no new_shape tensor, construct the new shape from params.
3148         auto* params = reinterpret_cast<TfLiteReshapeParams*>(
3149             mapping_args.node->builtin_data);
3150         int num_dimensions = params->num_dimensions;
3151         std::vector<int32_t> output_shape(num_dimensions);
3152         for (int i = 0; i < num_dimensions; ++i) {
3153           output_shape[i] = params->shape[i];
3154         }
3155         mapping_args.builder->AddVectorInt32Operand(
3156             output_shape.data(), static_cast<uint32_t>(num_dimensions));
3157       }
3158       *nn_op_type = ANEURALNETWORKS_RESHAPE;
3159     } break;
3160     case kTfLiteBuiltinResizeBilinear: {
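           // NNAPI takes the target width and height as scalar operands; they are
           // read from the output tensor shape (NHWC: dims[1] is height, dims[2]
           // is width).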
3161       const int output_id = mapping_args.node->outputs->data[0];
3162       auto& output = mapping_args.context->tensors[output_id];
3163       const int output_height = output.dims->data[1];
3164       const int output_width = output.dims->data[2];
3165       mapping_args.builder->AddScalarInt32Operand(output_width);
3166       mapping_args.builder->AddScalarInt32Operand(output_height);
3167       auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
3168           mapping_args.node->builtin_data);
3169       if (builtin->align_corners == true ||
3170           builtin->half_pixel_centers == true) {
3171         mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
3172         mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3173         mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3174       }
3175       *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
3176     } break;
3177     case kTfLiteBuiltinResizeNearestNeighbor: {
3178       const TfLiteTensor& new_shape =
3179           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3180       // NNAPI takes the output width and height as scalar operands, in that order.
3181       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
3182       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
3183       mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
3184       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
3185           mapping_args.node->builtin_data);
3186       if (builtin->align_corners == true ||
3187           builtin->half_pixel_centers == true) {
3188         mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
3189         mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
3190       }
3191       *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
3192     } break;
3193     case kTfLiteBuiltinSqueeze: {
3194       auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
3195           mapping_args.node->builtin_data);
3196       // Note that we add the squeeze dimensions even if the dimensions
3197       // were unspecified (empty), as NNAPI requires the operand.
3198       mapping_args.builder->AddVectorInt32Operand(
3199           builtin->num_squeeze_dims ? builtin->squeeze_dims : nullptr,
3200           static_cast<uint32_t>(builtin->num_squeeze_dims));
3201       *nn_op_type = ANEURALNETWORKS_SQUEEZE;
3202     } break;
3203     case kTfLiteBuiltinUnidirectionalSequenceLstm: {
3204       auto builtin = reinterpret_cast<TfLiteUnidirectionalSequenceLSTMParams*>(
3205           mapping_args.node->builtin_data);
3206       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3207       mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3208       mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3209       mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3210       const bool hybrid_op = IsHybridOperator(
3211           mapping_args.context, kTfLiteBuiltinUnidirectionalSequenceLstm,
3212           mapping_args.node);
3213       if (mapping_args.node->inputs->size == 24) {
3214         // Add layer normalization tensors if they are provided.
3215         for (int i = 20; i < 24; ++i) {
3216           const int input_index = mapping_args.node->inputs->data[i];
3217           if (input_index != kTfLiteOptionalTensor) {
3218             mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3219           } else {
3220             mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3221           }
3222         }
3223       } else {
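             // No layer normalization inputs: add four empty operands as
             // placeholders for the optional layer-norm weight tensors.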
3224         for (int i = 0; i < 4; ++i) {
3225           mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3226         }
3227       }
3228 
3229       *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_LSTM;
3230     } break;
3231     case kTfLiteBuiltinL2Normalization: {
3232       *nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
3233     } break;
3234     case kTfLiteBuiltinLocalResponseNormalization: {
3235       auto builtin = reinterpret_cast<TfLiteLocalResponseNormParams*>(
3236           mapping_args.node->builtin_data);
3237       mapping_args.builder->AddScalarInt32Operand(builtin->radius);
3238       mapping_args.builder->AddScalarFloat32Operand(builtin->bias);
3239       mapping_args.builder->AddScalarFloat32Operand(builtin->alpha);
3240       mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
3241       *nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
3242     } break;
3243     case kTfLiteBuiltinLshProjection: {
3244       auto builtin = reinterpret_cast<TfLiteLSHProjectionParams*>(
3245           mapping_args.node->builtin_data);
3246       int type = builtin->type;
3247       // In Android Q+, NNAPI uses 3 to denote
3248       // kTfLiteLshProjectionSparse.
3249       const int kNNAPILshProjectionSparse = 3;
3250       if (builtin->type == kTfLiteLshProjectionSparse) {
3251         type = kNNAPILshProjectionSparse;
3252         // Add NNAPI null weight operand.
3253         mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3254       }
3255       mapping_args.builder->AddScalarInt32Operand(type);
3256       *nn_op_type = ANEURALNETWORKS_LSH_PROJECTION;
3257     } break;
3258     case kTfLiteBuiltinConcatenation: {
3259       auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(
3260           mapping_args.node->builtin_data);
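           // Normalize a negative TFLite axis to a non-negative NNAPI axis by
           // adding the rank of the first input.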
3261       int axis = builtin->axis < 0
3262                      ? mapping_args.context
3263                                ->tensors[mapping_args.node->inputs->data[0]]
3264                                .dims->size +
3265                            builtin->axis
3266                      : builtin->axis;
3267       mapping_args.builder->AddScalarInt32Operand(axis);
3268       *nn_op_type = ANEURALNETWORKS_CONCATENATION;
3269     } break;
3270     case kTfLiteBuiltinDequantize: {
3271       *nn_op_type = ANEURALNETWORKS_DEQUANTIZE;
3272     } break;
3273     case kTfLiteBuiltinFloor: {
3274       *nn_op_type = ANEURALNETWORKS_FLOOR;
3275     } break;
3276     case kTfLiteBuiltinRelu: {
3277       *nn_op_type = ANEURALNETWORKS_RELU;
3278     } break;
3279     case kTfLiteBuiltinReluN1To1: {
3280       *nn_op_type = ANEURALNETWORKS_RELU1;
3281     } break;
3282     case kTfLiteBuiltinRelu6: {
3283       *nn_op_type = ANEURALNETWORKS_RELU6;
3284     } break;
3285     case kTfLiteBuiltinLogistic: {
3286       *nn_op_type = ANEURALNETWORKS_LOGISTIC;
3287     } break;
3288     case kTfLiteBuiltinTanh: {
3289       *nn_op_type = ANEURALNETWORKS_TANH;
3290     } break;
3291     case kTfLiteBuiltinSub: {
3292       auto builtin =
3293           reinterpret_cast<TfLiteSubParams*>(mapping_args.node->builtin_data);
3294       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3295       *nn_op_type = ANEURALNETWORKS_SUB;
3296     } break;
3297     case kTfLiteBuiltinDiv: {
3298       auto builtin =
3299           reinterpret_cast<TfLiteDivParams*>(mapping_args.node->builtin_data);
3300       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3301       *nn_op_type = ANEURALNETWORKS_DIV;
3302     } break;
3303     case kTfLiteBuiltinPad:
3304     case kTfLiteBuiltinPadv2: {
3305       // We want to map to PAD as much as possible since it is more widely
3306       // supported. We map to PadV2 only when we need to specify the padding
3307       // value.
3308       if (mapping_args.node->inputs->size == 2) {
3309         *nn_op_type = ANEURALNETWORKS_PAD;
3310       } else {
3311         const int constant_value_id = mapping_args.node->inputs->data[2];
3312         if (constant_value_id == kTfLiteOptionalTensor) {
3313           *nn_op_type = ANEURALNETWORKS_PAD;
3314         } else {
3315           *nn_op_type = ANEURALNETWORKS_PAD_V2;
3316         }
3317       }
3318     } break;
3319     case kTfLiteBuiltinUnidirectionalSequenceRnn: {
3320       auto builtin = reinterpret_cast<TfLiteSequenceRNNParams*>(
3321           mapping_args.node->builtin_data);
3322       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3323       mapping_args.builder->AddScalarInt32Operand(builtin->time_major);
3324       *nn_op_type = ANEURALNETWORKS_UNIDIRECTIONAL_SEQUENCE_RNN;
3325     } break;
3326     case kTfLiteBuiltinSpaceToBatchNd: {
3327       *nn_op_type = ANEURALNETWORKS_SPACE_TO_BATCH_ND;
3328     } break;
3329     case kTfLiteBuiltinBatchToSpaceNd: {
3330       *nn_op_type = ANEURALNETWORKS_BATCH_TO_SPACE_ND;
3331     } break;
3332     case kTfLiteBuiltinStridedSlice: {
3333       auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
3334           mapping_args.node->builtin_data);
3335       mapping_args.builder->AddScalarInt32Operand(builtin->begin_mask);
3336       mapping_args.builder->AddScalarInt32Operand(builtin->end_mask);
3337       mapping_args.builder->AddScalarInt32Operand(builtin->shrink_axis_mask);
3338       *nn_op_type = ANEURALNETWORKS_STRIDED_SLICE;
3339     } break;
3340     case kTfLiteBuiltinTranspose: {
3341       *nn_op_type = ANEURALNETWORKS_TRANSPOSE;
3342     } break;
3343     case kTfLiteBuiltinAbs: {
3344       *nn_op_type = ANEURALNETWORKS_ABS;
3345     } break;
3346     case kTfLiteBuiltinExp: {
3347       *nn_op_type = ANEURALNETWORKS_EXP;
3348     } break;
3349     case kTfLiteBuiltinLog: {
3350       *nn_op_type = ANEURALNETWORKS_LOG;
3351     } break;
3352     case kTfLiteBuiltinRsqrt: {
3353       *nn_op_type = ANEURALNETWORKS_RSQRT;
3354     } break;
3355     case kTfLiteBuiltinPow: {
3356       *nn_op_type = ANEURALNETWORKS_POW;
3357     } break;
3358     case kTfLiteBuiltinSlice: {
3359       *nn_op_type = ANEURALNETWORKS_SLICE;
3360     } break;
3361     case kTfLiteBuiltinSin: {
3362       *nn_op_type = ANEURALNETWORKS_SIN;
3363     } break;
3364     case kTfLiteBuiltinTransposeConv: {
3365       int input_tensor_flags = 0;
3366       const int input_tensor_id =
3367           mapping_args.node->inputs->data[/*kDataInputTensor*/ 2];
3368       const int weight_tensor_id =
3369           mapping_args.node->inputs->data[/*kWeightsTensor*/ 1];
3370 
3371       // Transpose convolution doesn't have a hybrid variant.
3372       const bool hybrid_op = false;
3373 
3374       if (android_sdk_version >= kMinSdkVersionForNNAPI13) {
3375         mapping_args.builder->AddTensorInput(
3376             input_tensor_id, hybrid_op,
3377             input_tensor_flags | NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED);
3378 
3379       } else {
3380         mapping_args.builder->AddTensorInput(
3381             input_tensor_id, hybrid_op,
3382             input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION);
3383       }
3384       // Transpose convolution uses per-channel quantization with int8 inputs
3385       // even if the number of channels in quantization parameters is equal to 1
3386       // (as opposed to conv2d, which uses per-tensor quantization in this
3387       // case).
3388       mapping_args.builder->AddTensorInput(
3389           weight_tensor_id, hybrid_op,
3390           input_tensor_flags | NN_TENSOR_FLAG_FORCE_PER_CHANNEL);
3391 
3392       const bool is_bias_present =
3393           mapping_args.node->inputs->size == 4 &&
3394           mapping_args.node->inputs->data[/*kBiasTensor*/ 3] !=
3395               kTfLiteOptionalTensor;
3396 
3397       if (is_bias_present) {
3398         mapping_args.builder->AddTensorInput(
3399             mapping_args.node->inputs->data[/*kBiasTensor*/ 3], hybrid_op);
3400       } else {
3401         const TfLiteTensor& output_shape =
3402             mapping_args.context->tensors[mapping_args.node->inputs
3403                                               ->data[/*kOutputShapeTensor*/ 0]];
3404         const int output_depth = output_shape.data.i32[3];
3405         add_zero_bias(input_tensor_id, weight_tensor_id, output_depth);
3406       }
3407       mapping_args.builder->AddTensorInput(
3408           mapping_args.node->inputs->data[/*kOutputShapeTensor*/ 0], hybrid_op);
3409 
3410       auto builtin = reinterpret_cast<TfLiteTransposeConvParams*>(
3411           mapping_args.node->builtin_data);
3412       mapping_args.builder->AddScalarInt32Operand(builtin->padding);
3413       mapping_args.builder->AddScalarInt32Operand(builtin->stride_width);
3414       mapping_args.builder->AddScalarInt32Operand(builtin->stride_height);
3415       mapping_args.builder->AddScalarInt32Operand(
3416           /*ANEURALNETWORKS_FUSED_NONE*/ 0);
3417       // Use NHWC layout for input and output.
3418       mapping_args.builder->AddScalarBoolOperand(false);
3419       *nn_op_type = ANEURALNETWORKS_TRANSPOSE_CONV;
3420     } break;
3421     case kTfLiteBuiltinSqrt: {
3422       *nn_op_type = ANEURALNETWORKS_SQRT;
3423     } break;
3424     case kTfLiteBuiltinRnn: {
3425       // NNAPI needs both state_in and state_out.
3426       int ann_index;
3427       mapping_args.builder->AddStateFloat32Tensor(
3428           mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4],
3429           &ann_index);
3430       mapping_args.model_state_outputs->push_back(ann_index);
3431       mapping_args.model_state_tfl_inputs->push_back(
3432           mapping_args.node->inputs->data[/*kHiddenStateTensor*/ 4]);
3433       auto builtin =
3434           reinterpret_cast<TfLiteRNNParams*>(mapping_args.node->builtin_data);
3435       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3436       *nn_op_type = ANEURALNETWORKS_RNN;
3437     } break;
3438     case kTfLiteBuiltinSpaceToDepth: {
3439       auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(
3440           mapping_args.node->builtin_data);
3441       mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3442       *nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
3443     } break;
3444     case kTfLiteBuiltinSvdf: {
3445       // NNAPI needs both state_in and state_out.
3446       int ann_index;
3447       mapping_args.builder->AddStateFloat32Tensor(
3448           mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4],
3449           &ann_index);
3450       mapping_args.model_state_outputs->push_back(ann_index);
3451       mapping_args.model_state_tfl_inputs->push_back(
3452           mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 4]);
3453 
3454       auto builtin =
3455           reinterpret_cast<TfLiteSVDFParams*>(mapping_args.node->builtin_data);
3456       mapping_args.builder->AddScalarInt32Operand(builtin->rank);
3457       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3458       *nn_op_type = ANEURALNETWORKS_SVDF;
3459     } break;
3460     case kTfLiteBuiltinLstm: {
3461       if (isLstmBasicKernel(mapping_args.node)) {
3462         const auto output_dims =
3463             mapping_args.context->tensors[mapping_args.node->outputs->data[1]]
3464                 .dims;
3465 
3466         // Inputs kInputData
3467         mapping_args.builder->AddTensorInput(
3468             mapping_args.node->inputs->data[0 /* kInputData */],
3469             /* hybrid_op */ false,
3470             /* scalar_as_tensor */ false);
3471 
3472         // The 8 weight tensors are set by decomposing the
3473         // kInputWeights param.
3474         const auto weight_tensor =
3475             mapping_args.context->tensors[mapping_args.node->inputs
3476                                               ->data[2 /* kInputWeights */]];
3477 
3478         std::vector<uint8_t> recurrent_to_input;
3479         std::vector<uint8_t> input_to_input;
3480         std::vector<uint8_t> recurrent_to_cell;
3481         std::vector<uint8_t> input_to_cell;
3482         std::vector<uint8_t> recurrent_to_forget;
3483         std::vector<uint8_t> input_to_forget;
3484         std::vector<uint8_t> recurrent_to_output;
3485         std::vector<uint8_t> input_to_output;
3486         tflite::delegate::nnapi::DecomposeQuantLstmWeightsTensor(
3487             weight_tensor.data.uint8, weight_tensor.dims, &recurrent_to_input,
3488             &input_to_input, &recurrent_to_cell, &input_to_cell,
3489             &recurrent_to_forget, &input_to_forget, &recurrent_to_output,
3490             &input_to_output);
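        // Sketch of the decomposition (see quant_lstm_sup.h for the
        // authoritative layout): the combined kInputWeights tensor packs the
        // four input-to-gate and four recurrent-to-gate matrices, which are
        // split here into the eight submatrices whose dims are computed by
        // SetWeightSubmatrixDims below.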
3491 
3492         TfLiteIntArray* recurrent_weight_dims = TfLiteIntArrayCreate(2);
3493         TfLiteIntArray* input_weight_dims = TfLiteIntArrayCreate(2);
3494         tflite::delegate::nnapi::SetWeightSubmatrixDims(
3495             weight_tensor.dims, recurrent_weight_dims, input_weight_dims);
3496 
3497         int new_tensor_index = -1;
3498 
3499         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3500             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3501             input_weight_dims, input_to_input, weight_tensor.params,
3502             &new_tensor_index);
3503 
3504         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3505             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3506             input_weight_dims, input_to_forget, weight_tensor.params,
3507             &new_tensor_index);
3508 
3509         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3510             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3511             input_weight_dims, input_to_cell, weight_tensor.params,
3512             &new_tensor_index);
3513 
3514         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3515             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3516             input_weight_dims, input_to_output, weight_tensor.params,
3517             &new_tensor_index);
3518 
3519         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3520             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3521             recurrent_weight_dims, recurrent_to_input, weight_tensor.params,
3522             &new_tensor_index);
3523 
3524         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3525             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3526             recurrent_weight_dims, recurrent_to_forget, weight_tensor.params,
3527             &new_tensor_index);
3528 
3529         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3530             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3531             recurrent_weight_dims, recurrent_to_cell, weight_tensor.params,
3532             &new_tensor_index);
3533 
3534         mapping_args.builder->AddNewInputConstantTensor<uint8_t>(
3535             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3536             recurrent_weight_dims, recurrent_to_output, weight_tensor.params,
3537             &new_tensor_index);
3538 
3539         TfLiteIntArrayFree(input_weight_dims);
3540         TfLiteIntArrayFree(recurrent_weight_dims);
3541 
3542         // Biases have to be split into four.
3543         const auto bias_size = output_dims->data[1];
3544         const TfLiteTensor& biases_tensor =
3545             mapping_args.context->tensors[mapping_args.node->inputs
3546                                               ->data[3 /* kInputBiases */]];
3547 
3548         std::vector<int32_t> input_bias;
3549         std::vector<int32_t> cell_bias;
3550         std::vector<int32_t> forget_bias;
3551         std::vector<int32_t> output_bias;
3552         delegate::nnapi::DecomposeBiasTensor(biases_tensor.data.i32, bias_size,
3553                                              &input_bias, &cell_bias,
3554                                              &forget_bias, &output_bias);
3555 
3556         int input_bias_tensor = -1;
3557         mapping_args.builder->AddNewInputConstantTensor<int32_t>(
3558             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, input_bias,
3559             biases_tensor.params, &input_bias_tensor);
3560         int forget_bias_tensor = -1;
3561         mapping_args.builder->AddNewInputConstantTensor(
3562             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3563             forget_bias, biases_tensor.params, &forget_bias_tensor);
3564         int cell_gate_bias_tensor = -1;
3565         mapping_args.builder->AddNewInputConstantTensor(
3566             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size}, cell_bias,
3567             biases_tensor.params, &cell_gate_bias_tensor);
3568         int output_gate_bias_tensor = -1;
3569         mapping_args.builder->AddNewInputConstantTensor(
3570             ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, {bias_size},
3571             output_bias, biases_tensor.params, &output_gate_bias_tensor);
3572 
3573         mapping_args.builder->AddTensorInput(
3574             mapping_args.node->inputs->data[4 /* kInputPrevState */],
3575             /* hybrid_op */ false,
3576             /* scalar_as_tensor */ false);
3577 
3578         // kInputPrevActivation
3579         mapping_args.builder->AddTensorInput(
3580             mapping_args.node->inputs->data[1 /* kInputPrevActivation */],
3581             /* hybrid_op */ false,
3582             /* scalar_as_tensor */ false);
3583 
3584         // Configure the copy from the activation and state outputs
3585         // to their associated inputs.
3586         mapping_args.feedback_loops->push_back(std::make_tuple(
3587             mapping_args.node->outputs->data[0 /*kOutputActivation*/],
3588             mapping_args.node->inputs->data[1 /*kInputPrevActivation*/]));
3589 
3590         mapping_args.feedback_loops->push_back(std::make_tuple(
3591             mapping_args.node->outputs->data[1 /*kOutputState*/],
3592             mapping_args.node->inputs->data[4 /*kInputPrevState*/]));
3593 
3594         // OUTPUTS
3595         // Set only the first two since the remaining ones are
3596         // ignored by NNAPI.
3597         mapping_args.builder->AddTensorOutput(
3598             mapping_args.node->outputs->data[1 /* kOutputState */], 0);
3599 
3600         mapping_args.builder->AddTensorOutput(
3601             mapping_args.node->outputs->data[0 /* kOutputActivation */], 0);
3602 
3603         *nn_op_type = ANEURALNETWORKS_QUANTIZED_16BIT_LSTM;
3604       } else {
3605         auto builtin = reinterpret_cast<TfLiteLSTMParams*>(
3606             mapping_args.node->builtin_data);
3607         mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3608         mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3609         mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3610 
3611         // The current NNAPI implementation requires the scratch_buffer as
3612         // an output.
3613         mapping_args.builder->AddAdditionalFloat32OutputTensor(2);
3614 
3615         // NNAPI needs both state_in and state_out for cell_state and
3616         // output_state.
3617         int ann_index;
3618         mapping_args.builder->AddStateFloat32Tensor(
3619             mapping_args.node->inputs->data[/*kInputActivationStateTensor*/ 18],
3620             &ann_index);
3621         mapping_args.model_state_outputs->push_back(ann_index);
3622         mapping_args.model_state_tfl_inputs->push_back(
3623             mapping_args.node->inputs
3624                 ->data[/*kInputActivationStateTensor*/ 18]);
3625         mapping_args.builder->AddStateFloat32Tensor(
3626             mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19],
3627             &ann_index);
3628         mapping_args.model_state_outputs->push_back(ann_index);
3629         mapping_args.model_state_tfl_inputs->push_back(
3630             mapping_args.node->inputs->data[/*kInputCellStateTensor*/ 19]);
3631 
3632         const bool hybrid_op = IsHybridOperator(
3633             mapping_args.context, kTfLiteBuiltinLstm, mapping_args.node);
3634 
3635         if (mapping_args.node->inputs->size == 24) {
3636           for (int i = 20; i < 24; ++i) {
3637             const auto input_index = mapping_args.node->inputs->data[i];
3638             if (input_index != kTfLiteOptionalTensor) {
3639               mapping_args.builder->AddTensorInput(input_index, hybrid_op);
3640             } else {
3641               mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3642             }
3643           }
3644         }
3645 
3646         *nn_op_type = ANEURALNETWORKS_LSTM;
3647       }
3648     } break;
3649     case kTfLiteBuiltinMean: {
3650       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3651           mapping_args.node->builtin_data);
3652       int32_t keep_dims = 0;
3653       if (builtin->keep_dims) keep_dims = 1;
3654       mapping_args.builder->AddScalarInt32Operand(keep_dims);
3655       *nn_op_type = ANEURALNETWORKS_MEAN;
3656     } break;
3657     case kTfLiteBuiltinEmbeddingLookup: {
3658       *nn_op_type = ANEURALNETWORKS_EMBEDDING_LOOKUP;
3659     } break;
3660     case kTfLiteBuiltinHashtableLookup: {
3661       *nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
3662     } break;
3663     case kTfLiteBuiltinMaximum: {
3664       *nn_op_type = ANEURALNETWORKS_MAXIMUM;
3665     } break;
3666     case kTfLiteBuiltinMinimum: {
3667       *nn_op_type = ANEURALNETWORKS_MINIMUM;
3668     } break;
3669     case kTfLiteBuiltinCast: {
3670       *nn_op_type = ANEURALNETWORKS_CAST;
3671     } break;
3672     case kTfLiteBuiltinLeakyRelu: {
3673       const auto input_type =
3674           mapping_args.context->tensors[mapping_args.node->inputs->data[0]]
3675               .type;
3676       auto builtin = reinterpret_cast<TfLiteLeakyReluParams*>(
3677           mapping_args.node->builtin_data);
3678 
3679       TfLiteTensor alpha_tensor;
3680       alpha_tensor.type = input_type;
3681       alpha_tensor.allocation_type = kTfLiteDynamic;
3682       alpha_tensor.dims = TfLiteIntArrayCreate(1);
3683       alpha_tensor.dims->data[0] = 1;
3684       alpha_tensor.params.zero_point = 0;
3685 
3686       int new_tensor_index = -1;
3687       if (input_type == kTfLiteFloat32) {
3688         alpha_tensor.params.scale = 0;
3689         std::vector<float> alpha_value = {builtin->alpha};
3690         mapping_args.builder->AddNewInputConstantTensor(
3691             ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, alpha_tensor.dims,
3692             alpha_value, alpha_tensor.params, &new_tensor_index);
3693       } else if (input_type == kTfLiteInt8 &&
3694                  android_sdk_version >= kMinSdkVersionForNNAPI13) {
3695         alpha_tensor.params.scale = builtin->alpha;
3696         std::vector<int8_t> alpha_value = {1};
3697         mapping_args.builder->AddNewInputConstantTensor(
3698             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
3699             alpha_tensor.dims, alpha_value, alpha_tensor.params,
3700             &new_tensor_index);
3701       } else {
3702         alpha_tensor.params.scale = builtin->alpha;
3703         std::vector<uint8_t> alpha_value = {1};
3704         mapping_args.builder->AddNewInputConstantTensor(
3705             ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, kTfLiteUInt8,
3706             alpha_tensor.dims, alpha_value, alpha_tensor.params,
3707             &new_tensor_index);
3708       }
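      // In effect, LeakyRelu(x) is expressed as PRELU(x, alpha) with a
      // single-element alpha tensor; for quantized inputs the constant value 1
      // with scale == builtin->alpha dequantizes to the requested alpha.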
3709 
3710       *nn_op_type = ANEURALNETWORKS_PRELU;
3711     } break;
3712     case kTfLiteBuiltinPrelu: {
3713       *nn_op_type = ANEURALNETWORKS_PRELU;
3714     } break;
3715     case kTfLiteBuiltinTile: {
3716       *nn_op_type = ANEURALNETWORKS_TILE;
3717     } break;
3718     case kTfLiteBuiltinLogicalOr: {
3719       *nn_op_type = ANEURALNETWORKS_LOGICAL_OR;
3720     } break;
3721     case kTfLiteBuiltinLogicalAnd: {
3722       *nn_op_type = ANEURALNETWORKS_LOGICAL_AND;
3723     } break;
3724     case kTfLiteBuiltinLogicalNot: {
3725       *nn_op_type = ANEURALNETWORKS_LOGICAL_NOT;
3726     } break;
3727     case kTfLiteBuiltinLess: {
3728       *nn_op_type = ANEURALNETWORKS_LESS;
3729     } break;
3730     case kTfLiteBuiltinLessEqual: {
3731       *nn_op_type = ANEURALNETWORKS_LESS_EQUAL;
3732     } break;
3733     case kTfLiteBuiltinGreater: {
3734       *nn_op_type = ANEURALNETWORKS_GREATER;
3735     } break;
3736     case kTfLiteBuiltinGreaterEqual: {
3737       *nn_op_type = ANEURALNETWORKS_GREATER_EQUAL;
3738     } break;
3739     case kTfLiteBuiltinEqual: {
3740       *nn_op_type = ANEURALNETWORKS_EQUAL;
3741     } break;
3742     case kTfLiteBuiltinNotEqual: {
3743       *nn_op_type = ANEURALNETWORKS_NOT_EQUAL;
3744     } break;
3745     case kTfLiteBuiltinNeg: {
3746       *nn_op_type = ANEURALNETWORKS_NEG;
3747     } break;
3748     case kTfLiteBuiltinTopkV2: {
3749       const TfLiteTensor& k_param =
3750           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3751       mapping_args.builder->AddScalarInt32Operand(*k_param.data.i32);
3752       *nn_op_type = ANEURALNETWORKS_TOPK_V2;
3753     } break;
3754     case kTfLiteBuiltinSelect: {
3755       *nn_op_type = ANEURALNETWORKS_SELECT;
3756     } break;
3757     case kTfLiteBuiltinGather: {
3758       auto builtin = reinterpret_cast<TfLiteGatherParams*>(
3759           mapping_args.node->builtin_data);
3760       mapping_args.builder->AddScalarInt32Operand(builtin->axis);
3761       mapping_args.builder->AddTensorInput(mapping_args.node->inputs->data[1],
3762                                            /* hybrid_op */ false,
3763                                            /* tensor_flags */ 0);
3764       *nn_op_type = ANEURALNETWORKS_GATHER;
3765     } break;
3766     case kTfLiteBuiltinBidirectionalSequenceLstm: {
3767       auto builtin = reinterpret_cast<TfLiteBidirectionalSequenceLSTMParams*>(
3768           mapping_args.node->builtin_data);
3769       mapping_args.builder->AddScalarInt32Operand(builtin->activation);
3770       mapping_args.builder->AddScalarFloat32Operand(builtin->cell_clip);
3771       mapping_args.builder->AddScalarFloat32Operand(builtin->proj_clip);
3772       mapping_args.builder->AddScalarBoolOperand(builtin->merge_outputs);
3773       mapping_args.builder->AddScalarBoolOperand(builtin->time_major);
3774       // TF Lite doesn't support layer normalization in bidirectional
3775       // sequence LSTM, so we insert optional tensors for NNAPI.
3776       for (int i = 0; i < 8; ++i) {
3777         mapping_args.builder->AddVectorFloat32Operand(nullptr, 0);
3778       }
3779       *nn_op_type = ANEURALNETWORKS_BIDIRECTIONAL_SEQUENCE_LSTM;
3780     } break;
3781     case kTfLiteBuiltinExpandDims: {
3782       const TfLiteTensor& axis_param =
3783           mapping_args.context->tensors[mapping_args.node->inputs->data[1]];
3784       mapping_args.builder->AddScalarInt32Operand(*axis_param.data.i32);
3785       *nn_op_type = ANEURALNETWORKS_EXPAND_DIMS;
3786     } break;
3787     case kTfLiteBuiltinSplit: {
3788       const TfLiteTensor& axis =
3789           mapping_args.context->tensors[mapping_args.node->inputs->data[0]];
3790       auto builtin =
3791           reinterpret_cast<TfLiteSplitParams*>(mapping_args.node->builtin_data);
3792       mapping_args.builder->AddScalarInt32Operand(*axis.data.i32);
3793       mapping_args.builder->AddScalarInt32Operand(builtin->num_splits);
3794       *nn_op_type = ANEURALNETWORKS_SPLIT;
3795     } break;
3796     case kTfLiteBuiltinLogSoftmax: {
3797       // The scale and axis are hardcoded to 1 and -1, respectively,
3798       // in TFLite.
3799       mapping_args.builder->AddScalarFloat32Operand(1);
3800       mapping_args.builder->AddScalarInt32Operand(-1);
3801       *nn_op_type = ANEURALNETWORKS_LOG_SOFTMAX;
3802     } break;
3803     case kTfLiteBuiltinQuantize: {
3804       auto input_index = mapping_args.node->inputs->data[0];
3805       // NNAPI doesn't support requantization; it only supports quantization
3806       // from float. Dequantize the input by adding a Dequantize node before
3807       // this one.
3808       if (IsQuantized(mapping_args.context->tensors[input_index].type)) {
3809         mapping_args.builder->AddDequantize(0, input_index, kTfLiteFloat32,
3810                                             mapping_args.node_index);
3811       }
3812 
3813       *nn_op_type = ANEURALNETWORKS_QUANTIZE;
3814     } break;
3815     case kTfLiteBuiltinReduceAny: {
3816       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3817           mapping_args.node->builtin_data);
3818       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3819       *nn_op_type = ANEURALNETWORKS_REDUCE_ANY;
3820     } break;
3821     case kTfLiteBuiltinReduceMin: {
3822       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3823           mapping_args.node->builtin_data);
3824       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3825       *nn_op_type = ANEURALNETWORKS_REDUCE_MIN;
3826     } break;
3827     case kTfLiteBuiltinReduceMax: {
3828       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3829           mapping_args.node->builtin_data);
3830       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3831       *nn_op_type = ANEURALNETWORKS_REDUCE_MAX;
3832     } break;
3833     case kTfLiteBuiltinDepthToSpace: {
3834       auto builtin = reinterpret_cast<TfLiteDepthToSpaceParams*>(
3835           mapping_args.node->builtin_data);
3836       mapping_args.builder->AddScalarInt32Operand(builtin->block_size);
3837       *nn_op_type = ANEURALNETWORKS_DEPTH_TO_SPACE;
3838     } break;
3839     case kTfLiteBuiltinReduceProd: {
3840       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3841           mapping_args.node->builtin_data);
3842       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3843       *nn_op_type = ANEURALNETWORKS_REDUCE_PROD;
3844     } break;
3845     case kTfLiteBuiltinSum: {
3846       auto builtin = reinterpret_cast<TfLiteReducerParams*>(
3847           mapping_args.node->builtin_data);
3848       mapping_args.builder->AddScalarBoolOperand(builtin->keep_dims);
3849       *nn_op_type = ANEURALNETWORKS_REDUCE_SUM;
3850     } break;
3851     case kTfLiteBuiltinElu: {
3852       mapping_args.builder->AddScalarFloat32Operand(1.0);
3853       *nn_op_type = ANEURALNETWORKS_ELU;
3854     } break;
3855     case kTfLiteBuiltinFill: {
3856       *nn_op_type = ANEURALNETWORKS_FILL;
3857     } break;
3858     default:
3859       // All other operators are not mapped.
3860       return kTfLiteError;
3861   }
3862   return kTfLiteOk;
3863 }
3864 
3865 // Initialize the kernel (an NN model).
3866 TfLiteStatus NNAPIDelegateKernel::Init(TfLiteContext* context,
3867                                        const TfLiteDelegateParams* params,
3868                                        int* nnapi_errno) {
3869   for (auto node_index : TfLiteIntArrayView(params->nodes_to_replace)) {
3870     nodes_.push_back(node_index);
3871   }
3872 
3873   // Initialize densify map and dequantize map.
3874   densify_output_to_node_mapping_ = std::vector<int>(context->tensors_size, -1);
3875   non_const_dequantize_output_to_node_mapping_ =
3876       std::vector<int>(context->tensors_size, -1);
3877   const auto delegate_options =
3878       StatefulNnApiDelegate::GetOptions(params->delegate);
3879   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
3880       ShouldUseTargetDevices(delegate_options, nnapi_)) {
3881     TF_LITE_ENSURE_STATUS(GetTargetDevices(context, params->delegate, nnapi_,
3882                                            nnapi_errno, &nnapi_devices_));
3883 
3884     if (nnapi_devices_.empty()) {
3885       context->ReportError(
3886           context, "NNAPI delegate requested but no accelerators available.");
3887       return kTfLiteError;
3888     }
3889   }
3890 
3891   // Mark the handle-backed tensors.
3892   tensor_memory_map_ =
3893       &StatefulNnApiDelegate::GetTensorMemoryMap(params->delegate);
3894 
3895   if (!nn_model_) {
3896     ANeuralNetworksModel* model = nullptr;
3897     RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3898                                     nnapi_->ANeuralNetworksModel_create(&model),
3899                                     "creating NNAPI model", nnapi_errno);
3900     nn_model_.reset(model);
3901 
3902     TF_LITE_ENSURE_STATUS(BuildGraph(context, delegate_options,
3903                                      params->input_tensors,
3904                                      params->output_tensors, nnapi_errno));
3905   }
3906 
3907   auto* cache = StatefulNnApiDelegate::GetCache(params->delegate);
3908   if (cache) {
3909     // Compilation caching is enabled; construct the uint8 token.
3910     uint64_t token_parts[4];
3911     // model_token is incorporated into partition_key by TFLite Serialization.
3912     // NNAPI uses a 256-bit key, but we can just tile the unique 64-bit
3913     // fingerprint from TFLite.
3914     auto partition_entry = cache->GetEntryForKernel(kNnapiId, context, params);
3915     token_parts[0] = partition_entry.GetFingerprint();
3916     token_parts[1] = partition_entry.GetFingerprint();
3917     token_parts[2] = partition_entry.GetFingerprint();
3918     token_parts[3] = partition_entry.GetFingerprint();
3919     // TODO(b/172238515): get token size from header instead of hardcoding.
3920     // Allocate one extra 'null' byte to avoid bugs with backends that might
3921     // be doing strlen() on the token ptr.
3922     std::vector<uint8_t> nnapi_cache_token(33, 0);
3923     // Copy the token bits.
3924     uint8_t* p = reinterpret_cast<uint8_t*>(token_parts);
3925     for (int i = 0; i < 4 * sizeof(uint64_t); i++) {
3926       nnapi_cache_token[i] = p[i];
3927     }
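    // Illustrative example (hypothetical fingerprint 0x0123456789abcdef): the
    // token is that 8-byte value repeated four times in host byte order,
    // followed by the single zero byte allocated above.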
3928 
3929     nn_compilation_cache_token_ = nnapi_cache_token;
3930   }
3931 
3932   initialised_ = true;
3933 
3934   return kTfLiteOk;
3935 }
3936 
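// Prepare() builds the NNAPI compilation for the model constructed in Init().
// As a sketch of the usual TfLite delegate flow (an assumption about the
// caller, not enforced here), the kernel registration invokes Init() once per
// delegated partition, then Prepare(), then Invoke() for each inference.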
3937 TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
3938                                           TfLiteNode* node, int* nnapi_errno) {
3939   if (!initialised_) {
3940     return kTfLiteError;
3941   }
3942 
3943   const auto delegate_options =
3944       StatefulNnApiDelegate::GetOptions(node->delegate);
3945   if (nn_compilation_) {
3946     return kTfLiteOk;
3947   }
3948 
3949   ANeuralNetworksCompilation* compilation = nullptr;
3950   if (!nnapi_devices_.empty()) {
3951     // Compile for the selected accelerator.
3952     RETURN_TFLITE_ERROR_IF_NN_ERROR(
3953         context,
3954         nnapi_->ANeuralNetworksCompilation_createForDevices(
3955             nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
3956             &compilation),
3957         "creating NNAPI model for given devices", nnapi_errno);
3958   } else {
3959     // Trying to call ANeuralNetworksCompilation_create when the delegate is
3960     // constructed from a support library would result in a crash.
3961     if (nnapi_->ANeuralNetworksCompilation_create != nullptr) {
3962       RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
3963                                       nnapi_->ANeuralNetworksCompilation_create(
3964                                           nn_model_.get(), &compilation),
3965                                       "creating NNAPI compilation",
3966                                       nnapi_errno);
3967     } else {
3968       TF_LITE_KERNEL_LOG(
3969           context,
3970           "Attempted to call ANeuralNetworksCompilation_create from NNAPI "
3971           "delegate that is constructed from a support library");
3972       return kTfLiteError;
3973     }
3974   }
3975 
3976   auto preference = delegate_options.execution_preference;
3977   if (preference !=
3978       StatefulNnApiDelegate::Options::ExecutionPreference::kUndefined) {
3979     const int preference_result =
3980         nnapi_->ANeuralNetworksCompilation_setPreference(compilation,
3981                                                          preference);
3982     if (preference_result != ANEURALNETWORKS_NO_ERROR) {
3983       nnapi_->ANeuralNetworksCompilation_free(compilation);
3984       compilation = nullptr;
3985     }
3986     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, preference_result,
3987                                     "setting compilation preferences",
3988                                     nnapi_errno);
3989   }
3990 
3991   if (!nn_compilation_cache_token_.empty()) {
3992     const char* cache_dir = delegate_options.cache_dir;
3993     const int set_caching_result =
3994         nnapi_->ANeuralNetworksCompilation_setCaching(
3995             compilation, cache_dir, nn_compilation_cache_token_.data());
3996     if (set_caching_result != ANEURALNETWORKS_NO_ERROR) {
3997       nnapi_->ANeuralNetworksCompilation_free(compilation);
3998       compilation = nullptr;
3999     }
4000     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
4001                                     "configuring NNAPI caching", nnapi_errno);
4002   }
4003   // Set compilation timeout if applicable.
4004   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4005     if (delegate_options.max_compilation_timeout_duration_ns > 0) {
4006       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4007           context,
4008           nnapi_->ANeuralNetworksCompilation_setTimeout(
4009               compilation,
4010               delegate_options.max_compilation_timeout_duration_ns),
4011           "setting compilation timeout", nnapi_errno);
4012     }
4013     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4014         context,
4015         nnapi_->ANeuralNetworksCompilation_setPriority(
4016             compilation, delegate_options.execution_priority),
4017         "setting compilation priority", nnapi_errno);
4018   }
4019   const int finish_result =
4020       nnapi_->ANeuralNetworksCompilation_finish(compilation);
4021   if (finish_result != ANEURALNETWORKS_NO_ERROR) {
4022     nnapi_->ANeuralNetworksCompilation_free(compilation);
4023     compilation = nullptr;
4024   }
4025   RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
4026                                   "completing NNAPI compilation", nnapi_errno);
4027   nn_compilation_.reset(compilation);
4028 
4029   bool should_use_burst_mode = delegate_options.use_burst_computation;
4030   // Override should_use_burst_mode to true if the selected NNAPI devices are of
4031   // NNAPI feature level 5 or higher.
4032   if (!nnapi_devices_.empty() &&
4033       target_feature_level_ >= kNNAPIRuntimeFeatureLevel5) {
4034     should_use_burst_mode = true;
4035   }
4036   // Create a burst object to be reused across a sequence of executions.
4037   if (should_use_burst_mode &&
4038       nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI12 &&
4039       nnapi_->ANeuralNetworksBurst_create) {
4040     ANeuralNetworksBurst* burst = nullptr;
4041     const int create_burst_result =
4042         nnapi_->ANeuralNetworksBurst_create(nn_compilation_.get(), &burst);
4043     if (create_burst_result != ANEURALNETWORKS_NO_ERROR) {
4044       nnapi_->ANeuralNetworksBurst_free(burst);
4045       burst = nullptr;
4046     }
4047     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, create_burst_result,
4048                                     "creating NNAPI burst", nnapi_errno);
4049     nn_burst_.reset(burst);
4050   }
4051 
4052   return kTfLiteOk;
4053 }
4054 
4055 TfLiteStatus NNAPIDelegateKernel::GetOperationsSupportedByTargetNnApiDevices(
4056     TfLiteContext* context, std::vector<int>* supported_nodes,
4057     int* nnapi_errno) {
4058   if (!nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices) {
4059     return kTfLiteError;
4060   }
4061 
4062   const auto nnapi_model_size = nnapi_to_tflite_op_mapping_.size();
4063 
4064   // Determine the list of operations the device actually supports
4065   std::unique_ptr<bool[]> nnapi_ops_support_flags(new bool[nnapi_model_size]);
4066 
4067   RETURN_TFLITE_ERROR_IF_NN_ERROR(
4068       context,
4069       nnapi_->ANeuralNetworksModel_getSupportedOperationsForDevices(
4070           nn_model_.get(), nnapi_devices_.data(), nnapi_devices_.size(),
4071           nnapi_ops_support_flags.get()),
4072       "Checking supported operations for devices", nnapi_errno);
4073 
4074   // A TfLite op is supported only if all the associated NNAPI ones are.
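  // For example, when extra Dequantize operations are added for a hybrid node,
  // that single TfLite node maps to several NNAPI operations; if the device
  // rejects any one of them, the whole TfLite node is reported as unsupported.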
4075   auto tflite_ops_support_status = std::map<int, bool>();
4076   std::for_each(nodes_.begin(), nodes_.end(),
4077                 [&tflite_ops_support_status](int tflite_node_index) {
4078                   tflite_ops_support_status[tflite_node_index] = true;
4079                 });
4080   for (int nnapi_op_index = 0; nnapi_op_index < nnapi_model_size;
4081        nnapi_op_index++) {
4082     const auto tflite_op_index = nnapi_to_tflite_op_mapping_[nnapi_op_index];
4083     tflite_ops_support_status[tflite_op_index] &=
4084         nnapi_ops_support_flags[nnapi_op_index];
4085     if (!tflite_ops_support_status[tflite_op_index]) {
4086       if (std::count(non_const_dequantize_output_to_node_mapping_.begin(),
4087                      non_const_dequantize_output_to_node_mapping_.end(), -1) <
4088               non_const_dequantize_output_to_node_mapping_.size() ||
4089           std::count(densify_output_to_node_mapping_.begin(),
4090                      densify_output_to_node_mapping_.end(),
4091                      -1) < densify_output_to_node_mapping_.size()) {
4092         // Only allow full model delegation for sparse models.
4093         return kTfLiteOk;
4094       }
4095     }
4096   }
4097 
4098   supported_nodes->clear();
4099   std::for_each(nodes_.begin(), nodes_.end(),
4100                 [&supported_nodes, &tflite_ops_support_status](int node_index) {
4101                   if (tflite_ops_support_status[node_index]) {
4102                     supported_nodes->push_back(node_index);
4103                   }
4104                 });
4105 
4106   return kTfLiteOk;
4107 }
4108 
4109 TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
4110                                          TfLiteNode* node, int* nnapi_errno) {
4111   const bool allow_padding =
4112       nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13 &&
4113       nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding != nullptr;
4114   const auto delegate_options =
4115       StatefulNnApiDelegate::GetOptions(node->delegate);
4116 
4117   // Check for conditions where we need to re-create the NN Execution object
4118   // and re-configure the settings and inputs / outputs.
4119   bool should_reset_execution = false;
4120   if (nnapi_->nnapi_runtime_feature_level <= kMinSdkVersionForNNAPI13 ||
4121       delegate_options.allow_dynamic_dimensions) {
4122     // Must reset the execution before Android API 31 or when using dynamic dimensions.
4123     should_reset_execution = true;
4124   } else {
4125     // For Android API 31+, check for BufferHandle changes and reset the
4126     // execution if any.
4127     std::vector<int> curr_in_tensor_handle_map(context->tensors_size);
4128     for (int i = 0; i < curr_in_tensor_handle_map.size(); i++) {
4129       curr_in_tensor_handle_map[i] = context->tensors[i].buffer_handle;
4130     }
4131     if (!(tensor_handle_map_ == curr_in_tensor_handle_map)) {
4132       should_reset_execution = true;
4133       tensor_handle_map_ = curr_in_tensor_handle_map;
4134     }
4135   }
4136   if (should_reset_execution) {
4137     ANeuralNetworksExecution* execution = nullptr;
4138     RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
4139                                     nnapi_->ANeuralNetworksExecution_create(
4140                                         nn_compilation_.get(), &execution),
4141                                     "creating NNAPI execution", nnapi_errno);
4142     if (nnapi_->nnapi_runtime_feature_level > kMinSdkVersionForNNAPI13) {
4143       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4144           context,
4145           nnapi_->ANeuralNetworksExecution_setReusable(execution,
4146                                                        /*reusable=*/true),
4147           "making execution reusable", nnapi_errno);
4148     }
4149     nn_execution_.reset(execution);
4150 
4151     // Allow padding bytes for execution inputs & outputs if applicable.
4152     if (allow_padding) {
4153       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4154           context,
4155           nnapi_->ANeuralNetworksExecution_enableInputAndOutputPadding(
4156               nn_execution_.get(),
4157               /*enable=*/true),
4158           "setting allow padding for execution inputs and outputs",
4159           nnapi_errno);
4160     }
4161     // Set execution timeout if applicable.
4162     if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
4163       if (delegate_options.max_execution_timeout_duration_ns > 0) {
4164         RETURN_TFLITE_ERROR_IF_NN_ERROR(
4165             context,
4166             nnapi_->ANeuralNetworksExecution_setTimeout(
4167                 nn_execution_.get(),
4168                 delegate_options.max_execution_timeout_duration_ns),
4169             "setting execution timeout", nnapi_errno);
4170       }
4171       if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
4172         RETURN_TFLITE_ERROR_IF_NN_ERROR(
4173             context,
4174             nnapi_->ANeuralNetworksExecution_setLoopTimeout(
4175                 nn_execution_.get(),
4176                 delegate_options.max_execution_loop_timeout_duration_ns),
4177             "setting execution loop timeout", nnapi_errno);
4178       }
4179     }
4180     // Check if the input and output memory pools need to be resized.
4181     if (delegate_options.allow_dynamic_dimensions) {
4182       size_t total_input_byte_size = 0;
4183       // Map the TensorFlow Lite inputs and outputs to NNAPI indices.
4184       for (int i : TfLiteIntArrayView(node->inputs)) {
4185         // Constant tensors are not NNAPI inputs.
4186         if (i != kTfLiteOptionalTensor &&
4187             context->tensors[i].allocation_type != kTfLiteMmapRo &&
4188             // The delegate might not have mapped this input (this can
4189             // happen if one tensor is split into several ones).
4190             operand_mapping_.lite_index_to_ann(i) != -1) {
4191           if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4192             continue;
4193           }
4194           const TfLiteType nn_type_conversion =
4195               operand_mapping_.lite_index_to_ann_type_conversion(i);
4196           int tensor_size = 0;
4197           if (nn_type_conversion == kTfLiteNoType) {
4198             tensor_size = context->tensors[i].bytes;
4199           } else {
4200             size_t type_size;
4201             TF_LITE_ENSURE_OK(
4202                 context,
4203                 GetSizeOfType(context, nn_type_conversion, &type_size));
4204             tensor_size = NumElements(&context->tensors[i]) * type_size;
4205           }
4206           total_input_byte_size += tensor_size;
4207           total_input_byte_size += GetNumPaddingBytes(tensor_size);
4208         }
4209       }
4210       if (total_input_byte_size > nn_input_memory_->get_byte_size()) {
4211         nn_input_memory_.reset(
4212             new NNMemory(nnapi_, "input_pool", total_input_byte_size));
4213       }
4214 
4215       size_t total_output_byte_size = 0;
4216       for (int i : TfLiteIntArrayView(node->outputs)) {
4217         if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
4218           continue;
4219         }
4220         total_output_byte_size += context->tensors[i].bytes;
4221         total_output_byte_size += GetNumPaddingBytes(context->tensors[i].bytes);
4222       }
4223       if (total_output_byte_size > nn_output_memory_->get_byte_size()) {
4224         nn_output_memory_.reset(
4225             new NNMemory(nnapi_, "output_pool", total_output_byte_size));
4226       }
4227     }
4228   }
4229   // Set the input tensor buffers. Note: we access tflite tensors using
4230   // absolute indices, but the NN API indexes inputs by relative indices.
4231   int relative_input_index = 0;
4232 
4233   const bool use_int8_asymm_signed =
4234       target_feature_level_ >= kMinSdkVersionForNNAPI13;
4235 
4236   size_t input_offset = 0;
4237   for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
4238     if (absolute_input_index == kTfLiteOptionalTensor) {
4239       continue;
4240     }
4241     ANeuralNetworksOperandType input_nn_operand_type;
4242     ANeuralNetworksOperandType* input_nn_operand_type_ptr = nullptr;
4243     TfLiteTensor* tensor = &context->tensors[absolute_input_index];
4244     TfLiteType ann_type_equivalent =
4245         operand_mapping_.lite_index_to_ann_type_conversion(
4246             absolute_input_index);
4247     if (delegate_options.allow_dynamic_dimensions &&
4248         HasUnspecifiedDimension(tensor)) {
4249       input_nn_operand_type =
4250           ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
4251       input_nn_operand_type_ptr = &input_nn_operand_type;
4252     }
4253     if (tensor->allocation_type != kTfLiteMmapRo) {
4254       if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4255           tensor->buffer_handle < tensor_memory_map_->size()) {
4256         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4257             context,
4258             nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4259                 nn_execution_.get(), relative_input_index,
4260                 input_nn_operand_type_ptr,
4261                 tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4262                 tensor->bytes),
4263             "associating NNAPI execution input with a memory object", tensor,
4264             nnapi_errno);
4265         relative_input_index++;
4266         continue;
4267       }
4268       int tensor_size = 0;
4269       int padding_bytes = 0;
4270       if (ann_type_equivalent != kTfLiteNoType) {
4271         const auto num_elements = NumElements(tensor);
4272         uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
4273         if (tensor->type == kTfLiteUInt8 &&
4274             ann_type_equivalent == kTfLiteInt32) {
4275           for (int i = 0; i < num_elements; ++i) {
4276             reinterpret_cast<int32_t*>(input_ptr)[i] =
4277                 static_cast<const int32_t>(tensor->data.uint8[i]);
4278           }
4279         } else if (tensor->type == kTfLiteInt8 &&
4280                    ann_type_equivalent == kTfLiteUInt8) {
4281           // Explicitly convert int8 values to uint8 values.
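          // e.g. int8 -128 maps to uint8 0 and int8 127 maps to uint8 255
          // (a fixed zero-point shift of +128).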
4282           for (int i = 0; i < num_elements; ++i) {
4283             input_ptr[i] = static_cast<const uint8_t>(
4284                 static_cast<int32_t>(tensor->data.int8[i]) + 128);
4285           }
4286         } else if (tensor->type == kTfLiteInt8 &&
4287                    ann_type_equivalent == kTfLiteInt32) {
4288           if (use_int8_asymm_signed) {
4289             for (int i = 0; i < num_elements; ++i) {
4290               reinterpret_cast<int32_t*>(input_ptr)[i] =
4291                   static_cast<const int32_t>(tensor->data.int8[i]);
4292             }
4293           } else {
4294             for (int i = 0; i < num_elements; ++i) {
4295               reinterpret_cast<int32_t*>(input_ptr)[i] =
4296                   static_cast<const int32_t>(tensor->data.int8[i]) + 128;
4297             }
4298           }
4299         } else if (tensor->type == kTfLiteInt64 &&
4300                    ann_type_equivalent == kTfLiteInt32) {
4301           // Check that the int64 values fit into int32 before narrowing.
4302           int32_t* input_ptr_i32 = reinterpret_cast<int32_t*>(input_ptr);
4303           for (int i = 0; i < num_elements; ++i) {
4304             if (tensor->data.i64[i] < std::numeric_limits<int32_t>::min() ||
4305                 tensor->data.i64[i] > std::numeric_limits<int32_t>::max()) {
4306               TF_LITE_KERNEL_LOG(context,
4307                                  "NN API Delegate: int64 value out of bounds "
4308                                  "for int32 target NNAPI tensor\n");
4309               return kTfLiteError;
4310             }
4311             input_ptr_i32[i] = static_cast<int32_t>(tensor->data.i64[i]);
4312           }
4313         } else {
4314           TF_LITE_KERNEL_LOG(
4315               context,
4316               "NN API Delegate: unsupported tensor types conversion: "
4317               "from type code %d to type code %d.\n",
4318               tensor->type, ann_type_equivalent);
4319           return kTfLiteError;
4320         }
4321         size_t type_size;
4322         TF_LITE_ENSURE_OK(
4323             context, GetSizeOfType(context, ann_type_equivalent, &type_size));
4324         tensor_size = NumElements(tensor) * type_size;
4325         padding_bytes = GetNumPaddingBytes(tensor_size);
4326         if (should_reset_execution) {
4327           RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4328               context,
4329               nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4330                   nn_execution_.get(), relative_input_index,
4331                   input_nn_operand_type_ptr, nn_input_memory_->get_handle(),
4332                   input_offset, GetNNTensorSize(tensor_size, allow_padding)),
4333               "associating NNAPI execution input with a memory object", tensor,
4334               nnapi_errno);
4335         }
4336       } else {
4337         // Copy data to the pre-allocated shared memory.
4338         memcpy(nn_input_memory_->get_data_ptr() + input_offset,
4339                tensor->data.raw, tensor->bytes);
4340         tensor_size = tensor->bytes;
4341         padding_bytes = GetNumPaddingBytes(tensor_size);
4342         if (should_reset_execution) {
4343           RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4344               context,
4345               nnapi_->ANeuralNetworksExecution_setInputFromMemory(
4346                   nn_execution_.get(), relative_input_index,
4347                   input_nn_operand_type_ptr, nn_input_memory_->get_handle(),
4348                   input_offset, GetNNTensorSize(tensor_size, allow_padding)),
4349               "associating NNAPI execution input with a memory object", tensor,
4350               nnapi_errno);
4351         }
4352       }
4353       input_offset += tensor_size + padding_bytes;
4354       relative_input_index++;
4355     }
4356   }
4357 
4358   // Set the output tensor buffers.
4359   int relative_output_index = 0;
4360   size_t output_offset = 0;
4361   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4362     // If the NNAPI implementation doesn't have some of the outputs, they are
4363     // left unmapped and we should not try to read their values here.
4364     if (operand_mapping_.lite_index_to_ann(output_index) == -1) {
4365       continue;
4366     }
4367     ANeuralNetworksOperandType output_nn_operand_type;
4368     ANeuralNetworksOperandType* output_nn_operand_type_ptr = nullptr;
4369     TfLiteTensor* tensor = &context->tensors[output_index];
4370     if (delegate_options.allow_dynamic_dimensions &&
4371         HasUnspecifiedDimension(tensor)) {
4372       TfLiteType ann_type_equivalent =
4373           operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4374       output_nn_operand_type =
4375           ConvertTensorTypeToNNType(tensor, ann_type_equivalent);
4376       output_nn_operand_type_ptr = &output_nn_operand_type;
4377     }
4378     if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
4379         tensor->buffer_handle < tensor_memory_map_->size() &&
4380         should_reset_execution) {
4381       RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4382           context,
4383           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4384               nn_execution_.get(), relative_output_index,
4385               output_nn_operand_type_ptr,
4386               tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
4387               tensor->bytes),
4388           "associating NNAPI execution output to a memory object", tensor,
4389           nnapi_errno);
4390 
4391     } else {
4392       int padding_bytes = GetNumPaddingBytes(tensor->bytes);
4393       if (should_reset_execution) {
4394         RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
4395             context,
4396             nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4397                 nn_execution_.get(), relative_output_index,
4398                 output_nn_operand_type_ptr, nn_output_memory_->get_handle(),
4399                 output_offset, GetNNTensorSize(tensor->bytes, allow_padding)),
4400             "associating NNAPI execution output to a memory object", tensor,
4401             nnapi_errno);
4402       }
4403       output_offset += tensor->bytes + padding_bytes;
4404     }
4405     relative_output_index++;
4406   }
4407 
4408   // Set memory for NNAPI state_outputs.
4409   for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
4410     int state_tensor_idx = model_state_tfl_inputs_[i];
4411     TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
4412     int padding_bytes = GetNumPaddingBytes(tensor->bytes);
4413     if (should_reset_execution) {
4414       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4415           context,
4416           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
4417               nn_execution_.get(), relative_output_index, nullptr,
4418               nn_output_memory_->get_handle(), output_offset,
4419               GetNNTensorSize(tensor->bytes, allow_padding)),
4420           "associating NNAPI execution state output to a memory object",
4421           nnapi_errno);
4422     }
4423     output_offset += tensor->bytes + padding_bytes;
4424     relative_output_index++;
4425   }
4426 
4427   // Invoke ANN in blocking fashion.
4428   if (nnapi_->android_sdk_version < kMinSdkVersionForNNAPI12) {
4429     ANeuralNetworksEvent* event = nullptr;
4430     RETURN_TFLITE_ERROR_IF_NN_ERROR(
4431         context,
4432         nnapi_->ANeuralNetworksExecution_startCompute(nn_execution_.get(),
4433                                                       &event),
4434         "starting async computation", nnapi_errno);
4435     const int wait_result = nnapi_->ANeuralNetworksEvent_wait(event);
4436     nnapi_->ANeuralNetworksEvent_free(event);
4437     RETURN_TFLITE_ERROR_IF_NN_ERROR(context, wait_result,
4438                                     "waiting for async computation completion",
4439                                     nnapi_errno);
4440   } else {
4441     // Use Burst mode by default for NNAPI 1.2+.
4442     if (nn_burst_) {
4443       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4444           context,
4445           nnapi_->ANeuralNetworksExecution_burstCompute(nn_execution_.get(),
4446                                                         nn_burst_.get()),
4447           "running burst computation", nnapi_errno);
4448     } else {
4449       // Use synchronous execution for NNAPI 1.2+ as a fallback.
4450       RETURN_TFLITE_ERROR_IF_NN_ERROR(
4451           context,
4452           nnapi_->ANeuralNetworksExecution_compute(nn_execution_.get()),
4453           "running computation", nnapi_errno);
4454     }
4455   }
4456 
4457   // Copy results from shared memory to the destination.
4458   output_offset = 0;
4459   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
4460     TfLiteTensor* tensor = &context->tensors[output_index];
4461     if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
4462       continue;
4463     }
4464     TfLiteType ann_type_equivalent =
4465         operand_mapping_.lite_index_to_ann_type_conversion(output_index);
4466     if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
4467       // Explicitly convert uint8 values to int8 values.
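      // e.g. uint8 0 maps back to int8 -128 and uint8 255 maps to int8 127.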
4468       uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
4469           nn_output_memory_->get_data_ptr() + output_offset);
4470       const auto num_elements = NumElements(tensor);
4471       for (int i = 0; i < num_elements; ++i) {
4472         output_ptr[i] =
4473             static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
4474       }
4475     }
4476     memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
4477            tensor->bytes);
4478     output_offset += tensor->bytes;
4479     output_offset += GetNumPaddingBytes(tensor->bytes);
4480   }
4481   // The state_out of the previous invocation needs to be copied to the
4482   // state_in of the current invocation.
4483   for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
4484     int state_tensor_idx = model_state_tfl_inputs_[i];
4485     TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
4486     memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
4487            tensor->bytes);
4488     output_offset += tensor->bytes;
4489     output_offset += GetNumPaddingBytes(tensor->bytes);
4490   }
4491 
4492   // Copy the output of all output tensors in feedback_loops_ into their
4493   // associated input tensors.
4494   for (auto feedback_loop : feedback_loops_) {
4495     int output_tensor_idx;
4496     int input_tensor_idx;
4497     std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
4498     TfLiteTensor& src = context->tensors[output_tensor_idx];
4499     TfLiteTensor& dest = context->tensors[input_tensor_idx];
4500 
4501     memcpy(dest.data.raw, src.data.raw, src.bytes);
4502   }
4503 
4504   return kTfLiteOk;
4505 }
4506 
4507 void NNAPIDelegateKernel::AddDequantizeOperatorsWhereNeeded(
4508     const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
4509     int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno) {
4510   // Depending on the operator and the input data format, Dequantize
4511   // operators may need to be added. For example, when the input is
4512   // floating-point but the weights are quantized, the weights will first be
4513   // dequantized to the same format as the input before being passed to the
4514   // operator.
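  // Illustrative case (hypothetical tensors): a FULLY_CONNECTED node with a
  // float32 input at index 0 and uint8 weights at index 1 gets a Dequantize
  // inserted for the weights, so NNAPI receives float32 weights matching the
  // input.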
4515 
4516   // The tensor determining whether the inputs should be floating-point.
4517   int input_tensor_index = -1;
4518   std::vector<int> inputs_to_potentially_dequantize;
4519 
4520   switch (builtin_code) {
4521     case kTfLiteBuiltinConv2d:
4522     case kTfLiteBuiltinFullyConnected: {
4523       input_tensor_index = 0;
4524       // Weights and bias are inputs #1 and #2 respectively and may require
4525       // dequantization.
4526       inputs_to_potentially_dequantize = {1, 2};
4527       break;
4528     }
4529     case kTfLiteBuiltinLstm: {
4530       input_tensor_index = 0;
4531       inputs_to_potentially_dequantize = {1,  2,  3,  4,  5,  6,  7,
4532                                           8,  9,  10, 11, 12, 13, 14,
4533                                           15, 16, 17, 20, 21, 22, 23};
4534       break;
4535     }
4536     default:
4537       return;
4538   }
4539 
4540   int tensor_id = node->inputs->data[input_tensor_index];
4541   if (tensor_id < 0) return;
4542 
4543   // Nothing to do if the input is not floating-point.
4544   if (!IsFloat(context->tensors[tensor_id].type)) return;
4545 
4546   for (int i : inputs_to_potentially_dequantize) {
4547     if (i < 0 || i >= node->inputs->size) continue;  // Ignore invalid index.
4548     tensor_id = node->inputs->data[i];
4549     if (tensor_id < 0) continue;  // Ignore optional input.
4550 
4551     const TfLiteType type = context->tensors[tensor_id].type;
4552     // Nothing to do for this tensor if it's not quantized.
4553     if (!IsQuantized(type)) continue;
4554 
4555     // Insert Dequantize operator if it hasn't been done already and change
4556     // the node's input accordingly.
4557     builder->AddDequantize(i, node->inputs->data[i], type, tflite_node_index);
4558   }
4559 }
4560 
4561 TfLiteStatus NNAPIDelegateKernel::DensifyAndDequantizeConstTensor(
4562     TfLiteContext* context, int densify_node_id, bool should_dequantize,
4563     NNAPIOpBuilder& builder) {
4564   TfLiteNode* densify_node;
4565   TfLiteRegistration* reg;
4566   TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
4567       context, densify_node_id, &densify_node, &reg));
4568   int sparse_weight_tid = densify_node->inputs->data[0];
4569   auto input_tensor = context->tensors[sparse_weight_tid];
4570   auto output_tensor = context->tensors[densify_node->outputs->data[0]];
4571   if (input_tensor.sparsity == nullptr) {
4572     return kTfLiteError;
4573   }
4574   const int dims_count = output_tensor.dims->size;
4575   std::vector<int> vector_shape(dims_count);
4576   for (int i = 0; i < dims_count; i++) {
4577     vector_shape[i] = output_tensor.dims->data[i];
4578   }
4579   size_t dense_size;
4580   int new_tensor_index = -1;
4581   switch (input_tensor.type) {
4582     case kTfLiteFloat32: {
4583       dense_size = output_tensor.bytes / sizeof(float);
4584       std::vector<float> output_data(dense_size);
4585       tflite::optimize::sparsity::FormatConverter<float> converter(
4586           vector_shape, *input_tensor.sparsity);
4587       converter.SparseToDense(static_cast<const float*>(input_tensor.data.data),
4588                               dense_size, output_data.data(), context);
4589       TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
4590           ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
4591           output_data, output_tensor.params, &new_tensor_index));
4592       break;
4593     }
4594     case kTfLiteFloat16: {
4595       dense_size = output_tensor.bytes / sizeof(Eigen::half);
4596       std::vector<uint16_t> output_data(dense_size);
4597       Eigen::half* unpacked_fp16_data =
4598           reinterpret_cast<Eigen::half*>(output_data.data());
4599       tflite::optimize::sparsity::FormatConverter<Eigen::half> converter(
4600           vector_shape, *input_tensor.sparsity);
4601       converter.SparseToDense(
4602           static_cast<const Eigen::half*>(input_tensor.data.data), dense_size,
4603           unpacked_fp16_data, context);
4604       if (should_dequantize) {
4605         // We need to dequantize the fp16 dense tensor to fp32.
4606         std::vector<float> float_dense_data(dense_size);
4607         for (int i = 0; i < dense_size; ++i) {
4608           float_dense_data[i] = fp16_ieee_to_fp32_value(
4609               reinterpret_cast<uint16_t*>(output_data.data())[i]);
4610         }
4611         TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<float>(
4612             ANEURALNETWORKS_TENSOR_FLOAT32, kTfLiteFloat32, output_tensor.dims,
4613             float_dense_data, output_tensor.params, &new_tensor_index));
4614       } else {
4615         TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<uint16_t>(
4616             ANEURALNETWORKS_TENSOR_FLOAT16, kTfLiteFloat16, output_tensor.dims,
4617             output_data, output_tensor.params, &new_tensor_index));
4618       }
4619       break;
4620     }
4621     case kTfLiteInt8: {
4622       dense_size = output_tensor.bytes / sizeof(int8_t);
4623       std::vector<int8_t> output_data(dense_size);
4624       tflite::optimize::sparsity::FormatConverter<int8_t> converter(
4625           vector_shape, *input_tensor.sparsity);
4626       converter.SparseToDense(
4627           static_cast<const int8_t*>(input_tensor.data.data), dense_size,
4628           output_data.data(), context);
4629       TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor<int8_t>(
4630           ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED, kTfLiteInt8,
4631           output_tensor.dims, output_data, output_tensor.params,
4632           &new_tensor_index));
4633       break;
4634     }
4635     default: {
4636       return kTfLiteError;
4637     }
4638   }
4639   return kTfLiteOk;
4640 }
4641 
4642 TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
4643     TfLiteContext* context, int* nnapi_errno, bool allow_dynamic_dimensions) {
4644   DequantizeMapping dequantize_mapping;
4645   // The operand builder allows creating a single op. It is created outside
4646   // the for loop to avoid reallocating the vectors.
4647   NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_,
4648                          &dequantize_mapping, &allocation_memory_mapping_,
4649                          &nnapi_to_tflite_op_mapping_, nn_model_.get(),
4650                          nnapi_errno, allow_dynamic_dimensions);
4651   // If we have target accelerators, the target SDK version might be
4652   // different from the current Android version.
4653   target_feature_level_ = nnapi_->nnapi_runtime_feature_level;
4654   if (!nnapi_devices_.empty()) {
4655     TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
4656         context, nnapi_, nnapi_devices_, &target_feature_level_, nnapi_errno));
4657   }
4658   // First pass: handle const fp16->fp32 dequantize and densify if needed.
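  // (The maps filled in this pass let the Conv2d weight handling below locate
  // the DENSIFY and fp16 DEQUANTIZE nodes that produce its sparse weights.)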
4659   for (auto node_index : nodes_) {
4660     TfLiteNode* node = nullptr;
4661     TfLiteRegistration* registration = nullptr;
4662     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
4663         context, node_index, &node, &registration));
4664     if (IsDequantizeConstFloat16(context, node, registration)) {
4665       builder.AddTensorInput(node->inputs->data[0], /*hybrid_op=*/false,
4666                              NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION);
4667     }
4668     if (IsDensifyConstTensor(context, node, registration)) {
4669       densify_output_to_node_mapping_[node->outputs->data[0]] = node_index;
4670     }
4671     if (IsDequantizeNonConstFloat16(context, node, registration)) {
4672       non_const_dequantize_output_to_node_mapping_[node->outputs->data[0]] =
4673           node_index;
4674     }
4675   }
4676   // Clear the input and output lists for the dequantize path.
4677   builder.ClearInputOuputLists();
4678 
4679   // Add other tensors.
4680   for (auto node_index : nodes_) {
4681     // Obtain the op and registration.
4682     TfLiteNode* node;
4683     TfLiteRegistration* reg;
4684     TF_LITE_ENSURE_STATUS(
4685         context->GetNodeAndRegistration(context, node_index, &node, &reg));
4686     // skip DENSIFY -> DEQUANTIZE as they are handled elsewhere.
4687     if (IsDensifyConstTensor(context, node, reg) ||
4688         IsDequantizeNonConstFloat16(context, node, reg)) {
4689       continue;
4690     }
4691 
4692     // Delegate PACK by lowering it into CONCAT + RESHAPE.
4693     if (reg->builtin_code == kTfLiteBuiltinPack) {
4694       TF_LITE_ENSURE_STATUS(
4695           builder.TransformPackIntoSupportedOps(node_index, node, reg));
4696       continue;
4697     }
4698     // Fully quantized full LSTM.
4699     if (target_feature_level_ >= kMinSdkVersionForNNAPI13 &&
4700         reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4701         context->tensors[node->inputs->data[0]].type == kTfLiteInt8) {
4702       const auto quant8_full_lstm_op_code = ANEURALNETWORKS_QUANTIZED_LSTM;
4703 
4704       constexpr int kInputTensor = 0;
4705       constexpr int kInputToInputWeightsTensor = 1;
4706       constexpr int kRecurrentToInputWeightsTensor = 5;
4707       constexpr int kInputGateBiasTensor = 12;
4708       constexpr int kForgetGateBiasTensor = 13;
4709       constexpr int kCellGateBiasTensor = 14;
4710       constexpr int kOutputGateBiasTensor = 15;
4711       constexpr int kProjectionWeightsTensor = 16;
4712       constexpr int kProjectionBiasTensor = 17;
4713       constexpr int kPrevOutputTensor = 18;
4714 
4715       // Add input tensors.
4716       for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4717         const auto input_index = node->inputs->data[input_pos];
4718         if (input_index == kTfLiteOptionalTensor) {
4719           if (input_pos == kInputToInputWeightsTensor ||
4720               input_pos == kRecurrentToInputWeightsTensor ||
4721               input_pos == kProjectionWeightsTensor) {
4722             TF_LITE_ENSURE_STATUS(builder.AddVectorInt8Operand(nullptr, 0));
4723           } else if (input_pos == kInputGateBiasTensor ||
4724                      input_pos == kForgetGateBiasTensor ||
4725                      input_pos == kCellGateBiasTensor ||
4726                      input_pos == kOutputGateBiasTensor ||
4727                      input_pos == kProjectionBiasTensor) {
4728             TF_LITE_ENSURE_STATUS(builder.AddVectorInt32Operand(nullptr, 0));
4729           } else {  // cell-to-* and layer norm weights.
4730             TF_LITE_ENSURE_STATUS(builder.AddVectorInt16Operand(nullptr, 0));
4731           }
4732         } else {
4733           // Only input and previous output use INT8_ASYM_SIGNED.
4734           int flags =
4735               (input_pos == kInputTensor || input_pos == kPrevOutputTensor)
4736                   ? NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED
4737                   : 0;
4738           TF_LITE_ENSURE_STATUS(
4739               builder.AddTensorInput(input_index, /*hybrid_op=*/false, flags));
4740         }
4741       }
4742 
4743       // Add clip parameters.
4744       auto builtin = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
4745       TF_LITE_ENSURE_STATUS(
4746           builder.AddScalarFloat32Operand(builtin->cell_clip));
4747       TF_LITE_ENSURE_STATUS(
4748           builder.AddScalarFloat32Operand(builtin->proj_clip));
4749 
4750       // Add quantization parameters for intermediate tensors.
4751       TF_LITE_ENSURE_EQ(context, node->intermediates->size, 5);
4752       for (int intermediate_pos = 0;
4753            intermediate_pos < node->intermediates->size; ++intermediate_pos) {
4754         const auto intermediate_index =
4755             node->intermediates->data[intermediate_pos];
4756         const TfLiteTensor& tensor = context->tensors[intermediate_index];
4757         TfLiteAffineQuantization* quantization_params =
4758             static_cast<TfLiteAffineQuantization*>(tensor.quantization.params);
4759         if (intermediate_pos == 4) {
4760           TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
4761               quantization_params->zero_point->data[0]));
4762         }
4763         TF_LITE_ENSURE_STATUS(builder.AddScalarFloat32Operand(
4764             quantization_params->scale->data[0]));
4765       }
4766 
4767       // Activation state output.
4768       int ann_index;
4769       builder.AddStateInt8AsymTensor(
4770           node->inputs->data[/*kInputActivationStateTensor*/ 18], &ann_index);
4771       model_state_outputs_.push_back(ann_index);
4772       model_state_tfl_inputs_.push_back(
4773           node->inputs->data[/*kInputActivationStateTensor*/ 18]);
4774 
4775       // Cell state output.
4776       builder.AddStateInt16Tensor(
4777           node->inputs->data[/*kInputCellStateTensor*/ 19], &ann_index);
4778       model_state_outputs_.push_back(ann_index);
4779       model_state_tfl_inputs_.push_back(
4780           node->inputs->data[/*kInputCellStateTensor*/ 19]);
4781 
4782       // Add output tensors.
4783       for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
4784         const auto output_index = node->outputs->data[output_pos];
4785         TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(
4786             output_index, NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED));
4787       }
4788 
4789       builder.FinalizeAddOperation(quant8_full_lstm_op_code, node_index);
4790       continue;
4791     }
4792 
4793     const bool hybrid_op = IsHybridOperator(context, reg->builtin_code, node);
4794     const bool scalar_as_tensor = IsScalarInputSupported(reg->builtin_code);
4795     const bool need_int8_conversion =
4796         target_feature_level_ < kMinSdkVersionForNNAPI13 &&
4797         NeedInt8Conversion(context, reg->builtin_code, node);
4798     const bool use_int8_asymm_signed =
4799         target_feature_level_ >= kMinSdkVersionForNNAPI13 && !hybrid_op;
4800 
4801     // skip DEQUANTIZE (fp16 -> fp32) as it is handled elsewhere
4802     if (IsDequantizeConstFloat16(context, node, reg)) {
4803       continue;
4804     }
4805 
4806     int input_tensor_flags = 0;
4807     if (scalar_as_tensor) {
4808       input_tensor_flags |= NN_TENSOR_FLAG_SCALAR_AS_TENSOR;
4809     }
4810     if (use_int8_asymm_signed) {
4811       input_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
4812     }
4813 
4814     // On SDK level less than 30, h_swish will be lowered into supported NNAPI
4815     // operations. Since SDK level 30, h_swish is supported as a single
4816     // operation.
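    // (h_swish(x) = x * relu6(x + 3) / 6, so the lowering is expressed with
    // elementary operations that NNAPI already supports.)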
4817     if (reg->builtin_code == kTfLiteBuiltinHardSwish &&
4818         nnapi_->android_sdk_version < kMinSdkVersionForNNAPI13) {
4819       builder.TransformHardSwishIntoSupportedOps(
4820           node->inputs->data[0], node->outputs->data[0], need_int8_conversion,
4821           node_index);
4822       continue;
4823     }
4824     // Map inputs to NN API tensor indices.
4825     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
4826       if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
4827         // Everything is added during Map since the input tensors
4828         // have a different order.
4829         continue;
4830       }
4831       if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
4832           node->inputs->data[input_pos] == kTfLiteOptionalTensor) {
4833         // skip optional bias and handle it during mapping
4834         continue;
4835       }
4836       const auto input_index = node->inputs->data[input_pos];
4837       // handle sparse weights for Conv2d
4838       if (reg->builtin_code == kTfLiteBuiltinConv2d && input_pos == 1) {
4839         int densify_node_id = -1;
4840         bool should_dequantize = false;
4841         int dequantize_node_id =
4842             non_const_dequantize_output_to_node_mapping_[input_index];
4843         if (dequantize_node_id != -1) {
4844           should_dequantize = true;
4845           // Find densify->dequantize pattern.
4846           TfLiteNode* dequant_node;
4847           TfLiteRegistration* reg;
4848           TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
4849               context, dequantize_node_id, &dequant_node, &reg));
4850           densify_node_id =
4851               densify_output_to_node_mapping_[dequant_node->inputs->data[0]];
4852         } else {
4853           densify_node_id = densify_output_to_node_mapping_[input_index];
4854         }
4855         if (densify_node_id != -1) {
4856           TF_LITE_ENSURE_STATUS(DensifyAndDequantizeConstTensor(
4857               context, densify_node_id, should_dequantize, builder));
4858           continue;
4859         }
4860       }
4861       if (need_int8_conversion &&
4862           (input_pos == 0 ||
4863            reg->builtin_code == kTfLiteBuiltinFullyConnected ||
4864            reg->builtin_code == kTfLiteBuiltinConv2d ||
4865            reg->builtin_code == kTfLiteBuiltinDepthwiseConv2d ||
4866            reg->builtin_code == kTfLiteBuiltinAdd ||
4867            reg->builtin_code == kTfLiteBuiltinMul ||
4868            reg->builtin_code == kTfLiteBuiltinSub ||
4869            reg->builtin_code == kTfLiteBuiltinConcatenation ||
4870            reg->builtin_code == kTfLiteBuiltinMaximum ||
4871            reg->builtin_code == kTfLiteBuiltinMinimum ||
4872            reg->builtin_code == kTfLiteBuiltinLeakyRelu ||
4873            reg->builtin_code == kTfLiteBuiltinLess ||
4874            reg->builtin_code == kTfLiteBuiltinLessEqual ||
4875            reg->builtin_code == kTfLiteBuiltinPrelu ||
4876            reg->builtin_code == kTfLiteBuiltinGreater ||
4877            reg->builtin_code == kTfLiteBuiltinGreaterEqual ||
4878            reg->builtin_code == kTfLiteBuiltinEqual ||
4879            reg->builtin_code == kTfLiteBuiltinNotEqual ||
4880            reg->builtin_code == kTfLiteBuiltinSelect)) {
4881         // Only selected inputs require int8 conversion.
4882         TF_LITE_ENSURE_STATUS(builder.AddTensorInput(
4883             input_index, hybrid_op,
4884             input_tensor_flags | NN_TENSOR_FLAG_INT8_CONVERSION));
4885         continue;
4886       }
4887       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmFullKernel(node) &&
4888           input_pos >= 20) {
4889         // Skip layer normalization weights. They are added in the Map
4890         // function (after all the other inputs added there) since layer
4891         // normalization weights are the last four inputs of the LSTM op in
4892         // NNAPI.
4893         continue;
4894       }
4895       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
4896         // All inputs are configured in the Map function.
4897         continue;
4898       }
4899       if (reg->builtin_code == kTfLiteBuiltinUnidirectionalSequenceLstm) {
4900         if (input_pos >= 20) {
4901           // Skip layer normalization weights. They are added in the Map
4902           // function (after all the other inputs added there) since layer
4903           // normalization weights are the last four inputs of the
4904           // unidirectional sequence LSTM op in NNAPI.
4905           continue;
4906         }
4907         if (input_index == kTfLiteOptionalTensor) {
4908           TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4909           continue;
4910         }
4911       }
4912       if ((reg->builtin_code == kTfLiteBuiltinSplit) &&
4913           (input_index == node->inputs->data[0])) {
4914         // Skip the axis input tensor; it will be added as a scalar operand
4915         // by the Map() mapping.
4916         continue;
4917       }
4918 
4919       // Pad and Padv2 have an optional parameter for a pad value which has
4920       // to be converted to a scalar type in NN API.
4921       if ((reg->builtin_code == kTfLiteBuiltinPadv2 ||
4922            reg->builtin_code == kTfLiteBuiltinPad) &&
4923           node->inputs->size == 3 && input_pos == 2) {
4924         const int constant_value_id = node->inputs->data[2];
4925         if (constant_value_id == kTfLiteOptionalTensor) {
4926           continue;
4927         }
4928         const TfLiteTensor constant_value = context->tensors[constant_value_id];
4929 
4930         switch (constant_value.type) {
4931           case kTfLiteFloat32:
4932             if (constant_value.allocation_type == kTfLiteMmapRo) {
4933               builder.AddScalarFloat32Operand(*constant_value.data.f);
4934             } else {
4935               builder.AddSingleValueTensorAsScalarOperand(
4936                   constant_value_id, ANEURALNETWORKS_FLOAT32);
4937             }
4938             break;
4939           case kTfLiteUInt8:
4940             if (constant_value.allocation_type == kTfLiteMmapRo) {
4941               builder.AddScalarInt32Operand(
4942                   static_cast<int32_t>(*constant_value.data.uint8));
4943             } else {
4944               builder.AddSingleValueTensorAsScalarOperand(
4945                   constant_value_id, ANEURALNETWORKS_INT32);
4946             }
4947             break;
4948           case kTfLiteInt8:
4949             if (constant_value.allocation_type == kTfLiteMmapRo) {
4950               if (need_int8_conversion) {
4951                 builder.AddScalarInt32Operand(
4952                     static_cast<int32_t>(*constant_value.data.int8) + 128);
4953               } else {
4954                 builder.AddScalarInt32Operand(*constant_value.data.int8);
4955               }
4956             } else {
4957               builder.AddSingleValueTensorAsScalarOperand(
4958                   constant_value_id, ANEURALNETWORKS_INT32);
4959             }
4960             break;
4961           default:
4962             context->ReportError(context,
4963                                  "Unsupported type of pad value for pad_v2\n");
4964             return kTfLiteError;
4965         }
4966         continue;
4967       }
4968 
4969       if (input_index == kTfLiteOptionalTensor &&
4970           (reg->builtin_code == kTfLiteBuiltinLstm ||
4971            reg->builtin_code == kTfLiteBuiltinSvdf ||
4972            reg->builtin_code == kTfLiteBuiltinBidirectionalSequenceLstm)) {
4973         // Properly handle the optional tensor for LSTM and SVDF;
4974         // currently only float32 is supported.
4975         TF_LITE_ENSURE_STATUS(builder.AddVectorFloat32Operand(nullptr, 0));
4976       } else if (reg->builtin_code == kTfLiteBuiltinResizeBilinear ||
4977                  reg->builtin_code == kTfLiteBuiltinResizeNearestNeighbor) {
4978         if (input_pos == 0) {
4979           // Only the first input tensor is added. The second one,
4980           // specifying the output height and width, is not added and
4981           // instead the height and width will be added individually as
4982           // scalars by the mapping function returned by Map().
4983           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4984                                                        input_tensor_flags));
4985         }
4986       } else if (reg->builtin_code == kTfLiteBuiltinTopkV2 && input_pos > 0) {
4987         // The K parameter tensor is not handled here but by the functor
4988         // returned by Map; the input tensor is instead added in
4989         // the else clause below.
4990         continue;
4991       } else if (reg->builtin_code == kTfLiteBuiltinGather) {
4992         // Everything else is added during Map since the input tensors
4993         // have a different order.
4994         if (input_pos == 0) {
4995           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
4996                                                        input_tensor_flags));
4997         }
4998         continue;
4999       } else if (reg->builtin_code == kTfLiteBuiltinExpandDims &&
5000                  input_pos == 1) {
5001         // The axis param is added during Map
5002         continue;
5003       } else if (reg->builtin_code == kTfLiteBuiltinBatchToSpaceNd &&
5004                  input_pos == 2) {
5005         // NNAPI does not support crops.
5006         // The Map function will check if all crops are zero.
5007         continue;
5008       } else if (reg->builtin_code == kTfLiteBuiltinArgMin ||
5009                  reg->builtin_code == kTfLiteBuiltinArgMax) {
5010         // The first input tensor is added as is. The second one, specifying
5011         // the axis, needs to be converted to a scalar since TFLite uses a
5012         // tensor but NNAPI uses a scalar as the axis.
5013         if (input_pos == 0) {
5014           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5015                                                        input_tensor_flags));
5016         } else {
5017           const int axis_id = node->inputs->data[1];
5018           const TfLiteTensor& axis_tensor = context->tensors[axis_id];
5019           switch (axis_tensor.type) {
5020             case kTfLiteInt32:
5021               if (axis_tensor.allocation_type == kTfLiteMmapRo) {
5022                 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5023                     static_cast<int32_t>(*axis_tensor.data.i32)));
5024               } else {
5025                 TF_LITE_ENSURE_STATUS(
5026                     builder.AddSingleValueTensorAsScalarOperand(
5027                         axis_id, ANEURALNETWORKS_INT32));
5028               }
5029               break;
5030             case kTfLiteInt64:
5031               // Map() function already makes sure int64 input is constant.
5032               TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5033                   static_cast<int32_t>(*axis_tensor.data.i64)));
5034               break;
5035             default:
5036               return kTfLiteError;
5037           }
5038         }
5039       } else if (reg->builtin_code == kTfLiteBuiltinMaximum ||
5040                  reg->builtin_code == kTfLiteBuiltinMinimum) {
5041         const TfLiteTensor& operand_tensor =
5042             context->tensors[node->inputs->data[input_pos]];
5043         if (operand_tensor.dims->size == 0) {
5044           int tensor_index;
5045 
5046           TF_LITE_ENSURE_EQ(context, operand_tensor.allocation_type,
5047                             kTfLiteMmapRo);
5048           switch (operand_tensor.type) {
5049             case kTfLiteFloat32:
5050               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5051                   ANEURALNETWORKS_TENSOR_FLOAT32, operand_tensor.type, {1},
5052                   std::vector<float>(1, operand_tensor.data.f[0]),
5053                   operand_tensor.params, &tensor_index));
5054               break;
5055             case kTfLiteUInt8:
5056               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5057                   ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type, {1},
5058                   std::vector<uint8_t>(1, operand_tensor.data.uint8[0]),
5059                   operand_tensor.params, &tensor_index));
5060               break;
5061             case kTfLiteInt8: {
5062               auto params = operand_tensor.params;
5063               if (params.scale == 0.0) {
5064                 params.scale = 1.0;
5065               }
5066 
5067               if (use_int8_asymm_signed) {
5068                 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5069                     ANEURALNETWORKS_TENSOR_QUANT8_ASYMM_SIGNED,
5070                     operand_tensor.type, {1},
5071                     std::vector<int8_t>(1, operand_tensor.data.int8[0]), params,
5072                     &tensor_index));
5073               } else {
5074                 TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5075                     ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, operand_tensor.type,
5076                     {1},
5077                     std::vector<int8_t>(1, operand_tensor.data.int8[0] + 128),
5078                     params, &tensor_index));
5079               }
5080             } break;
5081             case kTfLiteInt32:
5082               TF_LITE_ENSURE_STATUS(builder.AddNewInputConstantTensor(
5083                   ANEURALNETWORKS_TENSOR_INT32, operand_tensor.type, {1},
5084                   std::vector<int32_t>(1, operand_tensor.data.i32[0]),
5085                   operand_tensor.params, &tensor_index));
5086               break;
5087             default:
5088               return kTfLiteError;
5089           }
5090         } else {
5091           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5092                                                        input_tensor_flags));
5093         }
5094       } else if ((reg->builtin_code == kTfLiteBuiltinReduceAny ||
5095                   reg->builtin_code == kTfLiteBuiltinReduceMax ||
5096                   reg->builtin_code == kTfLiteBuiltinReduceMin ||
5097                   reg->builtin_code == kTfLiteBuiltinReduceProd ||
5098                   reg->builtin_code == kTfLiteBuiltinSum) &&
5099                  (input_pos == 1)) {
5100         // The axis needs to be converted to a tensor if it is specified as a scalar.
5101         const TfLiteTensor& axis_tensor =
5102             context->tensors[node->inputs->data[input_pos]];
5103         if (axis_tensor.dims->size == 0) {
5104           TF_LITE_ENSURE_STATUS(
5105               builder.AddVectorInt32Operand(axis_tensor.data.i32, 1));
5106         } else {
5107           TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index, hybrid_op,
5108                                                        input_tensor_flags));
5109         }
5110       } else if (reg->builtin_code == kTfLiteBuiltinFill) {
5111         if (input_pos == 0) {
5112           const int dims_id = node->inputs->data[0];
5113           const TfLiteTensor& dims_tensor = context->tensors[dims_id];
5114           switch (dims_tensor.type) {
5115             case kTfLiteInt32:
5116               TF_LITE_ENSURE_STATUS(
5117                   builder.AddTensorInput(input_index, hybrid_op));
5118               break;
5119             case kTfLiteInt64: {
5120               // We made sure that dimensions are constant and fit into int32
5121               // in Map(), so we can safely create a new tensor with casted
5122               // values.
5123               const int dims_size = dims_tensor.dims->data[0];
5124               std::vector<int32_t> dims_int32(dims_size);
5125               std::copy(dims_tensor.data.i64, dims_tensor.data.i64 + dims_size,
5126                         dims_int32.begin());
5127               int new_tensor_index = -1;
5128               builder.AddNewInputConstantTensor(
5129                   ANEURALNETWORKS_TENSOR_INT32, kTfLiteInt32, dims_tensor.dims,
5130                   dims_int32, dims_tensor.params, &new_tensor_index);
5131             } break;
5132             default:
5133               return kTfLiteError;
5134           }
5135         } else {
5136           const int value_id = node->inputs->data[1];
5137           const TfLiteTensor& value_tensor = context->tensors[value_id];
5138           switch (value_tensor.type) {
5139             case kTfLiteFloat32:
5140               if (value_tensor.allocation_type == kTfLiteMmapRo) {
5141                 TF_LITE_ENSURE_STATUS(
5142                     builder.AddScalarFloat32Operand(*value_tensor.data.f));
5143               } else {
5144                 TF_LITE_ENSURE_STATUS(
5145                     builder.AddSingleValueTensorAsScalarOperand(
5146                         value_id, ANEURALNETWORKS_FLOAT32));
5147               }
5148               break;
5149             case kTfLiteInt32:
5150               if (value_tensor.allocation_type == kTfLiteMmapRo) {
5151                 TF_LITE_ENSURE_STATUS(
5152                     builder.AddScalarInt32Operand(*value_tensor.data.i32));
5153               } else {
5154                 TF_LITE_ENSURE_STATUS(
5155                     builder.AddSingleValueTensorAsScalarOperand(
5156                         value_id, ANEURALNETWORKS_INT32));
5157               }
5158               break;
5159             case kTfLiteInt64:
5160               if (value_tensor.allocation_type == kTfLiteMmapRo) {
5161                 // Map() function already makes sure const int64 input fits into
5162                 // int32.
5163                 TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(
5164                     static_cast<int32_t>(*value_tensor.data.i64)));
5165               } else {
5166                 TF_LITE_ENSURE_STATUS(
5167                     builder.AddSingleValueTensorAsScalarOperand(
5168                         value_id, ANEURALNETWORKS_INT32));
5169               }
5170               break;
5171             default:
5172               return kTfLiteError;
5173           }
5174         }
5175       } else {
5176         TF_LITE_ENSURE_STATUS(
5177             builder.AddTensorInput(input_index, hybrid_op, input_tensor_flags));
5178       }
5179     }
5180 
5181     // Get op type and operands
5182     // Fails if the Validate function failed
5183     int nn_op_type;
5184     TF_LITE_ENSURE_STATUS(
5185         Map(context, reg->builtin_code, reg->version, target_feature_level_,
5186             {context, &builder, node, node_index, &model_state_outputs_,
5187              &model_state_tfl_inputs_, &feedback_loops_, nnapi_errno},
5188             &nn_op_type));
5189 
5190     // Map outputs to NN API tensor indices.
5191     int output_tensor_flags = 0;
5192     if (need_int8_conversion) {
5193       output_tensor_flags |= NN_TENSOR_FLAG_INT8_CONVERSION;
5194     }
5195     if (use_int8_asymm_signed) {
5196       output_tensor_flags |= NN_TENSOR_FLAG_USE_INT8_ASYMM_SIGNED;
5197     }
5198     // fc_nn_intermediate_output_index is used to indicate whether additional
5199     // RESHAPE op is needed.
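    // For example, a FULLY_CONNECTED output of shape [2, 3, num_units] is
    // computed by NNAPI as a 2-D tensor of shape [6, num_units] and reshaped
    // back by the AppendReshape call below.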
5200     int fc_nn_intermediate_output_index = -1;
5201     for (int output_pos = 0; output_pos < node->outputs->size; ++output_pos) {
5202       auto output_index = node->outputs->data[output_pos];
5203 
5204       // Outputs for the basic LSTM cell are set in the Map function, so skip them here.
5205       if (reg->builtin_code == kTfLiteBuiltinLstm && isLstmBasicKernel(node)) {
5206         continue;
5207       }
5208       // Handle FC with keep_num_dims==true.
5209       if (reg->builtin_code == kTfLiteBuiltinFullyConnected &&
5210           reinterpret_cast<TfLiteFullyConnectedParams*>(node->builtin_data)
5211               ->keep_num_dims) {
5212         auto& output_tensor = context->tensors[output_index];
5213 
5214         int num_units = output_tensor.dims->data[output_tensor.dims->size - 1];
5215         std::vector<uint32_t> output_dims(2);
5216         output_dims[0] = NumElements(output_tensor.dims) / num_units;
5217         output_dims[1] = num_units;
5218         TF_LITE_ENSURE_STATUS(builder.AddIntermediateOutputTensor(
5219             output_tensor.type, output_dims.size(), output_dims.data(),
5220             output_tensor.params.scale, output_tensor.params.zero_point,
5221             &fc_nn_intermediate_output_index));
5222       } else {
5223         TF_LITE_ENSURE_STATUS(
5224             builder.AddTensorOutput(output_index, output_tensor_flags));
5225       }
5226     }
5227 
5228     // Dequantize operators may have to be added in case inputs are to be
5229     // floating-point.
5230     AddDequantizeOperatorsWhereNeeded(context, reg->builtin_code, node,
5231                                       node_index, &builder, nnapi_errno);
5232 
5233     TF_LITE_ENSURE_OK(context_,
5234                       builder.FinalizeAddOperation(nn_op_type, node_index));
5235     if (fc_nn_intermediate_output_index > -1) {
5236       TF_LITE_ENSURE_STATUS(builder.AppendReshape(
5237           fc_nn_intermediate_output_index, node->outputs->data[0], node_index));
5238     }
5239   }
5240   return kTfLiteOk;
5241 }
5242 
5243 TfLiteStatus NNAPIDelegateKernel::BuildGraph(
5244     TfLiteContext* context,
5245     const StatefulNnApiDelegate::Options& delegate_options,
5246     const TfLiteIntArray* input_tensors, const TfLiteIntArray* output_tensors,
5247     int* nnapi_errno) {
5248   // Build the ops and tensors.
5249   TF_LITE_ENSURE_STATUS(AddOpsAndTensors(
5250       context, nnapi_errno, delegate_options.allow_dynamic_dimensions));
5251   // Map input and output tensor indices to ANN
5252   std::vector<uint32_t> inputs;
5253   inputs.reserve(input_tensors->size);
5254   std::vector<uint32_t> outputs;
5255   outputs.reserve(output_tensors->size);
5256 
5257   size_t total_input_byte_size = 0;
5258   // Convert the TensorFlow Lite inputs and outputs to ann_indices.
5259   for (int i : TfLiteIntArrayView(input_tensors)) {
5260     // Constant tensors are not NNAPI inputs.
5261     if (i != kTfLiteOptionalTensor &&
5262         context->tensors[i].allocation_type != kTfLiteMmapRo &&
5263         // The delegate might not have mapped this input (this can
5264         // happen if one tensor is split in several ones)
5265         operand_mapping_.lite_index_to_ann(i) != -1) {
5266       inputs.push_back(operand_mapping_.lite_index_to_ann(i));
5267       if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
5268         continue;
5269       }
5270       const TfLiteType nn_type_conversion =
5271           operand_mapping_.lite_index_to_ann_type_conversion(i);
5272       int tensor_size = 0;
5273       if (nn_type_conversion == kTfLiteNoType) {
5274         tensor_size = context->tensors[i].bytes;
5275       } else {
5276         size_t type_size;
5277         TF_LITE_ENSURE_OK(
5278             context, GetSizeOfType(context, nn_type_conversion, &type_size));
5279         tensor_size = NumElements(&context->tensors[i]) * type_size;
5280       }
5281       total_input_byte_size += tensor_size;
5282       total_input_byte_size += GetNumPaddingBytes(tensor_size);
5283     }
5284   }
5285 
5286   size_t total_output_byte_size = 0;
5287   for (int i : TfLiteIntArrayView(output_tensors)) {
5288     const int output_tensor_ann_index = operand_mapping_.lite_index_to_ann(i);
5289     // Unmapped outputs are not added
5290     if (output_tensor_ann_index != -1) {
5291       outputs.push_back(output_tensor_ann_index);
5292     }
5293     if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
5294       continue;
5295     }
5296     total_output_byte_size += context->tensors[i].bytes;
5297     total_output_byte_size += GetNumPaddingBytes(context->tensors[i].bytes);
5298   }
5299 
5300   // Add state output tensors as model outputs.
5301   for (int i = 0; i < model_state_outputs_.size(); i++) {
5302     outputs.push_back(model_state_outputs_[i]);
5303     auto tfl_state_idx = model_state_tfl_inputs_[i];
5304     total_output_byte_size += context->tensors[tfl_state_idx].bytes;
5305     total_output_byte_size +=
5306         GetNumPaddingBytes(context->tensors[tfl_state_idx].bytes);
5307   }
5308 
5309   // Tell ANN to declare inputs/outputs
5310   RETURN_TFLITE_ERROR_IF_NN_ERROR(
5311       context,
5312       nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
5313           nn_model_.get(), inputs.size(), inputs.data(), outputs.size(),
5314           outputs.data()),
5315       "identifying model inputs and outputs", nnapi_errno);
5316 
5317   auto allow_fp16 =
5318       context->allow_fp32_relax_to_fp16 | delegate_options.allow_fp16;
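  // Relaxed fp16 computation is requested if either the interpreter-level flag
  // or the delegate option enables it.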
5319   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
5320     RETURN_TFLITE_ERROR_IF_NN_ERROR(
5321         context,
5322         nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
5323             nn_model_.get(), allow_fp16),
5324         "set relaxed computation mode for fp32 if possible", nnapi_errno);
5325   }
5326 
5327   RETURN_TFLITE_ERROR_IF_NN_ERROR(
5328       context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()),
5329       "finalizing the model", nnapi_errno);
5330 
5331   // Create shared memory pool for inputs and outputs.
5332   nn_input_memory_.reset(
5333       new NNMemory(nnapi_, "input_pool", total_input_byte_size));
5334   nn_output_memory_.reset(
5335       new NNMemory(nnapi_, "output_pool", total_output_byte_size));
5336 
5337   return kTfLiteOk;
5338 }
5339 
5340 }  // namespace nnapi
5341 }  // namespace delegate
5342 
5343 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI;
5344 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI11;
5345 using ::tflite::delegate::nnapi::kMinSdkVersionForNNAPI12;
5346 using ::tflite::delegate::nnapi::NNAPIDelegateKernel;
5347 
5348 StatefulNnApiDelegate::Data::Data(const NnApi* nnapi) : nnapi(nnapi) {}
5349 StatefulNnApiDelegate::Data::Data(std::unique_ptr<const NnApi> nnapi)
5350     : nnapi(nnapi.get()), owned_nnapi(std::move(nnapi)) {}
5351 
5352 StatefulNnApiDelegate::Data::~Data() {
5353   std::for_each(std::begin(delegate_state_cache),
5354                 std::end(delegate_state_cache),
5355                 [](const std::pair<int, NNAPIDelegateKernel*>& entry) {
5356                   delete entry.second;
5357                 });
5358 }
5359 
5360 void StatefulNnApiDelegate::Data::CacheDelegateKernel(
5361     const TfLiteDelegateParams* delegate_params,
5362     NNAPIDelegateKernel* delegate_state) {
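  // The partition is keyed by its first node index; MaybeGetCachedDelegateKernel
  // looks the kernel up (and removes it from the cache) using the same key.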
5363   const int cache_key = delegate_params->nodes_to_replace->data[0];
5364   delegate_state_cache.emplace(cache_key, delegate_state);
5365 }
5366 
5367 NNAPIDelegateKernel* StatefulNnApiDelegate::Data::MaybeGetCachedDelegateKernel(
5368     const TfLiteDelegateParams* delegate_params) {
5369   const int cache_key = delegate_params->nodes_to_replace->data[0];
5370   const auto cached_state = delegate_state_cache.find(cache_key);
5371   if (cached_state != std::end(delegate_state_cache)) {
5372     auto result = cached_state->second;
5373     delegate_state_cache.erase(cached_state);
5374     return result;
5375   } else {
5376     return nullptr;
5377   }
5378 }
5379 
5380 void StatefulNnApiDelegate::StatefulNnApiDelegateConstructorImpl(
5381     const Options& options) {
5382   if (options.accelerator_name) {
5383     delegate_data_.accelerator_name = options.accelerator_name;
5384   }
5385   if (options.cache_dir) {
5386     delegate_data_.cache_dir = options.cache_dir;
5387   }
5388   if (options.model_token) {
5389     delegate_data_.model_token = options.model_token;
5390   }
5391   delegate_data_.execution_preference = options.execution_preference;
5392   delegate_data_.disallow_nnapi_cpu = options.disallow_nnapi_cpu;
5393   delegate_data_.max_number_delegated_partitions =
5394       options.max_number_delegated_partitions;
5395   delegate_data_.allow_fp16 = options.allow_fp16;
5396   delegate_data_.execution_priority = options.execution_priority;
5397   delegate_data_.max_compilation_timeout_duration_ns =
5398       options.max_compilation_timeout_duration_ns;
5399   delegate_data_.max_execution_timeout_duration_ns =
5400       options.max_execution_timeout_duration_ns;
5401   delegate_data_.max_execution_loop_timeout_duration_ns =
5402       options.max_execution_loop_timeout_duration_ns;
5403   if (delegate_data_.nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
5404     delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
5405   }
5406   delegate_data_.use_burst_computation = options.use_burst_computation;
5407   TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
5408                        "Created TensorFlow Lite delegate for NNAPI.");
5409   Prepare = DoPrepare;
5410   CopyFromBufferHandle = DoCopyFromBufferHandle;
5411   CopyToBufferHandle = DoCopyToBufferHandle;
5412   FreeBufferHandle = DoFreeBufferHandle;
5413   data_ = &delegate_data_;
5414   if (delegate_data_.allow_dynamic_dimensions) {
5415     flags |= kTfLiteDelegateFlagsAllowDynamicTensors;
5416     flags |= kTfLiteDelegateFlagsRequirePropagatedShapes;
5417   }
5418 }
5419 
5420 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi)
5421     : StatefulNnApiDelegate(nnapi, Options()) {}
5422 
5423 StatefulNnApiDelegate::StatefulNnApiDelegate(Options options)
5424     : StatefulNnApiDelegate(NnApiImplementation(), options) {}
5425 
5426 StatefulNnApiDelegate::StatefulNnApiDelegate(
5427     const NnApiSLDriverImplFL5* nnapi_support_library_driver, Options options)
5428     : TfLiteDelegate(TfLiteDelegateCreate()),
5429       delegate_data_(
5430           CreateNnApiFromSupportLibrary(nnapi_support_library_driver)) {
5431   StatefulNnApiDelegateConstructorImpl(options);
5432 }
5433 
5434 StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
5435                                              Options options)
5436     : TfLiteDelegate(TfLiteDelegateCreate()), delegate_data_(nnapi) {
5437   StatefulNnApiDelegateConstructorImpl(options);
5438 }
5439 
5440 StatefulNnApiDelegate::StatefulNnApiDelegate()
5441     : StatefulNnApiDelegate(Options()) {}
5442 
5443 const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
5444     TfLiteDelegate* delegate) {
5445   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5446   StatefulNnApiDelegate::Options options;
5447   options.execution_preference = delegate_data->execution_preference;
5448   options.accelerator_name = delegate_data->accelerator_name.empty()
5449                                  ? nullptr
5450                                  : delegate_data->accelerator_name.c_str();
5451   options.cache_dir = delegate_data->cache_dir.empty()
5452                           ? nullptr
5453                           : delegate_data->cache_dir.c_str();
5454   options.model_token = delegate_data->model_token.empty()
5455                             ? nullptr
5456                             : delegate_data->model_token.c_str();
5457   options.disallow_nnapi_cpu = delegate_data->disallow_nnapi_cpu;
5458   options.max_number_delegated_partitions =
5459       delegate_data->max_number_delegated_partitions;
5460   options.allow_fp16 = delegate_data->allow_fp16;
5461   options.execution_priority = delegate_data->execution_priority;
5462   options.max_compilation_timeout_duration_ns =
5463       delegate_data->max_compilation_timeout_duration_ns;
5464   options.max_execution_timeout_duration_ns =
5465       delegate_data->max_execution_timeout_duration_ns;
5466   options.max_execution_loop_timeout_duration_ns =
5467       delegate_data->max_execution_loop_timeout_duration_ns;
5468   options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
5469   options.use_burst_computation = delegate_data->use_burst_computation;
5470   return options;
5471 }
5472 
5473 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
5474 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
5475   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5476   return delegate_data->tensor_memory_map;
5477 }
5478 
5479 delegates::Serialization* StatefulNnApiDelegate::GetCache(
5480     TfLiteDelegate* delegate) {
5481   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5482   return delegate_data->cache.get();
5483 }
5484 
5485 TfLiteBufferHandle StatefulNnApiDelegate::RegisterNnapiMemory(
5486     ANeuralNetworksMemory* memory, CopyToHostTensorFnPtr callback,
5487     void* callback_context) {
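  // Reuse a slot cleared by DoFreeBufferHandle if one exists; otherwise grow
  // the map. The returned index serves as the TfLiteBufferHandle for this
  // memory.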
5488   int map_size = delegate_data_.tensor_memory_map.size();
5489   for (int i = 0; i < map_size; i++) {
5490     if (delegate_data_.tensor_memory_map[i].memory == nullptr) {
5491       delegate_data_.tensor_memory_map[i] = {memory, callback,
5492                                              callback_context};
5493       return i;
5494     }
5495   }
5496   delegate_data_.tensor_memory_map.push_back(
5497       {memory, callback, callback_context});
5498   return map_size;
5499 }
5500 
5501 TfLiteStatus StatefulNnApiDelegate::DoCopyFromBufferHandle(
5502     TfLiteContext* context, TfLiteDelegate* delegate,
5503     TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
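  // Look up the NNAPI memory registered for this buffer handle and let the
  // user-provided callback copy its contents into the TfLite tensor.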
5504   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5505   if (buffer_handle < 0 ||
5506       buffer_handle >= delegate_data->tensor_memory_map.size()) {
5507     return kTfLiteError;
5508   }
5509   auto memory = delegate_data->tensor_memory_map[buffer_handle].memory;
5510   auto callback = delegate_data->tensor_memory_map[buffer_handle].callback;
5511   auto callback_context =
5512       delegate_data->tensor_memory_map[buffer_handle].callback_context;
5513   if (!memory || !callback) {
5514     return kTfLiteError;
5515   }
5516   return callback(tensor, memory, 0, tensor->bytes, callback_context);
5517 }
5518 
5519 TfLiteStatus StatefulNnApiDelegate::DoCopyToBufferHandle(
5520     TfLiteContext* context, TfLiteDelegate* delegate,
5521     TfLiteBufferHandle buffer_handle, TfLiteTensor* tensor) {
5522   return kTfLiteError;
5523 }
5524 
5525 void StatefulNnApiDelegate::DoFreeBufferHandle(TfLiteContext* context,
5526                                                TfLiteDelegate* delegate,
5527                                                TfLiteBufferHandle* handle) {
5528   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
5529   if (*handle >= 0 && *handle < delegate_data->tensor_memory_map.size()) {
5530     delegate_data->tensor_memory_map[*handle] = {nullptr, nullptr, nullptr};
5531     *handle = kTfLiteNullBufferHandle;
5532   }
5533 }
5534 
5535 int StatefulNnApiDelegate::GetNnApiErrno() const {
5536   return delegate_data_.nnapi_errno;
5537 }
5538 
5539 // static
5540 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
5541     TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
5542     const std::vector<int>& supported_nodes,
5543     std::vector<int>* device_supported_nodes, int* num_partitions,
5544     TfLiteDelegateParams** params_array, int* nnapi_errno) {
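  // Partition the graph using the validator-approved nodes, then ask each
  // partition's NNAPIDelegateKernel which operations the target devices can
  // actually run; kernels for fully supported partitions are cached so they
  // can be reused when the delegate kernel is later created.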
5545   auto* delegate_data = static_cast<Data*>(delegate->data_);
5546   // The first entry in the array is the element count
5547 
5548   auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
5549   TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
5550       context, supported_nodes_int_array.get(), params_array, num_partitions));
5551   // For each partition, check which nodes are actually supported by the
5552   // target accelerators.
5553   delegate_data->delegate_state_cache.clear();
5554   for (int idx = 0; idx < *num_partitions; idx++) {
5555     const auto& partition_params = (*params_array)[idx];
5556     std::unique_ptr<NNAPIDelegateKernel> kernel_state(
5557         new NNAPIDelegateKernel(nnapi));
5558     TfLiteDelegateParams params_with_delegate = partition_params;
5559     params_with_delegate.delegate = delegate;
5560     TF_LITE_ENSURE_STATUS(
5561         kernel_state->Init(context, &params_with_delegate, nnapi_errno));
5562     std::vector<int> supported_partition_nodes;
5563     TF_LITE_ENSURE_STATUS(
5564         kernel_state->GetOperationsSupportedByTargetNnApiDevices(
5565             context, &supported_partition_nodes, nnapi_errno));
5566     device_supported_nodes->insert(device_supported_nodes->end(),
5567                                    supported_partition_nodes.begin(),
5568                                    supported_partition_nodes.end());
5569 
5570     bool model_fully_supported = (supported_partition_nodes.size() ==
5571                                   partition_params.nodes_to_replace->size);
5572     if (model_fully_supported) {
5573       delegate_data->CacheDelegateKernel(&partition_params,
5574                                          kernel_state.release());
5575     }
5576   }
5577 
5578   if (device_supported_nodes->size() != supported_nodes.size()) {
5579     // We changed the set of nodes to delegate; this will create a different
5580     // partitioning layout.
5581     auto device_sup_nodes_int_array =
5582         BuildTfLiteIntArray(*device_supported_nodes);
5583     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
5584         context, device_sup_nodes_int_array.get(), params_array,
5585         num_partitions));
5586   }
5587 
5588   return kTfLiteOk;
5589 }
5590 
5591 // static
5592 TfLiteStatus StatefulNnApiDelegate::LimitDelegatedPartitions(
5593     int max_partitions,
5594     std::vector<TfLiteDelegateParams> partition_params_array,
5595     std::vector<int>* nodes_to_delegate) {
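  // If more than max_partitions partitions would be delegated, keep only the
  // max_partitions largest ones (by node count) and drop the others from
  // nodes_to_delegate.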
5596   int num_partitions = partition_params_array.size();
5597   if (max_partitions <= 0 || num_partitions <= max_partitions) {
5598     return kTfLiteOk;
5599   }
5600 
5601   int number_delegated_partitions = std::count_if(
5602       partition_params_array.begin(), partition_params_array.end(),
5603       [nodes_to_delegate](const TfLiteDelegateParams& partition_params) {
5604         return std::find(nodes_to_delegate->begin(), nodes_to_delegate->end(),
5605                          partition_params.nodes_to_replace->data[0]) !=
5606                nodes_to_delegate->end();
5607       });
5608 
5609   if (number_delegated_partitions > max_partitions) {
5610     std::sort(partition_params_array.begin(), partition_params_array.end(),
5611               [](const TfLiteDelegateParams& left,
5612                  const TfLiteDelegateParams& right) -> bool {
5613                 // Reverse sort: largest partitions (by node count) first.
5614                 return left.nodes_to_replace->size >
5615                        right.nodes_to_replace->size;
5616               });
5617 
5618     nodes_to_delegate->clear();
5619 
5620     for (int i = 0; i < max_partitions; i++) {
5621       const TfLiteDelegateParams& partition_params = partition_params_array[i];
5622 
5623       nodes_to_delegate->insert(nodes_to_delegate->end(),
5624                                 partition_params.nodes_to_replace->data,
5625                                 partition_params.nodes_to_replace->data +
5626                                     partition_params.nodes_to_replace->size);
5627     }
5628   }
5629 
5630   return kTfLiteOk;
5631 }
5632 
5633 static std::vector<int> GetSupportedOpsWithFp16WeightRemapping(
5634     TfLiteContext* context, int target_feature_level,
5635     bool is_accelerator_specified, int max_number_delegated_partitions) {
5636   std::vector<int> supported_nodes;
5637   delegates::IsNodeSupportedFn node_supported_fn =
5638       [=](TfLiteContext* context, TfLiteNode* node,
5639           TfLiteRegistration* registration,
5640           std::string* unsupported_details) -> bool {
5641     std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
5642     const auto is_supported = NNAPIDelegateKernel::Validate(
5643         context, registration->builtin_code, registration->version,
5644         target_feature_level, node, is_accelerator_specified, &map_failures);
5645     if (!is_supported) {
5646       if (unsupported_details) {
5647         for (auto& failure : map_failures) {
5648           unsupported_details->append(failure.message.c_str());
5649         }
5650       }
5651       return false;
5652     }
5653     return true;
5654   };
5655 
5656   delegates::FP16GraphPartitionHelper partition_helper(context,
5657                                                        node_supported_fn);
5658   std::set<std::string> unsupported_nodes_info;
5659   if (partition_helper.Partition(&unsupported_nodes_info) == kTfLiteOk) {
5660     // By default, we simply get the single largest partition, as
5661     // 'max_number_delegated_partitions'
5662     // is set to 1 by default.
5663     supported_nodes = partition_helper.GetNodesOfFirstNLargestPartitions(
5664         max_number_delegated_partitions);
5665   }
5666   return supported_nodes;
5667 }
5668 
5669 TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
5670                                               TfLiteDelegate* delegate) {
5671   auto* delegate_data = static_cast<Data*>(delegate->data_);
5672   int* nnapi_errno = &(delegate_data->nnapi_errno);
5673   const NnApi* nnapi = delegate_data->nnapi;
5674 
5675   // Reset the error code when the delegate is initialized
5676   // by TFLite. This causes the error to be reset when reusing the same
5677   // StatefulNnApiDelegate after a failure.
5678   *nnapi_errno = 0;
5679 
5680   // Do not check nodes_ if NN API is unavailable.
5681   if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
5682       !nnapi->nnapi_exists) {
5683     return kTfLiteOk;
5684   }
5685 
5686   int target_feature_level = nnapi->android_sdk_version;
5687   const StatefulNnApiDelegate::Options delegate_options =
5688       StatefulNnApiDelegate::GetOptions(delegate);
5689   // For NNAPI 1.2+, check if there is any accelerator available.
5690   // If not, don't delegate to NNAPI's CPU reference implementation unless
5691   // it has been explicitly specified as the target accelerator.
5692   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
5693     if (ShouldUseTargetDevices(delegate_options, nnapi)) {
5694       std::vector<ANeuralNetworksDevice*> devices;
5695       TF_LITE_ENSURE_STATUS(
5696           GetTargetDevices(context, delegate, nnapi, nnapi_errno, &devices));
5697 
5698       if (devices.empty()) {
5699         if (delegate_options.accelerator_name) {
5700           // There was a selected device and it is not available.
5701           return kTfLiteError;
5702         } else {
5703           // Only nnapi-reference is available but was disabled by the delegate
5704           // options
5705           return kTfLiteOk;
5706         }
5707       }
5708 
5709       TF_LITE_ENSURE_STATUS(GetTargetFeatureLevel(
5710           context, nnapi, devices, &target_feature_level, nnapi_errno));
5711     } else {
5712       // If no accelerator is specified, only use NNAPI if an accelerator is
5713       // available. Any available accelerator will make the device_count larger
5714   // than 1. More sophisticated checks and allowlisting can be added later.
5715       uint32_t device_count = 0;
5716       RETURN_TFLITE_ERROR_IF_NN_ERROR(
5717           context, nnapi->ANeuralNetworks_getDeviceCount(&device_count),
5718           "getting number of NNAPI devices", nnapi_errno);
5719       if (device_count <= 1) {
5720         return kTfLiteOk;
5721       }
5722     }
5723   }
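  // For context, the device-selection behavior above is driven by
  // StatefulNnApiDelegate::Options. A hedged usage sketch (the accelerator
  // name is a hypothetical placeholder; real device names vary per SoC):
  //
  //   StatefulNnApiDelegate::Options options;
  //   options.accelerator_name = "example-npu";  // hypothetical name
  //   StatefulNnApiDelegate nnapi_delegate(options);
  //
  // With accelerator_name set, an unavailable device is a hard error above;
  // without it, delegation is silently skipped when only nnapi-reference is
  // present.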
5724 
5725   std::vector<int> supported_nodes;
5726   // We don't care about all nodes_; we only care about the ones in the
5727   // current plan.
5728   TfLiteIntArray* plan;
5729   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
5730 
5731   // Check whether each node in the execution plan is supported.
5732   const bool is_accelerator_specified = ShouldUseTargetDevices(
5733       delegate_options, nnapi, /*exclude_nnapi_reference=*/true);
5734   std::vector<delegate::nnapi::NNAPIValidationFailure> map_failures;
5735   bool should_prune_fp16_dequantize = false;
5736   for (int i = 0; i < plan->size; ++i) {
5737     const int node_id = plan->data[i];
5738     TfLiteNode* node = nullptr;
5739     TfLiteRegistration* registration = nullptr;
5740     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5741         context, node_id, &node, &registration));
5742     if (IsDequantizeConstFloat16(context, node, registration)) {
5743       should_prune_fp16_dequantize = true;
5744       break;
5745     }
5746   }
5747   if (should_prune_fp16_dequantize) {
5748     supported_nodes = GetSupportedOpsWithFp16WeightRemapping(
5749         context, target_feature_level, is_accelerator_specified,
5750         delegate_options.max_number_delegated_partitions);
5751   } else {
5752     for (int node_index : TfLiteIntArrayView(plan)) {
5753       TfLiteNode* node;
5754       TfLiteRegistration* registration;
5755       TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
5756           context, node_index, &node, &registration));
5757       if (NNAPIDelegateKernel::Validate(
5758               context, registration->builtin_code, registration->version,
5759               target_feature_level, node, is_accelerator_specified,
5760               &map_failures)) {
5761         supported_nodes.push_back(node_index);
5762       }
5763 #ifdef NNAPI_VERBOSE_VALIDATION
5764       for (auto& failure : map_failures) {
5765         TFLITE_LOG_PROD(
5766             TFLITE_LOG_WARNING,
5767             "Operator %s (v%d) refused by NNAPI delegate: %s",
5768             tflite::EnumNameBuiltinOperator(
5769                 static_cast<BuiltinOperator>(registration->builtin_code)),
5770             registration->version, failure.message.c_str());
5771       }
5772       map_failures.clear();
5773 #endif
5774     }
5775   }
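  // Note on the fp16 path above: IsDequantizeConstFloat16 flags DEQUANTIZE
  // nodes whose input is a constant float16 tensor, the pattern produced by
  // post-training fp16 quantization (fp16 weights dequantized into fp32
  // consumers). A single such node switches validation to the
  // FP16GraphPartitionHelper-based path so those weights are remapped rather
  // than becoming unsupported boundaries between partitions.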
5776 
5777   // If there are no delegated nodes, short-circuit node replacement.
5778   if (supported_nodes.empty()) {
5779     return kTfLiteOk;
5780   }
5781 
5782   // NN API delegate registration (the pseudo kernel that will invoke NN
5783   // API node subsets).
5784   static const TfLiteRegistration nnapi_delegate_kernel = {
5785       .init = [](TfLiteContext* context, const char* buffer,
5786                  size_t length) -> void* {
5787         const TfLiteDelegateParams* params =
5788             reinterpret_cast<const TfLiteDelegateParams*>(buffer);
5789 
5790         auto* delegate_data = static_cast<Data*>(params->delegate->data_);
5791         int* nnapi_errno = &(delegate_data->nnapi_errno);
5792 
5793         NNAPIDelegateKernel* kernel_state =
5794             delegate_data->MaybeGetCachedDelegateKernel(params);
5795         if (!kernel_state) {
5796           kernel_state = new NNAPIDelegateKernel(delegate_data->nnapi);
5797           kernel_state->Init(context, params, nnapi_errno);
5798         }
5799 
5800         return kernel_state;
5801       },
5802 
5803       .free = [](TfLiteContext* context, void* buffer) -> void {
5804         delete reinterpret_cast<NNAPIDelegateKernel*>(buffer);
5805       },
5806 
5807       .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
5808         NNAPIDelegateKernel* state =
5809             reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
5810         int* nnapi_errno =
5811             &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
5812         return state->Prepare(context, node, nnapi_errno);
5813       },
5814 
5815       .invoke = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
5816         NNAPIDelegateKernel* state =
5817             reinterpret_cast<NNAPIDelegateKernel*>(node->user_data);
5818         int* nnapi_errno =
5819             &(static_cast<Data*>(node->delegate->data_)->nnapi_errno);
5820         return state->Invoke(context, node, nnapi_errno);
5821       },
5822 
5823       .profiling_string = nullptr,
5824       .builtin_code = kTfLiteBuiltinDelegate,
5825       .custom_name = "TfLiteNnapiDelegate",
5826       .version = 1,
5827   };
5828 
5829   // Initialize caching, if applicable, from Options.
5830   const char* cache_dir = delegate_options.cache_dir;
5831   const char* model_token = delegate_options.model_token;
5832   delegates::SerializationParams params = {model_token, cache_dir};
5833   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12 && cache_dir &&
5834       model_token) {
5835     delegate_data->cache.reset(new delegates::Serialization(params));
5836   }
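  // Sketch of how a client enables this cache (hedged; the directory and
  // token below are hypothetical placeholders, not values from this file):
  //
  //   StatefulNnApiDelegate::Options options;
  //   options.cache_dir = "/data/local/tmp/nnapi_cache";  // hypothetical
  //   options.model_token = "my_model_v1";                // hypothetical
  //   StatefulNnApiDelegate nnapi_delegate(options);
  //
  // With both fields set on NNAPI 1.2+, the delegation decision below can be
  // serialized and reused on subsequent runs of the same model.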
5837 
5838   delegates::Serialization* cache_ptr = delegate_data->cache.get();
5839 
5840   if (cache_ptr) {
5841     // Reuse cached delegation decision if possible.
5842     std::string accelerator_id = NnApiBackendId(delegate_options);
5843     TfLiteIntArray* cached_nodes_to_delegate = nullptr;
5844     if (delegates::GetDelegatedNodes(context, cache_ptr, accelerator_id,
5845                                      &cached_nodes_to_delegate) == kTfLiteOk) {
5846       if (cached_nodes_to_delegate->size == 0) return kTfLiteOk;
5847       auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
5848           context, nnapi_delegate_kernel, cached_nodes_to_delegate, delegate);
5849       TfLiteIntArrayFree(cached_nodes_to_delegate);
5850       return status;
5851     }
5852   }
5853 
5854   std::vector<int> nodes_to_delegate;
5855 
5856   int num_partitions;
5857   TfLiteDelegateParams* params_array;
5858   if (is_accelerator_specified &&
5859       nnapi->android_sdk_version >= kMinSdkVersionForNNAPI12) {
5860     // Filter out nodes not supported by the target accelerators.
5861     // Supported operations cannot be queried before NNAPI 1.2.
5862     TF_LITE_ENSURE_STATUS(GetNodesSupportedByAccelerator(
5863         context, delegate, nnapi, supported_nodes, &nodes_to_delegate,
5864         &num_partitions, &params_array, nnapi_errno));
5865   } else {
5866     nodes_to_delegate = supported_nodes;
5867     auto supported_nodes_int_array = BuildTfLiteIntArray(supported_nodes);
5868     TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
5869         context, supported_nodes_int_array.get(), &params_array,
5870         &num_partitions));
5871   }
5872 
5873   TF_LITE_ENSURE_STATUS(
5874       LimitDelegatedPartitions(delegate_options.max_number_delegated_partitions,
5875                                std::vector<TfLiteDelegateParams>(
5876                                    params_array, params_array + num_partitions),
5877                                &nodes_to_delegate));
5878 
5879   auto nodes_to_delegate_int_array = BuildTfLiteIntArray(nodes_to_delegate);
5880 
5881   if (cache_ptr) {
5882     // Cache the list of nodes to be delegated for later runs.
5883     std::string accelerator_id = NnApiBackendId(delegate_options);
5884     if (delegates::SaveDelegatedNodes(context, cache_ptr, accelerator_id,
5885                                       nodes_to_delegate_int_array.get()) !=
5886         kTfLiteOk) {
5887       // Not a critical error.
5888       TF_LITE_KERNEL_LOG(context, "Could not save delegated nodes");
5889     }
5890   }
5891 
5892   if (nodes_to_delegate_int_array->size == 0) {
5893     return kTfLiteOk;
5894   } else {
5895     // Request TFLite to partition the graph, creating a new
5896     // nnapi_delegate_kernel for each independent node subset.
5897     return context->ReplaceNodeSubsetsWithDelegateKernels(
5898         context, nnapi_delegate_kernel, nodes_to_delegate_int_array.get(),
5899         delegate);
5900   }
5901 }
5902 
5903 // Returns a singleton NNAPI Delegate that can check for support of ops.
5904 TfLiteDelegate* NnApiDelegate() {
5905   static StatefulNnApiDelegate* delegate = new StatefulNnApiDelegate();
5906   return delegate;
5907 }
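// Typical usage of the singleton (a sketch; error handling is omitted and
// the interpreter instance is assumed to already exist):
//
//   TfLiteDelegate* delegate = tflite::NnApiDelegate();
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // Fall back to the default CPU kernels.
//   }
//
// The singleton is never destroyed and takes default options; construct a
// StatefulNnApiDelegate directly when per-instance options are needed.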
5908 
5909 }  // namespace tflite
5910