/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
constexpr int32_t kMinSdkVersionForNNAPI13 = 30;
// TODO(b/185838597): change the remaining kMinSdkVersionForNNAPI* to
// kNNAPIRuntimeFeatureLevel*.
constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31;
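
// Illustrative sketch (added for exposition, not part of the original header)
// of how these version thresholds are typically compared against the runtime,
// assuming an NnApi instance obtained from NnApiImplementation() and its
// android_sdk_version field:
//
//   const NnApi* nnapi = NnApiImplementation();
//   if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI12) {
//     // NNAPI 1.2 (Android API level 29) features are unavailable here;
//     // callers would reject the node or fall back to the CPU kernels.
//   }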

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, returns the ANN index. If it doesn't exist,
  // returns -1.
  int lite_index_to_ann(int index) const {
    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
    if (index >= 0 && index < max_size)
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns the index. It uses a std::vector and resizes it as needed,
  // keeping -1 for unmapped values. Intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is necessary for input operands generated by the delegate
  // to map constant inputs not present in TFLite but required by NNAPI,
  // for example when splitting one input into several ones.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Add a new mapping from `tflite_index` and return the NN API tensor index.
  int add_new_ann_tensor_index(int tflite_index) {
    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
    if (tflite_index >= current_size) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    const int64_t max_size = index_to_type_conversion_.size();
    if (index >= 0 && index < max_size)
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Add a new mapping from a TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    const int64_t current_size = index_to_type_conversion_.size();
    if (tflite_index >= current_size) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next index of ANN tensor.
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type the tensor must be converted to while
  // copying its data to the memory allocated for NN API. kTfLiteNoType
  // means no conversion is needed. Use a std::vector for speed and code size
  // rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};
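
// Illustrative usage sketch (added for exposition, not part of the original
// header); in practice OperandMapping is typically driven by NNAPIOpBuilder
// while the TFLite graph is walked, and the tensor indices below are
// hypothetical:
//
//   OperandMapping mapping;
//   // First time TFLite tensor 3 is seen, create an NN API operand for it.
//   int ann_index = mapping.add_new_ann_tensor_index(/*tflite_index=*/3);
//   // Later lookups return the same ANN index; unknown tensors return -1.
//   assert(mapping.lite_index_to_ann(3) == ann_index);
//   assert(mapping.lite_index_to_ann(7) == -1);
//   // Scalar parameters (e.g. an activation code) become non-tensor operands.
//   int scalar_operand = mapping.add_new_non_tensor_operand();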

class NNAPIOpBuilder;

// Arguments handed to the op mapping code (see NNAPIDelegateKernel::Map
// below) when a TFLite node is translated into an NNAPI operation.
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  int node_index;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
  int* nnapi_errno;
};

// RAII NN API Model Destructor for use with std::unique_ptr
class NNFreeModel {
 public:
  explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksModel* model) {
    nnapi_->ANeuralNetworksModel_free(model);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Compilation Destructor for use with std::unique_ptr
class NNFreeCompilation {
 public:
  explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksCompilation* model) {
    nnapi_->ANeuralNetworksCompilation_free(model);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Execution Destructor for use with std::unique_ptr
class NNFreeExecution {
 public:
  explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksExecution* execution) {
    nnapi_->ANeuralNetworksExecution_free(execution);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Burst Destructor for use with std::unique_ptr
class NNFreeBurst {
 public:
  explicit NNFreeBurst(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksBurst* model) {
    nnapi_->ANeuralNetworksBurst_free(model);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};

// Manage NNAPI shared memory handle
class NNMemory {
 public:
  NNMemory(const NnApi* nnapi, const char* name, size_t size);

  ~NNMemory();

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }
  size_t get_byte_size() { return byte_size_; }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
#ifndef __ANDROID__
  std::string shm_region_name_;
#endif
};
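
// Illustrative sketch (added for exposition) of staging input data through an
// NNMemory pool before an execution; the pool name, sizes, offsets, and the
// `execution` handle below are hypothetical:
//
//   NNMemory input_pool(nnapi, "input_pool", total_input_bytes);
//   std::memcpy(input_pool.get_data_ptr() + tensor_offset, tensor_data,
//               tensor_bytes);
//   nnapi->ANeuralNetworksExecution_setInputFromMemory(
//       execution, /*index=*/0, /*type=*/nullptr, input_pool.get_handle(),
//       tensor_offset, tensor_bytes);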

// LINT.IfChange
enum class NNAPIValidationFailureType : int {
  // The operator is not supported by either NNAPI or the NNAPI Delegate.
  kUnsupportedOperator = 0,
  // The given operation or operands are not supported on the specified
  // Android SDK version. The min supported version is specified in the
  // validation failure message.
  kUnsupportedAndroidVersion = 1,
  // The version of the operator (value of TfLiteRegistration::version)
  // for the given op is not supported. The max supported version
  // is specified in the validation failure message.
  // For more details on each operator version see
  // the GetBuiltinOperatorVersion function in
  // third_party/tensorflow/lite/tools/versioning/op_version.cc.
  kUnsupportedOperatorVersion = 2,
  // The given input operand type is not supported for the current combination
  // of operator type and SDK version.
  kUnsupportedInputType = 3,
  // When using NN API version 1.0 or 1.1, the condition
  //   input_scale * filter_scale < output_scale
  // must be true for quantized versions of the following ops:
  // * CONV_2D
  // * DEPTHWISE_CONV_2D
  // * FULLY_CONNECTED (where filter actually stands for weights)
  // The condition is relaxed and no longer required since version 1.2.
  kNotRestrictedScaleCompliant = 4,
  // The given output operand type is not supported for the current combination
  // of operator type and SDK version.
  kUnsupportedOutputType = 5,
  // The size of the operand tensor is too large.
  kUnsupportedOperandSize = 6,
  // The value of one of the operands or of a combination of operands is
  // not supported. Details are provided in the failure message.
  kUnsupportedOperandValue = 7,
  // The combination of float inputs and quantized weights or filters
  // is not supported.
  kUnsupportedHybridOperator = 8,
  // The quantization type (for example per-channel quantization) is not
  // supported.
  kUnsupportedQuantizationType = 9,
  // The accelerated version of the operation requires a specific operand to be
  // specified.
  kMissingRequiredOperand = 10,
  // The rank of the operand is not supported. Details in the failure message.
  kUnsupportedOperandRank = 11,
  // The input tensor cannot be dynamically-sized.
  kInputTensorShouldHaveConstantShape = 12,
  // The operator has a different number of inputs from the one or ones
  // supported by NNAPI.
  kUnsupportedOperatorVariant = 13,
  // The accelerated version of the operator cannot specify an activation
  // function.
  kNoActivationExpected = 14,
  // Quantization scale and/or zero point are not in the supported value(s)
  // for the accelerated operation.
  kUnsupportedQuantizationParameters = 15,
};
// LINT.ThenChange(nnapi_linter/linter.proto)

struct NNAPIValidationFailure {
  NNAPIValidationFailureType type;
  std::string message;

  NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message)
      : type(type), message(message) {}
};
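
// Illustrative sketch (added for exposition) of how these failures are
// typically collected via NNAPIDelegateKernel::Validate, declared below; the
// call site shape and the logging are hypothetical:
//
//   std::vector<NNAPIValidationFailure> failures;
//   if (!NNAPIDelegateKernel::Validate(context, registration->builtin_code,
//                                      registration->version,
//                                      android_sdk_version, node,
//                                      /*is_accelerator_specified=*/false,
//                                      &failures)) {
//     for (const auto& failure : failures) {
//       fprintf(stderr, "NNAPI rejection: %s\n", failure.message.c_str());
//     }
//   }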

// The kernel that represents the node subset of TF Lite being run on NN API.
class NNAPIDelegateKernel {
 public:
  explicit NNAPIDelegateKernel(const NnApi* nnapi)
      : initialised_(false),
        nnapi_(nnapi),
        nn_model_(nullptr, NNFreeModel(nnapi_)),
        nn_compilation_(nullptr, NNFreeCompilation(nnapi_)),
        nn_burst_(nullptr, NNFreeBurst(nnapi_)),
        nn_execution_(nullptr, NNFreeExecution(nnapi_)) {}
  NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  // Translates a node into its operands.
  // It assumes that the call to Validate has been successful for
  // the operation.
  // In case of success it returns kTfLiteOk and stores the NNAPI
  // operation code in nn_op_type.
  // Returns kTfLiteError in case of failures during mapping.
  static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version,
                          int android_sdk_version,
                          const NNAPIOpMappingArgs& mapping_args,
                          ANeuralNetworksOperationType* nn_op_type);

  // Returns true if the node can be accelerated with NNAPI.
  static bool Validate(
      const TfLiteContext* context, int builtin_code, int version,
      int android_sdk_version, const TfLiteNode* node,
      bool is_accelerator_specified,
      // Collects the list of failures encountered while validating
      // whether the given node can be accelerated.
      std::vector<NNAPIValidationFailure>* map_failures = nullptr);

  // Initializes the kernel (an NN model) and builds the NN model.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
                    int* nnapi_errno);

  // Creates the NNAPI Compilation for the NN model. It assumes that Init has
  // been called and completed successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
                       int* nnapi_errno);

  // Invokes the NN model. Expects Init and Prepare to have been completed
  // successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
                      int* nnapi_errno);

  // Returns the list of operations supported by the current NNAPI model as
  // built in Prepare. Every operation is identified by the index as provided
  // in the delegate parameters given to the delegate during the Init call.
  // It expects that the Init method has been called and completed successfully
  // and returns kTfLiteError if not. Returns an error if any of the NNAPI
  // operations fails or if the
  // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
  // available in the NnApi object.
  TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
      TfLiteContext* context, std::vector<int>* supported_nodes,
      int* nnapi_errno);

 private:
  // True if initialization has been completed successfully.
  bool initialised_;
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handle.
  std::vector<ANeuralNetworksDevice*> nnapi_devices_;
  // Name of the nnapi device, empty if nnapi_devices_ is empty.
  std::string device_name_;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  std::unique_ptr<ANeuralNetworksBurst, NNFreeBurst> nn_burst_;
  std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution> nn_execution_;
  // The mapping of tensor id to BufferHandle. Needed to track BufferHandle
  // changes and alter nn_reusable_execution_ if necessary.
  std::vector<int> tensor_handle_map_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Track indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Track memory map.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  std::vector<uint8_t> nn_compilation_cache_token_;

  std::vector<int> nnapi_to_tflite_op_mapping_;
  // Map of DENSIFY output tensor id to node id.
  std::vector<int> densify_output_to_node_mapping_;
  // Map of DEQUANTIZE output tensor id to node id.
  // Only contains DEQUANTIZE nodes with non-const input.
  std::vector<int> non_const_dequantize_output_to_node_mapping_;

  // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors.
  int target_feature_level_ = 27;  // kMinSdkVersionForNNAPI10

  void AddDequantizeOperatorsWhereNeeded(
      const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
      int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);

  TfLiteStatus DensifyAndDequantizeConstTensor(TfLiteContext* context,
                                               int densify_node_id,
                                               bool should_dequantize,
                                               NNAPIOpBuilder& builder);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno,
                                bool allow_dynamic_dimensions);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const StatefulNnApiDelegate::Options& options,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors,
                          int* nnapi_errno);
};
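
// Illustrative lifecycle sketch (added for exposition, not the actual
// delegate integration); `context`, `params`, `node`, and the error handling
// come from the surrounding TfLiteDelegate machinery and are assumed here:
//
//   NNAPIDelegateKernel kernel(NnApiImplementation());
//   int nnapi_errno = 0;
//   TF_LITE_ENSURE_STATUS(kernel.Init(context, params, &nnapi_errno));
//   TF_LITE_ENSURE_STATUS(kernel.Prepare(context, node, &nnapi_errno));
//   // Invoke can be called repeatedly once Init and Prepare have succeeded.
//   TF_LITE_ENSURE_STATUS(kernel.Invoke(context, node, &nnapi_errno));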

}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_