/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;

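// Illustrative note (not part of the original header): validation code can
// gate operations on the device's Android API level with these constants,
// for example:
//
//   if (android_sdk_version < kMinSdkVersionForNNAPI12) {
//     // Report NNAPIValidationFailureType::kUnsupportedAndroidVersion and
//     // leave the node to the default TFLite CPU kernels.
//   }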

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, returns the ANN index, or -1 if no mapping exists.
  int lite_index_to_ann(int index) const {
    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns its index. The mapping uses a std::vector and resizes it as
  // needed, keeping -1 for unmapped values. Intermediate tensors will likely
  // not be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is needed for input operands generated by the delegate to map
  // constant inputs that are not present in TFLite but are required by NNAPI,
  // for example when splitting one input into several.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Add a new mapping from `tflite_index` and return the NN API tensor index.
  int add_new_ann_tensor_index(int tflite_index) {
    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data into the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    if (index >= 0 && index < index_to_type_conversion_.size())
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Add a new mapping from TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    if (tflite_index >= index_to_type_conversion_.size()) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next index of ANN tensor.
  int next_ann_tensor_index_ = 0;

  // Mapping from TFLite tensor index to NN API tensor index. Use a
  // std::vector for speed and code size rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from TFLite tensor index to the type to which the tensor must be
  // converted while copying its data into the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed. Use a std::vector for speed
  // and code size rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};
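
// Illustrative usage sketch (not part of the original header): how a builder
// could use OperandMapping while translating a model. `kLiteTensorIndex` is a
// hypothetical TFLite tensor index used only for the example.
//
//   OperandMapping mapping;
//   int ann_index = mapping.add_new_ann_tensor_index(kLiteTensorIndex);
//   // Later lookups return the same NN API index, or -1 when unmapped.
//   int found = mapping.lite_index_to_ann(kLiteTensorIndex);  // == ann_index
//   // Scalar parameters that exist only on the NNAPI side get an index
//   // without a TFLite counterpart.
//   int scalar_index = mapping.add_new_non_tensor_operand();
//   // Record that a tensor needs a type conversion while being copied.
//   mapping.add_type_conversion(kLiteTensorIndex, kTfLiteFloat32);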

class NNAPIOpBuilder;

// Arguments passed when mapping a TFLite node to its NNAPI equivalent.
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
  int* nnapi_errno;
};

// RAII NN API Model Destructor for use with std::unique_ptr.
class NNFreeModel {
 public:
  explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksModel* model) {
    nnapi_->ANeuralNetworksModel_free(model);
  }

 private:
  const NnApi* nnapi_;
};
// RAII NN API Compilation Destructor for use with std::unique_ptr.
class NNFreeCompilation {
 public:
  explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksCompilation* compilation) {
    nnapi_->ANeuralNetworksCompilation_free(compilation);
  }

 private:
  const NnApi* nnapi_;
};
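
// Illustrative usage sketch (not part of the original header): these deleters
// are meant to be used as the custom deleter of a std::unique_ptr, as the
// kernel below does for its own model and compilation members:
//
//   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> model(
//       nullptr, NNFreeModel(nnapi));
//   ANeuralNetworksModel* raw_model = nullptr;
//   nnapi->ANeuralNetworksModel_create(&raw_model);
//   model.reset(raw_model);  // Freed via ANeuralNetworksModel_free on reset
//                            // or destruction.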

// Manages an NNAPI shared memory handle.
class NNMemory {
 public:
  NNMemory(const NnApi* nnapi, const char* name, size_t size);

  ~NNMemory();

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }

 private:
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
};
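
// Illustrative usage sketch (not part of the original header): the kernel
// below uses NNMemory for its input and output staging buffers. The name and
// size here are hypothetical.
//
//   NNMemory input_memory(nnapi, "input_pool", /*size=*/4096);
//   uint8_t* dst = input_memory.get_data_ptr();
//   // ... copy TFLite tensor data into `dst` ...
//   ANeuralNetworksMemory* handle = input_memory.get_handle();
//   // `handle` can then be passed to the NNAPI execution APIs.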

enum class NNAPIValidationFailureType : int {
  // The operator is not supported by either NNAPI or the NNAPI Delegate.
  kUnsupportedOperator = 0,
  // The given operation or operands are not supported on the specified
  // Android SDK version. The min supported version is specified in the
  // validation failure message.
  kUnsupportedAndroidVersion = 1,
  // The version of the operator (value of TfLiteRegistration::version)
  // for the given op is not supported. The max supported version
  // is specified in the validation failure message.
  // For more details on each operator version see
  // the GetBuiltinOperatorVersion function in
  // third_party/tensorflow/lite/tools/versioning/op_version.cc.
  kUnsupportedOperatorVersion = 2,
  // The given input operand type is not supported for the current combination
  // of operator type and SDK version.
  kUnsupportedInputType = 3,
  // When using NN API version 1.0 or 1.1, the condition
  //   input_scale * filter_scale < output_scale
  // must be true for quantized versions of the following ops:
  // * CONV_2D
  // * DEPTHWISE_CONV_2D
  // * FULLY_CONNECTED (where filter actually stands for weights)
  // The condition is relaxed and no longer required since version 1.2.
  kNotRestrictedScaleCompliant = 4,
  // The given output operand type is not supported for the current combination
  // of operator type and SDK version.
  kUnsupportedOutputType = 5,
  // The size of the operand tensor is too large.
  kUnsupportedOperandSize = 6,
  // The value of one of the operands or of a combination of operands is
  // not supported. Details are provided in the failure message.
  kUnsupportedOperandValue = 7,
  // The combination of float inputs and quantized weights or filters
  // is not supported.
  kUnsupportedHybridOperator = 8,
  // The quantization type (for example per-channel quantization) is not
  // supported.
  kUnsupportedQuantizationType = 9,
  // The accelerated version of the operation requires a specific operand to
  // be specified.
  kMissingRequiredOperand = 10,
  // The rank of the operand is not supported. Details in the failure message.
  kUnsupportedOperandRank = 11,
  // The input tensor cannot be dynamically-sized.
  kInputTensorShouldHaveConstantShape = 12,
  // The operator has a different number of inputs from the one(s) supported
  // by NNAPI.
  kUnsupportedOperatorVariant = 13,
  // The accelerated version of the operator cannot specify an activation
  // function.
  kNoActivationExpected = 14,
  // Quantization scale and/or zero point are not in the supported value(s)
  // for the accelerated operation.
  kUnsupportedQuantizationParameters = 15,
};

// Describes why a given TFLite node could not be validated for NNAPI
// acceleration, together with a human-readable message.
struct NNAPIValidationFailure {
  NNAPIValidationFailureType type;
  std::string message;

  NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message)
      : type(type), message(message) {}
};

// The kernel that represents the node subset of the TF Lite model being run
// on NN API. A sketch of the typical Validate/Init/Prepare/Invoke sequence
// follows this class definition.
class NNAPIDelegateKernel {
 public:
  explicit NNAPIDelegateKernel(const NnApi* nnapi)
      : initialised_(false),
        nnapi_(nnapi),
        nn_model_(nullptr, NNFreeModel(nnapi_)),
        nn_compilation_(nullptr, NNFreeCompilation(nnapi_)) {}
  NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  // Translate a node into its operands.
  // It assumes that the call to Validate has been successful for
  // the operation.
  // In case of success it returns kTfLiteOk and stores the NNAPI operation
  // code in nn_op_type.
  // Returns kTfLiteError in case of failures during mapping.
  static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version,
                          int android_sdk_version,
                          const NNAPIOpMappingArgs& mapping_args,
                          ANeuralNetworksOperationType* nn_op_type);

  // Returns true if the node can be accelerated with NNAPI.
  static bool Validate(
      const TfLiteContext* context, int builtin_code, int version,
      int android_sdk_version, const TfLiteNode* node,
      bool is_accelerator_specified,
      // Collects the failures found while validating whether the given node
      // can be accelerated.
      std::vector<NNAPIValidationFailure>* map_failures = nullptr);

  // Initializes the kernel (an NN model) and builds the NN API model.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
                    int* nnapi_errno);

  // Creates the NNAPI compilation for the NN model. It assumes that Init has
  // been called and completed successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
                       int* nnapi_errno);

  // Invokes the NN model. Expects Init and Prepare to have been completed
  // successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
                      int* nnapi_errno);

  // Returns the list of operations supported by the current NNAPI model as
  // built in Prepare. Every operation is identified by the index as provided
  // in the delegate parameters given to the delegate during the Init call.
  // It expects the Init method to have been called and completed successfully
  // and returns kTfLiteError if not. Returns an error if any of the NNAPI
  // operations fails or if the
  // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
  // available in the NnApi object.
  TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
      TfLiteContext* context, std::vector<int>* supported_nodes,
      int* nnapi_errno);

 private:
  // True if initialization has been completed successfully.
  bool initialised_;
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handle.
  std::vector<ANeuralNetworksDevice*> nnapi_devices_;
  // Name of the NNAPI device, empty if nnapi_devices_ is empty.
  std::string device_name_;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Tracks the operand indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Tracks the memory registered with the delegate.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  std::vector<uint8_t> nn_compilation_cache_token_;

  void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context,
                                         int builtin_code,
                                         const TfLiteNode* node,
                                         NNAPIOpBuilder* builder,
                                         int* nnapi_errno);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors,
                          int* nnapi_errno);
};
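
// Illustrative usage sketch (not part of the original header): the typical
// sequence followed by the delegate when running a partition through this
// kernel. Error checking on the returned TfLiteStatus values is omitted, and
// `params`, `node`, and `tflite_node` are assumed to come from the
// surrounding TfLiteDelegate callbacks.
//
//   // 1) Per node: check whether the node can be accelerated.
//   std::vector<NNAPIValidationFailure> failures;
//   bool supported = NNAPIDelegateKernel::Validate(
//       context, builtin_code, version, android_sdk_version, tflite_node,
//       /*is_accelerator_specified=*/false, &failures);
//
//   // 2) Per delegated partition: build, compile, and run the NN API model.
//   NNAPIDelegateKernel kernel;
//   int nnapi_errno = 0;
//   kernel.Init(context, params, &nnapi_errno);   // builds the NN API model
//   kernel.Prepare(context, node, &nnapi_errno);  // compiles it
//   kernel.Invoke(context, node, &nnapi_errno);   // runs one inference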

}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_