/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

namespace tflite {
namespace delegate {
namespace nnapi {

constexpr int32_t kMinSdkVersionForNNAPI = 27;
constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
constexpr int32_t kMinSdkVersionForNNAPI13 = 30;
// TODO(b/185838597): change the remaining kMinSdkVersionForNNAPI* to
// kNNAPIRuntimeFeatureLevel*.
constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31;

// Tracks the mapping from TFLite tensor indices to NN API tensor indices.
class OperandMapping {
 public:
  // Given a TFLite index, returns the ANN index. Returns -1 if no mapping
  // exists.
  int lite_index_to_ann(int index) const {
    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
    if (index >= 0 && index < max_size)
      return lite_tensor_to_ann_tensor_[index];
    else
      return -1;
  }

  // NN API uses non-tensor operands instead of structs. This creates one
  // and returns the index. It uses a std::vector and resizes it as needed,
  // keeping -1 for unmapped values. Intermediate tensors likely will not
  // be mapped.
  int add_new_non_tensor_operand() { return next_ann_tensor_index_++; }

  // This call is necessary for input operands generated by the delegate
  // to map constant inputs not present in TFLite but required by NNAPI,
  // for example when splitting one input into several ones.
  int add_delegate_generated_input_ann_tensors_operand() {
    return next_ann_tensor_index_++;
  }

  // Adds a new mapping from `tflite_index` and returns the NN API tensor
  // index.
  int add_new_ann_tensor_index(int tflite_index) {
    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
    if (tflite_index >= current_size) {
      lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
    }
    const int new_tensor_index = next_ann_tensor_index_++;
    lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
    return new_tensor_index;
  }

  // Given a TFLite index, returns the TFLite type to which the tensor must be
  // converted while copying its data to the memory allocated for NN API.
  // kTfLiteNoType means no conversion is needed.
  TfLiteType lite_index_to_ann_type_conversion(int index) const {
    const int64_t max_size = index_to_type_conversion_.size();
    if (index >= 0 && index < max_size)
      return index_to_type_conversion_[index];
    else
      return kTfLiteNoType;
  }

  // Adds a new mapping from a TFLite index to a type conversion.
  void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
    const int64_t current_size = index_to_type_conversion_.size();
    if (tflite_index >= current_size) {
      index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
    }
    index_to_type_conversion_[tflite_index] = tflite_type;
  }

 private:
  // Next index of ann tensor.
  int next_ann_tensor_index_ = 0;

  // Mapping from lite index. Uses a std::vector for speed and code size
  // rather than a map.
  std::vector<int> lite_tensor_to_ann_tensor_;
  // Mapping from lite index to the type the tensor must be converted to while
  // copying its data to the memory allocated for NN API. kTfLiteNoType
  // means no conversion is needed. Uses a std::vector for speed and code size
  // rather than a map.
  std::vector<TfLiteType> index_to_type_conversion_;
};
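// A minimal usage sketch of OperandMapping (illustrative comment only, not
// part of the delegate implementation; the tensor indices below are
// hypothetical):
//
//   OperandMapping mapping;
//   // Map TFLite tensor 3 to the next free ANN operand index (0 here).
//   int ann_index = mapping.add_new_ann_tensor_index(/*tflite_index=*/3);
//   // Record that tensor 3 must be converted to float32 when its data is
//   // copied into the NNAPI-owned memory.
//   mapping.add_type_conversion(/*tflite_index=*/3, kTfLiteFloat32);
//   // Lookups; unmapped indices return -1 / kTfLiteNoType.
//   ann_index = mapping.lite_index_to_ann(3);                        // 0
//   TfLiteType conv = mapping.lite_index_to_ann_type_conversion(3);  // kTfLiteFloat32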
class NNAPIOpBuilder;

// Arguments passed to the op mapping functions when translating a TFLite node
// into an NNAPI operation.
struct NNAPIOpMappingArgs {
  TfLiteContext* context;
  NNAPIOpBuilder* builder;
  TfLiteNode* node;
  int node_index;
  std::vector<int>* model_state_outputs;
  std::vector<int>* model_state_tfl_inputs;
  std::vector<std::tuple<int, int>>* feedback_loops;
  int* nnapi_errno;
};

// RAII NN API Model Destructor for use with std::unique_ptr
class NNFreeModel {
 public:
  explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksModel* model) {
    nnapi_->ANeuralNetworksModel_free(model);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Compilation Destructor for use with std::unique_ptr
class NNFreeCompilation {
 public:
  explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksCompilation* compilation) {
    nnapi_->ANeuralNetworksCompilation_free(compilation);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Execution Destructor for use with std::unique_ptr
class NNFreeExecution {
 public:
  explicit NNFreeExecution(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksExecution* execution) {
    nnapi_->ANeuralNetworksExecution_free(execution);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};
// RAII NN API Burst Destructor for use with std::unique_ptr
class NNFreeBurst {
 public:
  explicit NNFreeBurst(const NnApi* nnapi) : nnapi_(nnapi) {}
  void operator()(ANeuralNetworksBurst* burst) {
    nnapi_->ANeuralNetworksBurst_free(burst);
  }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
};

// Manages an NNAPI shared memory handle.
class NNMemory {
 public:
  NNMemory(const NnApi* nnapi, const char* name, size_t size);

  ~NNMemory();

  ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
  uint8_t* get_data_ptr() { return data_ptr_; }
  size_t get_byte_size() { return byte_size_; }

 private:
  // NnApi instance to use. Not owned by this object.
  const NnApi* nnapi_;
  int fd_ = 0;
  size_t byte_size_ = 0;
  uint8_t* data_ptr_ = nullptr;
  ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
#ifndef __ANDROID__
  std::string shm_region_name_;
#endif
};
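// A hedged sketch of how the RAII deleters and NNMemory above are typically
// combined (illustrative comment only; the buffer name and size are
// hypothetical):
//
//   const NnApi* nnapi = NnApiImplementation();
//   ANeuralNetworksModel* model = nullptr;
//   nnapi->ANeuralNetworksModel_create(&model);
//   // The model is freed automatically when the unique_ptr goes out of scope.
//   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model(
//       model, NNFreeModel(nnapi));
//   // Shared memory region used to stage input data for an execution.
//   NNMemory input_memory(nnapi, "input_pool", /*size=*/4096);
//   uint8_t* data = input_memory.get_data_ptr();
//   ANeuralNetworksMemory* handle = input_memory.get_handle();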
// LINT.IfChange
enum class NNAPIValidationFailureType : int {
  // The operator is not supported by either NNAPI or the NNAPI Delegate.
  kUnsupportedOperator = 0,
  // The given operation or operands are not supported on the specified
  // Android SDK version. The min supported version is specified in the
  // validation failure message.
  kUnsupportedAndroidVersion = 1,
  // The version of the operator (value of TfLiteRegistration::version)
  // for the given op is not supported. The max supported version
  // is specified in the validation failure message.
  // For more details on each operator version see
  // the GetBuiltinOperatorVersion function in
  // third_party/tensorflow/lite/tools/versioning/op_version.cc.
  kUnsupportedOperatorVersion = 2,
  // The given input operand type is not supported for the current combination
  // of operator type and sdk version.
  kUnsupportedInputType = 3,
  // When using NN API version 1.0 or 1.1, the condition
  //   input_scale * filter_scale < output_scale
  // must be true for quantized versions of the following ops:
  // * CONV_2D
  // * DEPTHWISE_CONV_2D
  // * FULLY_CONNECTED (where filter actually stands for weights)
  // The condition is relaxed and no longer required since version 1.2.
  kNotRestrictedScaleCompliant = 4,
  // The given output operand type is not supported for the current combination
  // of operator type and sdk version.
  kUnsupportedOutputType = 5,
  // The size of the operand tensor is too large.
  kUnsupportedOperandSize = 6,
  // The value of one of the operands or of a combination of operands is
  // not supported. Details are provided in the failure message.
  kUnsupportedOperandValue = 7,
  // The combination of float inputs and quantized weights or filters
  // is not supported.
  kUnsupportedHybridOperator = 8,
  // The quantization type (for example per-channel quantization) is not
  // supported.
  kUnsupportedQuantizationType = 9,
  // The accelerated version of the operation requires a specific operand to be
  // specified.
  kMissingRequiredOperand = 10,
  // The rank of the operand is not supported. Details in the failure message.
  kUnsupportedOperandRank = 11,
  // The input tensor cannot be dynamically-sized.
  kInputTensorShouldHaveConstantShape = 12,
  // The operator has a different number of inputs from the one or ones
  // supported by NNAPI.
  kUnsupportedOperatorVariant = 13,
  // The accelerated version of the operator cannot specify an activation
  // function.
  kNoActivationExpected = 14,
  // Quantization scale and/or zero point are not in the supported value(s)
  // for the accelerated operation.
  kUnsupportedQuantizationParameters = 15,
};
// LINT.ThenChange(nnapi_linter/linter.proto)

struct NNAPIValidationFailure {
  NNAPIValidationFailureType type;
  std::string message;

  NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message)
      : type(type), message(message) {}
};
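// A hedged sketch of how validation failures can be collected and reported
// (illustrative comment only; `context`, `node`, `registration`, and
// `android_sdk_version` are assumed to come from the delegate's partitioning
// logic):
//
//   std::vector<NNAPIValidationFailure> failures;
//   const bool supported = NNAPIDelegateKernel::Validate(
//       context, registration->builtin_code, registration->version,
//       android_sdk_version, node, /*is_accelerator_specified=*/true,
//       &failures);
//   if (!supported) {
//     for (const auto& failure : failures) {
//       // e.g. log failure.type and failure.message to explain why the node
//       // was rejected.
//     }
//   }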
// The kernel that represents the node subset of the TF Lite graph being run
// on NN API.
class NNAPIDelegateKernel {
 public:
  explicit NNAPIDelegateKernel(const NnApi* nnapi)
      : initialised_(false),
        nnapi_(nnapi),
        nn_model_(nullptr, NNFreeModel(nnapi_)),
        nn_compilation_(nullptr, NNFreeCompilation(nnapi_)),
        nn_burst_(nullptr, NNFreeBurst(nnapi_)),
        nn_execution_(nullptr, NNFreeExecution(nnapi_)) {}
  NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {}
  ~NNAPIDelegateKernel() {
    for (auto content : allocation_memory_mapping_) {
      nnapi_->ANeuralNetworksMemory_free(content.second);
    }
  }

  // Translates a node into its operands.
  // It assumes that the call to Validate has been successful for
  // the operation.
  // In case of success it returns kTfLiteOk and stores in nn_op_type the
  // NNAPI Operation code.
  // Returns kTfLiteError in case of failures during mapping.
  static TfLiteStatus Map(TfLiteContext* context, int builtin_code,
                          int version, int android_sdk_version,
                          const NNAPIOpMappingArgs& mapping_args,
                          ANeuralNetworksOperationType* nn_op_type);

  // Returns true if the node can be accelerated with NNAPI.
  static bool Validate(
      const TfLiteContext* context, int builtin_code, int version,
      int android_sdk_version, const TfLiteNode* node,
      bool is_accelerator_specified,
      // Collects the list of failures found while validating
      // whether the given node can be accelerated.
      std::vector<NNAPIValidationFailure>* map_failures = nullptr);

  // Initializes the kernel and builds the NN model.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params,
                    int* nnapi_errno);

  // Creates the NNAPI Compilation for the NN model. It assumes that Init has
  // been called and completed successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node,
                       int* nnapi_errno);

  // Invokes the NN model. Expects Init and Prepare to have been completed
  // successfully.
  // Any NNAPI-related error causing this method to fail will have the
  // associated error number stored in nnapi_errno.
  TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node,
                      int* nnapi_errno);

  // Returns the list of operations supported by the current NNAPI model as
  // built in Prepare. Every operation is identified by the index as provided
  // in the delegate parameters given to the delegate during the Init call.
  // It expects the Init method to have been called and completed successfully
  // and returns kTfLiteError if not. Returns an error if any of the NNAPI
  // operations fails or if the
  // ANeuralNetworksModel_getSupportedOperationsForDevices function is not
  // available in the NnApi object.
  TfLiteStatus GetOperationsSupportedByTargetNnApiDevices(
      TfLiteContext* context, std::vector<int>* supported_nodes,
      int* nnapi_errno);

 private:
  // True if initialization has been completed successfully.
  bool initialised_;
  // Access to NNApi.
  const NnApi* nnapi_;
  // ANN device handles.
  std::vector<ANeuralNetworksDevice*> nnapi_devices_;
  // Name of the nnapi device, empty if nnapi_devices_ is empty.
  std::string device_name_;
  // ANN API state.
  std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
  std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
      nn_compilation_;
  std::unique_ptr<ANeuralNetworksBurst, NNFreeBurst> nn_burst_;
  std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution> nn_execution_;
  // The mapping of tensor id to BufferHandle. Needed to track BufferHandle
  // changes and alter nn_execution_ if necessary.
  std::vector<int> tensor_handle_map_;
  // Node indices that this delegate is responsible for. Indices here
  // index into the nodes array in the TfLiteContext.
  std::vector<int> nodes_;
  // Tracks the operand indices we use.
  OperandMapping operand_mapping_;
  std::map<const MMAPAllocation*, ANeuralNetworksMemory*>
      allocation_memory_mapping_;
  // Tracks the memory registrations of the delegate.
  const std::vector<StatefulNnApiDelegate::MemoryRegistration>*
      tensor_memory_map_;
  std::vector<int> model_state_outputs_;
  std::vector<int> model_state_tfl_inputs_;
  // This is the equivalent of the pair model_state_outputs_,
  // model_state_tfl_inputs_ for all tensors where we have to keep the output
  // data available for TFLite model users.
  std::vector<std::tuple<int, int>> feedback_loops_;

  std::unique_ptr<NNMemory> nn_input_memory_;
  std::unique_ptr<NNMemory> nn_output_memory_;

  std::vector<uint8_t> nn_compilation_cache_token_;

  std::vector<int> nnapi_to_tflite_op_mapping_;
  // Map of DENSIFY output tensor id to node id.
  std::vector<int> densify_output_to_node_mapping_;
  // Map of DEQUANTIZE output tensor id to node id.
  // Only contains DEQUANTIZE nodes with non-const input.
  std::vector<int> non_const_dequantize_output_to_node_mapping_;

  // Fully initialized in NNAPIDelegateKernel::AddOpsAndTensors.
  int target_feature_level_ = 27;  // kMinSdkVersionForNNAPI

  void AddDequantizeOperatorsWhereNeeded(
      const TfLiteContext* context, int builtin_code, const TfLiteNode* node,
      int tflite_node_index, NNAPIOpBuilder* builder, int* nnapi_errno);

  TfLiteStatus DensifyAndDequantizeConstTensor(TfLiteContext* context,
                                               int densify_node_id,
                                               bool should_dequantize,
                                               NNAPIOpBuilder& builder);

  TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno,
                                bool allow_dynamic_dimensions);

  TfLiteStatus BuildGraph(TfLiteContext* context,
                          const StatefulNnApiDelegate::Options& options,
                          const TfLiteIntArray* input_tensors,
                          const TfLiteIntArray* output_tensors,
                          int* nnapi_errno);
};
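// A hedged sketch of the kernel's lifecycle inside the delegate (illustrative
// comment only; `context`, `params`, and `node` are assumed to come from the
// usual TfLiteDelegate kernel callbacks):
//
//   int nnapi_errno = 0;
//   auto kernel = std::make_unique<NNAPIDelegateKernel>();
//   // Init: translates the partition described by `params` into an NN model.
//   if (kernel->Init(context, params, &nnapi_errno) != kTfLiteOk) { /*...*/ }
//   // Prepare: creates the NNAPI compilation for that model.
//   if (kernel->Prepare(context, node, &nnapi_errno) != kTfLiteOk) { /*...*/ }
//   // Invoke: runs inference, reporting NNAPI errors through nnapi_errno.
//   if (kernel->Invoke(context, node, &nnapi_errno) != kTfLiteOk) { /*...*/ }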
}  // namespace nnapi
}  // namespace delegate
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_