1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 17 #define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 18 19 #include <map> 20 #include <memory> 21 22 #include "tensorflow/lite/allocation.h" 23 #include "tensorflow/lite/c/common.h" 24 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h" 25 #include "tensorflow/lite/nnapi/nnapi_implementation.h" 26 27 namespace tflite { 28 namespace delegate { 29 namespace nnapi { 30 31 constexpr int32_t kMinSdkVersionForNNAPI = 27; 32 constexpr int32_t kMinSdkVersionForNNAPI11 = 28; 33 constexpr int32_t kMinSdkVersionForNNAPI12 = 29; 34 35 // Track tensor indices to NN API tensor indices mapping. 36 class OperandMapping { 37 public: 38 // Given a TFLite index return the ANN index. If it doesn't exist 39 // return -1. lite_index_to_ann(int index)40 int lite_index_to_ann(int index) const { 41 if (index >= 0 && index < lite_tensor_to_ann_tensor_.size()) 42 return lite_tensor_to_ann_tensor_[index]; 43 else 44 return -1; 45 } 46 47 // NN API uses non tensor operands instead of structs. This creates one 48 // and returns the index. It uses a std::vector and resizes it as needed 49 // keeping -1 to unmapped values. Intermediate tensors likely will not 50 // be mapped. add_new_non_tensor_operand()51 int add_new_non_tensor_operand() { return next_ann_tensor_index_++; } 52 53 // This call is necessary for input operands generated by the delegate 54 // to map constant inputs not present in TFLite but required by NNAPI, 55 // for example when splitting one input in several ones. add_delegate_generated_input_ann_tensors_operand()56 int add_delegate_generated_input_ann_tensors_operand() { 57 return next_ann_tensor_index_++; 58 } 59 60 // Add a new mapping from `tflite_index` and return the NN API tensor index. add_new_ann_tensor_index(int tflite_index)61 int add_new_ann_tensor_index(int tflite_index) { 62 if (tflite_index >= lite_tensor_to_ann_tensor_.size()) { 63 lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1); 64 } 65 const int new_tensor_index = next_ann_tensor_index_++; 66 lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index; 67 return new_tensor_index; 68 } 69 70 // Given a TFLite index returns a TFLite type to which a tensor must be 71 // converted during copying the data to the memory allocated for NN API. 72 // kTfLiteNoType means no conversion is needed. lite_index_to_ann_type_conversion(int index)73 TfLiteType lite_index_to_ann_type_conversion(int index) const { 74 if (index >= 0 && index < index_to_type_conversion_.size()) 75 return index_to_type_conversion_[index]; 76 else 77 return kTfLiteNoType; 78 } 79 80 // Add a new mapping from TFLite index to a type conversion. add_type_conversion(int tflite_index,TfLiteType tflite_type)81 void add_type_conversion(int tflite_index, TfLiteType tflite_type) { 82 if (tflite_index >= index_to_type_conversion_.size()) { 83 index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType); 84 } 85 index_to_type_conversion_[tflite_index] = tflite_type; 86 } 87 88 private: 89 // Next index of ann tensor 90 int next_ann_tensor_index_ = 0; 91 92 // Mapping from lite index. Use a std::vector for speed and code size 93 // rather than a map. 94 std::vector<int> lite_tensor_to_ann_tensor_; 95 // Mapping from lite index to a type which tensor must be converted to during 96 // the copying of the data to the memory allocated for NN API. kTfLiteNoType 97 // means no conversion is needed. Use an std::vector for speed and code size 98 // rather than a map. 99 std::vector<TfLiteType> index_to_type_conversion_; 100 }; 101 102 class NNAPIOpBuilder; 103 104 // The kernel that represents the node sub set of TF Lite being run on NN API. 105 struct NNAPIOpMappingArgs { 106 TfLiteContext* context; 107 NNAPIOpBuilder* builder; 108 TfLiteNode* node; 109 std::vector<int>* model_state_outputs; 110 std::vector<int>* model_state_tfl_inputs; 111 std::vector<std::tuple<int, int>>* feedback_loops; 112 int* nnapi_errno; 113 }; 114 115 // RAII NN API Model Destructor for use with std::unique_ptr 116 class NNFreeModel { 117 public: NNFreeModel(const NnApi * nnapi)118 explicit NNFreeModel(const NnApi* nnapi) : nnapi_(nnapi) {} operator()119 void operator()(ANeuralNetworksModel* model) { 120 nnapi_->ANeuralNetworksModel_free(model); 121 } 122 123 private: 124 const NnApi* nnapi_; 125 }; 126 // RAII NN API Compilation Destructor for use with std::unique_ptr 127 class NNFreeCompilation { 128 public: NNFreeCompilation(const NnApi * nnapi)129 explicit NNFreeCompilation(const NnApi* nnapi) : nnapi_(nnapi) {} operator()130 void operator()(ANeuralNetworksCompilation* model) { 131 nnapi_->ANeuralNetworksCompilation_free(model); 132 } 133 134 private: 135 const NnApi* nnapi_; 136 }; 137 138 // Manage NNAPI shared memory handle 139 class NNMemory { 140 public: 141 NNMemory(const NnApi* nnapi, const char* name, size_t size); 142 143 ~NNMemory(); 144 get_handle()145 ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; } get_data_ptr()146 uint8_t* get_data_ptr() { return data_ptr_; } 147 148 private: 149 const NnApi* nnapi_; 150 int fd_ = 0; 151 size_t byte_size_ = 0; 152 uint8_t* data_ptr_ = nullptr; 153 ANeuralNetworksMemory* nn_memory_handle_ = nullptr; 154 }; 155 156 157 enum class NNAPIValidationFailureType : int { 158 // The operator is not supported by either NNAPI or the NNAPI Delegate. 159 kUnsupportedOperator = 0, 160 // The given operation or operands are not supported on the specified 161 // Android SDK version. The min supported version is specified in the 162 // validation failure message. 163 kUnsupportedAndroidVersion = 1, 164 // The version of the operator (value of TfLiteRegistration::version) 165 // for the given op is not supported. The max supported version 166 // is specified in the validation failure message. 167 // For more details on each operator version see 168 // the GetBuiltinOperatorVersion function in 169 // third_party/tensorflow/lite/tools/versioning/op_version.cc. 170 kUnsupportedOperatorVersion = 2, 171 // The given input operand type is not supported for the current combination 172 // of operator type and sdk version. 173 kUnsupportedInputType = 3, 174 // When using NN API version 1.0 or 1.1, the condition 175 // input_scale * filter_scale < output_scale 176 // must be true for quantized versions of the following ops: 177 // * CONV_2D 178 // * DEPTHWISE_CONV_2D 179 // * FULLY_CONNECTED (where filter actually stands for weights) 180 // The condition is relaxed and no longer required since version 1.2. 181 kNotRestrictedScaleCompliant = 4, 182 // The given output operand type is not supported for the current combination 183 // of operator type and sdk version. 184 kUnsupportedOutputType = 5, 185 // The size of the operand tensor is too large. 186 kUnsupportedOperandSize = 6, 187 // The value of one of the operands or of a combination of operands is 188 // not supported. Details are provided in the failure message. 189 kUnsupportedOperandValue = 7, 190 // The combination of float inputs and quantized weights or filters 191 // is not supported 192 kUnsupportedHybridOperator = 8, 193 // The quantization type (for example per-channel quantization) is not 194 // supported. 195 kUnsupportedQuantizationType = 9, 196 // The accelerated version of operation requires a specific operand to be 197 // specified. 198 kMissingRequiredOperand = 10, 199 // The rank of the operand is not supported. Details in the failure message. 200 kUnsupportedOperandRank = 11, 201 // The input tensor cannot be dynamically-sized. 202 kInputTensorShouldHaveConstantShape = 12, 203 // The operator has a different number of inputs of the one or ones that 204 // are supported by NNAPI. 205 kUnsupportedOperatorVariant = 13, 206 // The accelerated version of the operator cannot specify an activation 207 // function. 208 kNoActivationExpected = 14, 209 // Quantization scale and/or zero point are not in the supported value(s) 210 // for the accelerated operation. 211 kUnsupportedQuantizationParameters = 15, 212 }; 213 214 215 struct NNAPIValidationFailure { 216 NNAPIValidationFailureType type; 217 std::string message; 218 NNAPIValidationFailureNNAPIValidationFailure219 NNAPIValidationFailure(NNAPIValidationFailureType type, const char* message) 220 : type(type), message(message) {} 221 }; 222 223 // The kernel that represents the node sub set of TF Lite being run on NN API. 224 class NNAPIDelegateKernel { 225 public: NNAPIDelegateKernel(const NnApi * nnapi)226 explicit NNAPIDelegateKernel(const NnApi* nnapi) 227 : initialised_(false), 228 nnapi_(nnapi), 229 nn_model_(nullptr, NNFreeModel(nnapi_)), 230 nn_compilation_(nullptr, NNFreeCompilation(nnapi_)) {} NNAPIDelegateKernel()231 NNAPIDelegateKernel() : NNAPIDelegateKernel(NnApiImplementation()) {} ~NNAPIDelegateKernel()232 ~NNAPIDelegateKernel() { 233 for (auto content : allocation_memory_mapping_) { 234 nnapi_->ANeuralNetworksMemory_free(content.second); 235 } 236 } 237 238 // Translate a node into its operands 239 // It assumes that the call to Validate for has been successful for 240 // the operation. 241 // In case of success it returns kTfLiteOk and stores in n_op_type the 242 // NNAPI Operation code. 243 // Returns kTfLiteError in case of failures during mapping. 244 static TfLiteStatus Map(TfLiteContext* context, int builtin_code, int version, 245 int android_sdk_version, 246 const NNAPIOpMappingArgs& mapping_args, 247 ANeuralNetworksOperationType* nn_op_type); 248 249 // Returns true if the node can be accelerated with NNAPI. 250 static bool Validate( 251 const TfLiteContext* context, int builtin_code, int version, 252 int android_sdk_version, const TfLiteNode* node, 253 bool is_accelerator_specified, 254 // Collects lists of failures collected during 255 // the validation of the possibility of accelerating 256 // the given node 257 std::vector<NNAPIValidationFailure>* map_failures = nullptr); 258 259 // Initialize the kernel (a NN model) and builds the NN Model. 260 // Any NNAPI Related error causing this method to fail will have the 261 // associated error number stored in nnapi_errno 262 TfLiteStatus Init(TfLiteContext* context, const TfLiteDelegateParams* params, 263 int* nnapi_errno); 264 265 // Creates the NNAPI Compilation for the NN model. It assumes that Init has 266 // been called and completed successfully. 267 // Any NNAPI Related error causing this method to fail will have the 268 // associated error number stored in nnapi_errno 269 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node, 270 int* nnapi_errno); 271 272 // Invoke the NN Model. Expects Init and Prepare to have been completed 273 // successfully. 274 // Any NNAPI Related error causing this method to fail will have the 275 // associated error number stored in nnapi_errno 276 TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node, 277 int* nnapi_errno); 278 279 // Returns the list of operations supported by the current NNAPI model as 280 // built in Prepare. Every operation is identified by the index as provided 281 // in the delegate parameters given to the delegate during the Init call. 282 // It expects the Init method has been called and completed successfully and 283 // returns kTfLiteError if not. Returns an error if any of the NNAPI 284 // operations fails or if the 285 // ANeuralNetworksModel_getSupportedOperationsForDevices function is not 286 // available in the NnApi object. 287 TfLiteStatus GetOperationsSupportedByTargetNnApiDevices( 288 TfLiteContext* context, std::vector<int>* supported_nodes, 289 int* nnapi_errno); 290 291 private: 292 // True if initialization has been completed successfully 293 bool initialised_; 294 // Access to NNApi. 295 const NnApi* nnapi_; 296 // ANN device handle. 297 std::vector<ANeuralNetworksDevice*> nnapi_devices_; 298 // Name of the nnapi device, empty if nnapi_devices_ is empty; 299 std::string device_name_; 300 // ANN API state. 301 std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_; 302 std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation> 303 nn_compilation_; 304 // Node indices that this delegate is responsible for. Indices here 305 // indexes into the nodes array in the TfLiteContext. 306 std::vector<int> nodes_; 307 // Track indices we use 308 OperandMapping operand_mapping_; 309 std::map<const MMAPAllocation*, ANeuralNetworksMemory*> 310 allocation_memory_mapping_; 311 // Track memory map 312 const std::vector<StatefulNnApiDelegate::MemoryRegistration>* 313 tensor_memory_map_; 314 std::vector<int> model_state_outputs_; 315 std::vector<int> model_state_tfl_inputs_; 316 // This is the equivalent of the pair model_state_outputs_, 317 // model_state_tfl_inputs_ for all tensors where we have to keep the output 318 // data available for TFLite model users 319 std::vector<std::tuple<int, int>> feedback_loops_; 320 321 std::unique_ptr<NNMemory> nn_input_memory_; 322 std::unique_ptr<NNMemory> nn_output_memory_; 323 324 std::vector<uint8_t> nn_compilation_cache_token_; 325 326 void AddDequantizeOperatorsWhereNeeded(const TfLiteContext* context, 327 int builtin_code, 328 const TfLiteNode* node, 329 NNAPIOpBuilder* builder, 330 int* nnapi_errno); 331 332 TfLiteStatus AddOpsAndTensors(TfLiteContext* context, int* nnapi_errno); 333 334 TfLiteStatus BuildGraph(TfLiteContext* context, 335 const TfLiteIntArray* input_tensors, 336 const TfLiteIntArray* output_tensors, 337 int* nnapi_errno); 338 }; 339 340 } // namespace nnapi 341 } // namespace delegate 342 } // namespace tflite 343 344 #endif // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_KERNEL_H_ 345