/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/delegates/serialization.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

struct NnApiSLDriverImplFL5;
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;

namespace tflite {

namespace delegate {
namespace nnapi {
class NNAPIDelegateKernel;
}  // namespace nnapi
}  // namespace delegate

using tflite::delegate::nnapi::NNAPIDelegateKernel;

// TfLiteDelegate to interface with NNAPI.
class StatefulNnApiDelegate : public TfLiteDelegate {
 public:
  // Encapsulates all options that are specific to the NNAPI delegate.
  struct Options {
    // Preferred power/performance trade-off. For more details please see
    // the ANeuralNetworksCompilation_setPreference documentation in:
    // https://developer.android.com/ndk/reference/group/neural-networks.html
    enum ExecutionPreference {
      kUndefined = -1,
      kLowPower = 0,
      kFastSingleAnswer = 1,
      kSustainedSpeed = 2,
    };

    // Preferred power/performance trade-off.
    ExecutionPreference execution_preference = kUndefined;

    // Selected NNAPI accelerator with nul-terminated name.
    // Defaults to nullptr, which implies the NNAPI default behavior: the
    // NNAPI runtime is allowed to use all available accelerators. If the
    // selected accelerator cannot be found, NNAPI will not be used.
    // It is the caller's responsibility to ensure the string is valid for the
    // duration of the Options object's lifetime.
    const char* accelerator_name = nullptr;

    // The nul-terminated cache dir for the NNAPI model.
    // Defaults to nullptr, which implies that NNAPI will not try caching the
    // compilation.
    const char* cache_dir = nullptr;

    // The unique nul-terminated token string for the NNAPI model.
    // Defaults to nullptr, which implies that NNAPI will not try caching the
    // compilation. It is the caller's responsibility to ensure there is no
    // clash of the tokens.
    // NOTE: when using compilation caching, it is not recommended to use the
    // same delegate instance for multiple models.
    const char* model_token = nullptr;

    // Whether to disallow NNAPI CPU usage. Only effective on Android 10 and
    // above. The NNAPI CPU typically performs less well than the built-in
    // TfLite kernels, but allowing CPU enables partial acceleration of models.
    // If this is set to true, NNAPI is only used if the whole model is
    // accelerated.
    bool disallow_nnapi_cpu = true;

    // Specifies the max number of partitions to delegate. A value <= 0 means
    // no limit.
    // If the delegation of the full set of supported nodes would generate a
    // number of partitions greater than this parameter, only
    // <max_number_delegated_partitions> of them will actually be accelerated.
    // The selection is currently done by sorting partitions in decreasing
    // order of number of nodes and selecting them until the limit is reached.
    int max_number_delegated_partitions = 3;

    // Allow fp32 computation to be run in fp16.
    bool allow_fp16 = false;

    // Specifies the relative priority for executions of the model.
    // Available values are {ANEURALNETWORKS_PRIORITY_LOW,
    // ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH,
    // ANEURALNETWORKS_PRIORITY_DEFAULT}.
    int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;

    // Specifies the maximum expected duration in nanoseconds for compiling the
    // model. If the device is not able to complete the compilation within the
    // specified duration, the compilation may be aborted. If set to 0, the
    // timeout duration is considered infinite.
    uint64_t max_compilation_timeout_duration_ns = 0;

    // Specifies the maximum expected duration in nanoseconds for executing the
    // model. If the device is not able to complete the execution within the
    // specified duration, the execution may be aborted. If set to 0, the
    // timeout duration is considered infinite.
    uint64_t max_execution_timeout_duration_ns = 0;

    // Specifies the maximum expected duration in nanoseconds for WHILE loops
    // in the execution. If a WHILE loop condition model does not output false
    // within the specified duration, the execution will be aborted. If set to
    // 0, the default timeout for loops will be used.
    uint64_t max_execution_loop_timeout_duration_ns = 0;

    // Whether to allow dynamic dimension sizes without re-compilation.
    // A tensor with dynamic dimensions must have a valid dims_signature
    // defined.
    // Only supported in NNAPI 1.1 and newer versions.
    // WARNING: Setting this flag to true may result in the model being
    // rejected by the accelerator. This should only be enabled if the target
    // device supports dynamic dimensions of the model.
    bool allow_dynamic_dimensions = false;

    // Force using NNAPI Burst mode if supported.
    // Burst mode allows accelerators to efficiently manage resources, which
    // significantly reduces overhead, especially if the same delegate
    // instance is to be used for multiple inferences.
    // If NNAPI devices are specified and are of NNAPI feature level 5 or
    // higher, the NNAPI delegate will automatically enable burst mode for
    // better performance.
    // Default: Disabled for devices with NNAPI feature level 4 or lower.
    bool use_burst_computation = false;
  };

  // Uses default options.
  StatefulNnApiDelegate();

  // The ownership of the NnApi instance is left to the caller of the
  // StatefulNnApiDelegate constructor; the caller must ensure that the
  // lifetime of the NnApi instance exceeds the lifetime of the
  // StatefulNnApiDelegate.
  explicit StatefulNnApiDelegate(const NnApi* nnapi);

  // The constructor that accepts options from the user.
  // This makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
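  //
  // For example, a minimal usage sketch is shown below. The option values are
  // hypothetical, and 'interpreter' stands for an already-built
  // tflite::Interpreter:
  //
  //   StatefulNnApiDelegate::Options options;
  //   options.execution_preference =
  //       StatefulNnApiDelegate::Options::kSustainedSpeed;
  //   options.accelerator_name = "example-accelerator";  // hypothetical name
  //   options.cache_dir = "/data/local/tmp";             // hypothetical dir
  //   options.model_token = "example_model_token";       // hypothetical token
  //   StatefulNnApiDelegate delegate(options);
  //   if (interpreter->ModifyGraphWithDelegate(&delegate) != kTfLiteOk) {
  //     // GetNnApiErrno() returns the result code of the latest failed NNAPI
  //     // call, or 0 if no NNAPI call has failed.
  //     int nnapi_error = delegate.GetNnApiErrno();
  //   }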
  explicit StatefulNnApiDelegate(Options options);

  // Constructor that accepts both an NnApi instance and options.
  // The ownership of the NnApi instance is left to the caller of the
  // StatefulNnApiDelegate constructor; the caller must ensure that the
  // lifetime of the NnApi instance exceeds the lifetime of the
  // StatefulNnApiDelegate.
  // This constructor makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  StatefulNnApiDelegate(const NnApi* nnapi, Options options);

  // Constructor that accepts an NnApiSLDriverImplFL5 instance and options.
  // The ownership of the NnApiSLDriverImplFL5 instance is left to the caller
  // of the StatefulNnApiDelegate constructor; the caller must ensure that the
  // lifetime of the NnApiSLDriverImplFL5 instance encompasses all calls to
  // methods on the StatefulNnApiDelegate instance, other than the destructor.
  // This constructor makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  //
  // The NN API Support Library Driver must support at least NNAPI Feature
  // Level 5 (introduced in SDK level 31), but this might point to a compatible
  // struct that also supports a higher NNAPI Feature Level. These cases can
  // be distinguished by examining the base.implFeatureLevel field, which
  // should be set to the supported feature level (which must be >=
  // ANEURALNETWORKS_FEATURE_LEVEL_5).
  //
  // Please note that since the NNAPI Support Library doesn't implement some
  // of the functions (see the CreateNnApiFromSupportLibrary implementation and
  // the NNAPI SL documentation for details), the underlying NnApi structure
  // will have nullptr stored in some of the function pointers. Calling such
  // functions will result in a crash.
  //
  // WARNING: This is an experimental interface that is subject to change.
  StatefulNnApiDelegate(
      const NnApiSLDriverImplFL5* nnapi_support_library_driver,
      Options options);

  ~StatefulNnApiDelegate() = default;

  // Returns the delegate options.
  // The lifetime of the storage pointed to by the 'const char *' members of
  // the returned Options object is the same as the lifetime of the supplied
  // TfLiteDelegate instance.
  static const Options GetOptions(TfLiteDelegate* delegate);

  // Callback function which copies data from ANeuralNetworksMemory to host
  // tensor CPU buffer. It is the user's responsibility to implement these
  // callbacks for the specific types of shared memory they intend to use.
  // WARNING: This is an experimental interface that is subject to change.
  typedef TfLiteStatus (*CopyToHostTensorFnPtr)(TfLiteTensor* tensor,
                                                ANeuralNetworksMemory* memory,
                                                size_t memory_offset,
                                                size_t byte_size,
                                                void* callback_context);
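
  // A minimal sketch of such a callback (hypothetical: it assumes the caller
  // has also mapped the same shared memory region on the CPU, passes the
  // mapped base address as callback_context, and has included <cstring>):
  //
  //   TfLiteStatus CopyFromSharedMemory(
  //       TfLiteTensor* tensor, ANeuralNetworksMemory* memory,
  //       size_t memory_offset, size_t byte_size, void* callback_context) {
  //     const char* mapped_base = static_cast<const char*>(callback_context);
  //     std::memcpy(tensor->data.raw, mapped_base + memory_offset, byte_size);
  //     return kTfLiteOk;
  //   }
  //
  // Such a callback can be passed, together with the ANeuralNetworksMemory
  // handle, to RegisterNnapiMemory (declared below); the returned
  // TfLiteBufferHandle is then used with Interpreter::SetBufferHandle.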

  // Encapsulates all fields related to memory registration for internal
  // bookkeeping only.
  struct MemoryRegistration {
    ANeuralNetworksMemory* memory;
    CopyToHostTensorFnPtr callback;
    void* callback_context;
  };

  // Registers the ANeuralNetworksMemory handle with the delegate. A
  // TfLiteBufferHandle will be returned to be used with
  // Interpreter::SetBufferHandle. The callback_context will be passed to the
  // callback function when invoked.
  // Note: the returned TfLiteBufferHandle can only be used with a single
  // Interpreter instance. However, the caller can register the same memory
  // multiple times to get different handles to use with different Interpreter
  // instances.
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteBufferHandle RegisterNnapiMemory(ANeuralNetworksMemory* memory,
                                         CopyToHostTensorFnPtr callback,
                                         void* callback_context);

  // Returns the vector of known ANeuralNetworksMemory handles.
  // Note: this function is not intended to be called by developers.
  // WARNING: This is an experimental interface that is subject to change.
  static const std::vector<MemoryRegistration>& GetTensorMemoryMap(
      TfLiteDelegate* delegate);

  // Returns a pointer to delegates::Serialization, if caching is enabled by
  // the user via cache_dir & model_token.
  static delegates::Serialization* GetCache(TfLiteDelegate* delegate);

  // Returns the int value of the ResultCode returned by the latest
  // failed call to NNAPI, if any. Zero only in case of no failed calls since
  // the construction of this instance of StatefulNnApiDelegate.
  // The error code is reset when the delegate is re-initialized
  // (i.e. when calling interpreter.ModifyGraphWithDelegate(delegate)).
  int GetNnApiErrno() const;

 private:
  // Encapsulates all delegate data.
  struct Data {
    // Pointer to the NNAPI implementation to be used by this delegate as
    // set when building the StatefulNnApiDelegate instance.
    // Will generally be the NnApiInstance() singleton but can be overridden
    // for testing or for users needing to wrap or stub parts of NNAPI.
    // The ownership of the nnapi instance is left to the caller of
    // the StatefulNnApiDelegate constructor.
    const NnApi* nnapi;
    // Preferred power/performance trade-off.
    Options::ExecutionPreference execution_preference;
    // Selected NNAPI accelerator name.
    std::string accelerator_name;
    // The cache dir for the NNAPI model.
    std::string cache_dir;
    // The unique token string for the NNAPI model.
    std::string model_token;
    // Whether to disallow NNAPI CPU.
    bool disallow_nnapi_cpu;
    // Tensor to ANeuralNetworksMemory mapping.
    std::vector<MemoryRegistration> tensor_memory_map;
    // Contains a non-zero value if any NNAPI method call
    // operation returned a non-zero result code.
    int nnapi_errno = ANEURALNETWORKS_NO_ERROR;
    // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare
    // when trying to understand if all nodes are supported by the target
    // accelerators.
    // The key is the index of the first node in the partition.
    // Couldn't use unique_ptr because of problems building on gcc.
    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
    // Maximum number of NNAPI partitions to delegate. Zero or negative means
    // no limit. Copied from StatefulNnApiDelegate::Options.
    int max_number_delegated_partitions;
    // Allow fp32 computation to be run in fp16.
    bool allow_fp16;
    // Specifies the relative priority for executions of the model.
    int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
    // Specifies the maximum expected duration in nanoseconds for compiling the
    // model.
    uint64_t max_compilation_timeout_duration_ns = 0;
    // Specifies the maximum expected duration in nanoseconds for executing the
    // model.
    uint64_t max_execution_timeout_duration_ns = 0;
    // Specifies the maximum expected duration in nanoseconds for WHILE loops
    // in the execution.
    uint64_t max_execution_loop_timeout_duration_ns = 0;
    // Whether to allow dynamic dimension sizes without re-compilation.
    bool allow_dynamic_dimensions = false;
    // Whether to use NNAPI Burst mode.
    bool use_burst_computation = false;

    // Smart pointer for automatically cleaning up the NnApi structure in case
    // the delegate was constructed from an NNAPI support library.
    std::unique_ptr<const NnApi> owned_nnapi = nullptr;

    // TFLite Serialization in case caching has been enabled by the user
    // through Options.
    std::unique_ptr<delegates::Serialization> cache;

    explicit Data(const NnApi* nnapi);
    explicit Data(std::unique_ptr<const NnApi> nnapi);
    ~Data();

    // Caches an initialised NNAPIDelegateKernel.
    void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
                             NNAPIDelegateKernel* delegate_state);
    // Returns a cached NNAPIDelegateKernel if available and removes it
    // from the cache, transferring the ownership to the caller.
    NNAPIDelegateKernel* MaybeGetCachedDelegateKernel(
        const TfLiteDelegateParams* delegate_params);
  };

  // Implements TfLiteDelegate::Prepare. Please refer to the TfLiteDelegate
  // documentation for more info.
  static TfLiteStatus DoPrepare(TfLiteContext* context,
                                TfLiteDelegate* delegate);

  // Copies the data from the delegate buffer handle into the raw memory of
  // the given 'tensor'. The delegate is allowed to allocate the raw
  // bytes as long as it follows the rules for kTfLiteDynamic tensors.
  static TfLiteStatus DoCopyFromBufferHandle(TfLiteContext* context,
                                             TfLiteDelegate* delegate,
                                             TfLiteBufferHandle buffer_handle,
                                             TfLiteTensor* tensor);

  // Copies the data from the raw memory of the given 'tensor' to the delegate
  // buffer handle. Currently this function is not supported, and calling it
  // will result in an error.
  static TfLiteStatus DoCopyToBufferHandle(TfLiteContext* context,
                                           TfLiteDelegate* delegate,
                                           TfLiteBufferHandle buffer_handle,
                                           TfLiteTensor* tensor);

  // Frees the delegate buffer handle. Note: this only frees the handle, but
  // it doesn't release the underlying resource (e.g. textures). The
  // resources are either owned by the application layer or the delegate.
  static void DoFreeBufferHandle(TfLiteContext* context,
                                 TfLiteDelegate* delegate,
                                 TfLiteBufferHandle* handle);

  // Returns the nodes that can be delegated via NNAPI to the accelerator
  // specified in the delegate options and information about the way the
  // graph will be partitioned if the supported nodes are delegated.
  // Partition information is composed of the number of partitions and
  // the delegate parameters associated with each partition.
  // The method also caches in delegate->data the NNAPIDelegateKernel instances
  // that have been created during the device evaluation.
  // All arguments are expected to be non-null.
  static TfLiteStatus GetNodesSupportedByAccelerator(
      TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
      const std::vector<int>& supported_nodes,
      std::vector<int>* device_supported_nodes, int* num_partitions,
      TfLiteDelegateParams** params_array, int* nnapi_errno);

  // Alters the given array of nodes_to_delegate to limit the number of
  // NNAPI-owned partitions to be less than or equal to num_partitions. If
  // num_partitions is less than or equal to zero, the input is left unaltered.
  // The nodes_to_delegate array is expected to contain at element 0 the number
  // of nodes to delegate and in the remaining elements the set of nodes
  // that would be delegated to NNAPI if this function were not called. It
  // will be altered, storing in the first element the count of nodes to
  // actually delegate and in the remainder of the array their indexes.
  // The params_array might be altered during the function's execution.
  static TfLiteStatus LimitDelegatedPartitions(
      int max_partitions,
      std::vector<TfLiteDelegateParams> partition_params_array,
      std::vector<int>* nodes_to_delegate);

  void StatefulNnApiDelegateConstructorImpl(const Options& options);

  // Delegate data presented through TfLiteDelegate::data_.
  Data delegate_data_;
};

// DEPRECATED: Please use the StatefulNnApiDelegate class instead.
//
// Returns a singleton delegate that can be used to make use of the NN API.
// e.g.
//   TfLiteDelegate* delegate = NnApiDelegate();
//   interpreter->ModifyGraphWithDelegate(delegate);
// NnApiDelegate() returns a singleton, so you should not free this
// pointer or worry about its lifetime.
TfLiteDelegate* NnApiDelegate();

}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_