/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"

// Forward declaration so this header does not need the full NNAPI runtime
// headers; the struct is defined by the Android Neural Networks API.
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;

namespace tflite {

namespace delegate {
namespace nnapi {
class NNAPIDelegateKernel;
}  // namespace nnapi
}  // namespace delegate

using tflite::delegate::nnapi::NNAPIDelegateKernel;

// TFliteDelegate to interface with NNAPI.
class StatefulNnApiDelegate : public TfLiteDelegate {
 public:
  // Encapsulates all options that are specific to NNAPI delegate.
  struct Options {
    // Preferred Power/perf trade-off. For more details please see
    // ANeuralNetworksCompilation_setPreference documentation in :
    // https://developer.android.com/ndk/reference/group/neural-networks.html
    enum ExecutionPreference {
      kUndefined = -1,
      kLowPower = 0,
      kFastSingleAnswer = 1,
      kSustainedSpeed = 2,
    };

    // Preferred Power/perf trade-off.
    ExecutionPreference execution_preference = kUndefined;

    // Selected NNAPI accelerator with nul-terminated name.
    // Default to nullptr, which implies the NNAPI default behavior: NNAPI
    // runtime is allowed to use all available accelerators. If the selected
    // accelerator cannot be found, NNAPI will not be used.
    // It is the caller's responsibility to ensure the string is valid for the
    // duration of the Options object lifetime.
    const char* accelerator_name = nullptr;

    // The nul-terminated cache dir for NNAPI model.
    // Default to nullptr, which implies the NNAPI will not try caching the
    // compilation.
    const char* cache_dir = nullptr;

    // The unique nul-terminated token string for NNAPI model.
    // Default to nullptr, which implies the NNAPI will not try caching the
    // compilation. It is the caller's responsibility to ensure there is no
    // clash of the tokens.
    // NOTE: when using compilation caching, it is not recommended to use the
    // same delegate instance for multiple models.
    const char* model_token = nullptr;

    // Whether to disallow NNAPI CPU usage. Only effective on Android 10 and
    // above. The NNAPI CPU typically performs less well than built-in TfLite
    // kernels, but allowing CPU allows partial acceleration of models. If this
    // is set to true, NNAPI is only used if the whole model is accelerated.
    bool disallow_nnapi_cpu = true;

    // Specifies the max number of partitions to delegate. A value <= 0 means
    // no limit.
    // If the delegation of the full set of supported nodes would generate a
    // number of partitions greater than this parameter, only
    // <max_number_delegated_partitions> of them will be actually accelerated.
    // The selection is currently done sorting partitions in decreasing order
    // of number of nodes and selecting them until the limit is reached.
    int max_number_delegated_partitions = 3;

    // Allow fp32 computation to be run in fp16.
    bool allow_fp16 = false;

    // Specifies the relative priority for executions of the model.
    // Available values are {ANEURALNETWORKS_PRIORITY_LOW,
    // ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH,
    // ANEURALNETWORKS_PRIORITY_DEFAULT}.
    int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;

    // Specifies the maximum expected duration in nanosecond for compiling the
    // model. If the device is not able to complete the compilation within the
    // specified duration, the compilation may be aborted. If set to 0, the
    // timeout duration is considered infinite.
    uint64_t max_compilation_timeout_duration_ns = 0;

    // Specifies the maximum expected duration in nanosecond for executing the
    // model. If the device is not able to complete the execution within the
    // specified duration, the execution may be aborted. If set to 0, the
    // timeout duration is considered infinite.
    uint64_t max_execution_timeout_duration_ns = 0;

    // Specifies the maximum expected duration in nanosecond for WHILE loops in
    // the execution. If a WHILE loop condition model does not output false
    // within the specified duration, the execution will be aborted. If set to
    // 0, the default timeout for loops will be used.
    uint64_t max_execution_loop_timeout_duration_ns = 0;

    // Whether to allow dynamic dimension sizes without re-compilation.
    // A tensor with dynamic dimensions must have a valid dim_signature
    // defined.
    // Only supported in NNAPI 1.1 and newer versions.
    // WARNING: Setting this flag to true may result in model being rejected by
    // accelerator. This should only be enabled if the target device supports
    // dynamic dimensions of the model.
    bool allow_dynamic_dimensions = false;
  };

  // Uses default options.
  StatefulNnApiDelegate();

  // The ownership of the NnApi instance is left to the caller of the
  // StatefulNnApiDelegate constructor; the caller must ensure that the
  // lifetime of the NnApi instance exceeds the lifetime of the
  // StatefulNnApiDelegate.
  explicit StatefulNnApiDelegate(const NnApi* nnapi);

  // The constructor that accepts options from user.
  // This makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  explicit StatefulNnApiDelegate(Options options);

  // Constructor that accepts both an NnApi instance and options.
  // The ownership of the NnApi instance is left to the caller of the
  // StatefulNnApiDelegate constructor; the caller must ensure that the
  // lifetime of the NnApi instance exceeds the lifetime of the
  // StatefulNnApiDelegate.
  // This constructor makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  StatefulNnApiDelegate(const NnApi* nnapi, Options options);

  ~StatefulNnApiDelegate() = default;

  // Returns the delegate options.
  // The lifetime of the storage pointed to by the 'const char *' members of
  // the returned Options object is the same as the lifetime of the supplied
  // TfLiteDelegate instance.
  static const Options GetOptions(TfLiteDelegate* delegate);

  // Callback function which copies data from ANeuralNetworksMemory to host
  // tensor CPU buffer. It is the user's responsibility to implement these
  // callbacks for the specific types of shared memory they intend to use.
  // WARNING: This is an experimental interface that is subject to change.
  typedef TfLiteStatus (*CopyToHostTensorFnPtr)(TfLiteTensor* tensor,
                                                ANeuralNetworksMemory* memory,
                                                size_t memory_offset,
                                                size_t byte_size,
                                                void* callback_context);

  // Encapsulates all fields related to memory registration for internal
  // bookkeeping only.
  struct MemoryRegistration {
    ANeuralNetworksMemory* memory;
    CopyToHostTensorFnPtr callback;
    void* callback_context;
  };

  // Register the ANeuralNetworksMemory handle with the delegate. A
  // TfLiteBufferHandle will be returned to be used with
  // Interpreter::SetBufferHandle. The callback_context will be passed to the
  // callback function when invoked.
  // Note: the returned TfLiteBufferHandle can only be used with a single
  // Interpreter instance. However, the caller can register the same memory
  // multiple times to get different handles to use with different Interpreter
  // instances
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteBufferHandle RegisterNnapiMemory(ANeuralNetworksMemory* memory,
                                         CopyToHostTensorFnPtr callback,
                                         void* callback_context);

  // Returns the vector of known ANeuralNetworksMemory handles.
  // Note: this function is not intended to be called by developers.
  // WARNING: This is an experimental interface that is subject to change.
  static const std::vector<MemoryRegistration>& GetTensorMemoryMap(
      TfLiteDelegate* delegate);

  // Returns the int value of the ResultCode returned by the latest
  // failed call to NNAPI, if any. Zero only in case of NO failed calls since
  // the construction of this instance of StatefulNnApiDelegate.
  // The error code is reset when the delegate is re-initialized
  // (i.e. when calling interpreter.ModifyGraphWithDelegate(delegate)).
  int GetNnApiErrno() const;

 private:
  // Encapsulates all delegate data.
  struct Data {
    // Pointer to NNAPI implementation to be used by this delegate as
    // set when building the StatefulNnApiDelegate instance.
    // Will generally be the NnApiInstance() singleton but can be overridden
    // for testing or for users needing to wrap or stub parts of NNAPI.
    // The ownership of the nnapi instance is left to the caller of
    // the StatefulNnApiDelegate constructor.
    const NnApi* nnapi;
    // Preferred Power/perf trade-off.
    Options::ExecutionPreference execution_preference;
    // Selected NNAPI accelerator name.
    std::string accelerator_name;
    // The cache dir for NNAPI model.
    std::string cache_dir;
    // The unique token string for NNAPI model.
    std::string model_token;
    // Whether to disallow NNAPI CPU.
    bool disallow_nnapi_cpu;
    // Tensor to ANeuralNetworksMemory mapping.
    std::vector<MemoryRegistration> tensor_memory_map;
    // Contains a non zero value if any NNAPI method call
    // operation returned a non zero result code.
    int nnapi_errno = ANEURALNETWORKS_NO_ERROR;
    // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare
    // when trying to understand if all nodes are supported by the target
    // accelerators.
    // The key is the index of the first node in the partition.
    // Couldn't use unique_ptr because of problems building on gcc
    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
    // Maximum number of NNAPI partitions to delegate. Zero or negative means
    // no limit. Copied from StatefulNnApiDelegate::Options
    int max_number_delegated_partitions;
    // Allow fp32 computation to be run in fp16.
    bool allow_fp16;
    // Specifies the relative priority for executions of the model.
    int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
    // Specifies the maximum expected duration in nanosecond for compiling the
    // model.
    uint64_t max_compilation_timeout_duration_ns = 0;
    // Specifies the maximum expected duration in nanosecond for executing the
    // model.
    uint64_t max_execution_timeout_duration_ns = 0;
    // Specifies the maximum expected duration in nanosecond for WHILE loops in
    // the execution
    uint64_t max_execution_loop_timeout_duration_ns = 0;
    // Whether to allow dynamic dimension sizes without re-compilation.
    bool allow_dynamic_dimensions = false;

    explicit Data(const NnApi* nnapi);
    ~Data();

    // Caches an initialised NNAPIDelegateKernel.
    void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
                             NNAPIDelegateKernel* delegate_state);
    // Returns a cached NNAPIDelegateKernel if available and removes it
    // from the cache transferring the ownership to the caller.
    NNAPIDelegateKernel* MaybeGetCachedDelegateKernel(
        const TfLiteDelegateParams* delegate_params);
  };

  // Implements TfLiteDelegate::Prepare. Please refer to TFLiteDelegate
  // documentation for more info.
  static TfLiteStatus DoPrepare(TfLiteContext* context,
                                TfLiteDelegate* delegate);

  // Copy the data from delegate buffer handle into raw memory of the given
  // 'tensor'. The delegate is allowed to allocate the raw
  // bytes as long as it follows the rules for kTfLiteDynamic tensors.
  static TfLiteStatus DoCopyFromBufferHandle(TfLiteContext* context,
                                             TfLiteDelegate* delegate,
                                             TfLiteBufferHandle buffer_handle,
                                             TfLiteTensor* tensor);

  // Copy the data from raw memory of the given 'tensor' to delegate buffer
  // handle. Currently this function is not supported, and calling the function
  // will result in an error.
  static TfLiteStatus DoCopyToBufferHandle(TfLiteContext* context,
                                           TfLiteDelegate* delegate,
                                           TfLiteBufferHandle buffer_handle,
                                           TfLiteTensor* tensor);

  // Free the Delegate Buffer Handle. Note: This only frees the handle, but
  // this doesn't release the underlying resource (e.g. textures). The
  // resources are either owned by application layer or the delegate.
  static void DoFreeBufferHandle(TfLiteContext* context,
                                 TfLiteDelegate* delegate,
                                 TfLiteBufferHandle* handle);

  // Returns the nodes that can be delegated via NNAPI to the accelerator
  // specified in the delegate options and information about the way the
  // graph will be partitioned if the supported nodes will be delegated.
  // Partition information is composed by the number of partitions and
  // the delegate parameters associated to each partition.
  // The method also caches in delegate->data the NNApiDelegateKernel instances
  // that have been created during the device evaluation.
  // All arguments are expected to be non-null.
  static TfLiteStatus GetNodesSupportedByAccelerator(
      TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
      const std::vector<int>& supported_nodes,
      std::vector<int>* device_supported_nodes, int* num_partitions,
      TfLiteDelegateParams** params_array, int* nnapi_errno);

  // Alters the given array of nodes_to_delegate to limit the number of NNAPI
  // owned partition to be less or equal than num_partitions. If num_partitions
  // is less or equal to zero the input is left unaltered.
  // The nodes_to_delegate array is expected to contain at element 0 the number
  // of nodes to delegate and in remaining elements the set of nodes
  // that would be delegated to NNAPI if this function wouldn't be
  // called. It will be altered storing in the first element the count of
  // nodes to actually delegate and in the remainder of the array the indexes.
  // The params_array params might be altered during the functions execution.
  static TfLiteStatus LimitDelegatedPartitions(
      int max_partitions,
      std::vector<TfLiteDelegateParams> partition_params_array,
      std::vector<int>* nodes_to_delegate);

  // Delegate data presented through TfLiteDelegate::data_.
  Data delegate_data_;
};

// DEPRECATED: Please use StatefulNnApiDelegate class instead.
//
// Returns a singleton delegate that can be used to use the NN API.
// e.g.
//   NnApiDelegate* delegate = NnApiDelegate();
//   interpreter->ModifyGraphWithDelegate(&delegate);
// NnApiDelegate() returns a singleton, so you should not free this
// pointer or worry about its lifetime.
TfLiteDelegate* NnApiDelegate();

}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_