• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
16 #define TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
17 
18 #include <memory>
19 #include <string>
20 #include <unordered_map>
21 #include <vector>
22 
23 #include "tensorflow/lite/c/common.h"
24 #include "tensorflow/lite/delegates/serialization.h"
25 #include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
26 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
27 
28 struct NnApiSLDriverImplFL5;
29 typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
30 
31 namespace tflite {
32 
33 namespace delegate {
34 namespace nnapi {
35 class NNAPIDelegateKernel;
36 }  // namespace nnapi
37 }  // namespace delegate
38 
39 using tflite::delegate::nnapi::NNAPIDelegateKernel;
40 
// TfLiteDelegate to interface with NNAPI.
class StatefulNnApiDelegate : public TfLiteDelegate {
 public:
  // Encapsulates all options that are specific to NNAPI delegate.
  struct Options {
    // Preferred Power/perf trade-off. For more details please see
    // ANeuralNetworksCompilation_setPreference documentation in:
    // https://developer.android.com/ndk/reference/group/neural-networks.html
    enum ExecutionPreference {
      kUndefined = -1,
      kLowPower = 0,
      kFastSingleAnswer = 1,
      kSustainedSpeed = 2,
    };

    // Preferred Power/perf trade-off.
    ExecutionPreference execution_preference = kUndefined;

    // Selected NNAPI accelerator with nul-terminated name.
    // Default to nullptr, which implies the NNAPI default behavior: NNAPI
    // runtime is allowed to use all available accelerators. If the selected
    // accelerator cannot be found, NNAPI will not be used.
    // It is the caller's responsibility to ensure the string is valid for the
    // duration of the Options object lifetime.
    const char* accelerator_name = nullptr;

    // The nul-terminated cache dir for NNAPI model.
    // Default to nullptr, which implies the NNAPI will not try caching the
    // compilation.
    const char* cache_dir = nullptr;

    // The unique nul-terminated token string for NNAPI model.
    // Default to nullptr, which implies the NNAPI will not try caching the
    // compilation. It is the caller's responsibility to ensure there is no
    // clash of the tokens.
    // NOTE: when using compilation caching, it is not recommended to use the
    // same delegate instance for multiple models.
    const char* model_token = nullptr;

    // Whether to disallow NNAPI CPU usage. Only effective on Android 10 and
    // above. The NNAPI CPU typically performs less well than built-in TfLite
    // kernels, but allowing CPU allows partial acceleration of models. If this
    // is set to true, NNAPI is only used if the whole model is accelerated.
    bool disallow_nnapi_cpu = true;

    // Specifies the max number of partitions to delegate. A value <= 0 means
    // no limit.
    // If the delegation of the full set of supported nodes would generate a
    // number of partitions greater than this parameter, only
    // <max_number_delegated_partitions> of them will be actually accelerated.
    // The selection is currently done sorting partitions in decreasing order
    // of number of nodes and selecting them until the limit is reached.
    int max_number_delegated_partitions = 3;

    // Allow fp32 computation to be run in fp16.
    bool allow_fp16 = false;

    // Specifies the relative priority for executions of the model.
    // Available values are {ANEURALNETWORKS_PRIORITY_LOW,
    // ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH,
    // ANEURALNETWORKS_PRIORITY_DEFAULT}.
    int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;

    // Specifies the maximum expected duration in nanoseconds for compiling the
    // model. If the device is not able to complete the compilation within the
    // specified duration, the compilation may be aborted. If set to 0, the
    // timeout duration is considered infinite.
    uint64_t max_compilation_timeout_duration_ns = 0;

    // Specifies the maximum expected duration in nanoseconds for executing the
    // model. If the device is not able to complete the execution within the
    // specified duration, the execution may be aborted. If set to 0, the
    // timeout duration is considered infinite.
    uint64_t max_execution_timeout_duration_ns = 0;

    // Specifies the maximum expected duration in nanoseconds for WHILE loops
    // in the execution. If a WHILE loop condition model does not output false
    // within the specified duration, the execution will be aborted. If set to
    // 0, the default timeout for loops will be used.
    uint64_t max_execution_loop_timeout_duration_ns = 0;

    // Whether to allow dynamic dimension sizes without re-compilation.
    // A tensor with a dynamic dimension must have a valid dim_signature
    // defined.
    // Only supported in NNAPI 1.1 and newer versions.
    // WARNING: Setting this flag to true may result in model being rejected by
    // accelerator. This should only be enabled if the target device supports
    // dynamic dimensions of the model.
    bool allow_dynamic_dimensions = false;

    // Force using NNAPI Burst mode if supported.
    // Burst mode allows accelerators to efficiently manage resources, which
    // would significantly reduce overhead especially if the same delegate
    // instance is to be used for multiple inferences.
    // If NNAPI devices are specified and are of NNAPI feature level 5 or
    // higher, NNAPI delegate will automatically enable burst mode for better
    // performance.
    // Default: Disabled for devices with NNAPI feature level 4 or lower.
    bool use_burst_computation = false;
  };

  // Uses default options.
  StatefulNnApiDelegate();

  // The ownership of the NnApi instance is left to the caller of the
  // StatefulNnApiDelegate constructor; the caller must ensure that the lifetime
  // of the NnApi instance exceeds the lifetime of the StatefulNnApiDelegate.
  explicit StatefulNnApiDelegate(const NnApi* nnapi);

  // The constructor that accepts options from user.
  // This makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  explicit StatefulNnApiDelegate(Options options);

  // Constructor that accepts both an NnApi instance and options.
  // The ownership of the NnApi instance is left to the caller of the
  // StatefulNnApiDelegate constructor; the caller must ensure that the lifetime
  // of the NnApi instance exceeds the lifetime of the StatefulNnApiDelegate.
  // This constructor makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  StatefulNnApiDelegate(const NnApi* nnapi, Options options);

  // Constructor that accepts an NnApiSLDriverImplFL5 instance and options.
  // The ownership of the NnApiSLDriverImplFL5 instance is left to the caller of
  // the StatefulNnApiDelegate constructor; the caller must ensure that the
  // lifetime of the NnApiSLDriverImplFL5 instance encompasses all calls to
  // methods on the StatefulNnApiDelegate instance, other than the destructor.
  // This constructor makes a copy of any data that it needs from Options, so
  // the caller can safely deallocate any storage pointed to by
  // the 'const char *' members of Options immediately after calling this.
  //
  // The NN API Support Library Driver must support at least NNAPI Feature Level
  // 5 (introduced in SDK level 31), but this might point to a compatible struct
  // that also supports a higher NNAPI Feature Level. These cases can be
  // distinguished by examining the base.implFeatureLevel field, which should be
  // set to the supported feature level (which must be >=
  // ANEURALNETWORKS_FEATURE_LEVEL_5).
  //
  // Please note that since NNAPI Support Library doesn't implement some of the
  // functions (see CreateNnApiFromSupportLibrary implementation and NNAPI SL
  // documentation for details), the underlying NnApi structure will have
  // nullptr stored in some of the function pointers. Calling such functions
  // will result in a crash.
  //
  // WARNING: This is an experimental interface that is subject to change.
  StatefulNnApiDelegate(
      const NnApiSLDriverImplFL5* nnapi_support_library_driver,
      Options options);

  ~StatefulNnApiDelegate() = default;

  // Returns the delegate options.
  // The lifetime of the storage pointed to by the 'const char *' members of the
  // returned Options object is the same as the lifetime of the supplied
  // TfLiteDelegate instance.
  static const Options GetOptions(TfLiteDelegate* delegate);

  // Callback function which copies data from ANeuralNetworksMemory to host
  // tensor CPU buffer. It is the user's responsibility to implement these
  // callbacks for the specific types of shared memory they intend to use.
  // WARNING: This is an experimental interface that is subject to change.
  typedef TfLiteStatus (*CopyToHostTensorFnPtr)(TfLiteTensor* tensor,
                                                ANeuralNetworksMemory* memory,
                                                size_t memory_offset,
                                                size_t byte_size,
                                                void* callback_context);

  // Encapsulates all fields related to memory registration for internal
  // bookkeeping only.
  struct MemoryRegistration {
    // The registered shared-memory handle; not owned by the delegate.
    ANeuralNetworksMemory* memory;
    // User-supplied copy callback invoked to move data into a host tensor.
    CopyToHostTensorFnPtr callback;
    // Opaque pointer forwarded verbatim to `callback` on every invocation.
    void* callback_context;
  };

  // Register the ANeuralNetworksMemory handle with the delegate. A
  // TfLiteBufferHandle will be returned to be used with
  // Interpreter::SetBufferHandle. The callback_context will be passed to the
  // callback function when invoked.
  // Note: the returned TfLiteBufferHandle can only be used with a single
  // Interpreter instance. However, the caller can register the same memory
  // multiple times to get different handles to use with different Interpreter
  // instances.
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteBufferHandle RegisterNnapiMemory(ANeuralNetworksMemory* memory,
                                         CopyToHostTensorFnPtr callback,
                                         void* callback_context);

  // Returns the vector of known ANeuralNetworksMemory handles.
  // Note: this function is not intended to be called by developers.
  // WARNING: This is an experimental interface that is subject to change.
  static const std::vector<MemoryRegistration>& GetTensorMemoryMap(
      TfLiteDelegate* delegate);

  // Returns ptr to delegates::Serialization, if caching is enabled by user via
  // cache_dir & model_token.
  static delegates::Serialization* GetCache(TfLiteDelegate* delegate);

  // Returns the int value of the ResultCode returned by the latest
  // failed call to NNAPI, if any. Zero only in case of NO failed calls since
  // the construction of this instance of StatefulNnApiDelegate.
  // The error code is reset when the delegate is re-initialized
  // (i.e. when calling interpreter.ModifyGraphWithDelegate(delegate)).
  int GetNnApiErrno() const;

 private:
  // Encapsulates all delegate data.
  struct Data {
    // Pointer to NNAPI implementation to be used by this delegate as
    // set when building the StatefulNnApiDelegate instance.
    // Will generally be the NnApiInstance() singleton but can be overridden
    // for testing or for users needing to wrap or stub parts of NNAPI.
    // The ownership of the nnapi instance is left to the caller of
    // the StatefulNnApiDelegate constructor.
    const NnApi* nnapi;
    // Preferred Power/perf trade-off.
    Options::ExecutionPreference execution_preference;
    // Selected NNAPI accelerator name.
    std::string accelerator_name;
    // The cache dir for NNAPI model.
    std::string cache_dir;
    // The unique token string for NNAPI model.
    std::string model_token;
    // Whether to disallow NNAPI CPU.
    bool disallow_nnapi_cpu;
    // Tensor to ANeuralNetworksMemory mapping.
    std::vector<MemoryRegistration> tensor_memory_map;
    // Contains a non zero value if any NNAPI method call
    // operation returned a non zero result code.
    int nnapi_errno = ANEURALNETWORKS_NO_ERROR;
    // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare
    // when trying to understand if all nodes are supported by the target
    // accelerators.
    // The key is the index of the first node in the partition.
    // Couldn't use unique_ptr because of problems building on gcc.
    std::unordered_map<int, NNAPIDelegateKernel*> delegate_state_cache;
    // Maximum number of NNAPI partitions to delegate. Zero or negative means
    // no limit. Copied from StatefulNnApiDelegate::Options.
    int max_number_delegated_partitions;
    // Allow fp32 computation to be run in fp16.
    bool allow_fp16;
    // Specifies the relative priority for executions of the model.
    int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
    // Specifies the maximum expected duration in nanoseconds for compiling the
    // model.
    uint64_t max_compilation_timeout_duration_ns = 0;
    // Specifies the maximum expected duration in nanoseconds for executing the
    // model.
    uint64_t max_execution_timeout_duration_ns = 0;
    // Specifies the maximum expected duration in nanoseconds for WHILE loops
    // in the execution.
    uint64_t max_execution_loop_timeout_duration_ns = 0;
    // Whether to allow dynamic dimension sizes without re-compilation.
    bool allow_dynamic_dimensions = false;
    // Whether to use NNAPI Burst mode.
    bool use_burst_computation = false;

    // Smart pointer for automatically cleaning up the NnApi structure in case
    // the delegate was constructed from an NNAPI support library.
    std::unique_ptr<const NnApi> owned_nnapi = nullptr;

    // TFLite Serialization in case caching has been enabled by the user through
    // Options.
    std::unique_ptr<delegates::Serialization> cache;

    explicit Data(const NnApi* nnapi);
    explicit Data(std::unique_ptr<const NnApi> nnapi);
    ~Data();

    // Caches an initialised NNAPIDelegateKernel.
    void CacheDelegateKernel(const TfLiteDelegateParams* delegate_params,
                             NNAPIDelegateKernel* delegate_state);
    // Returns a cached NNAPIDelegateKernel if available and removes it
    // from the cache transferring the ownership to the caller.
    NNAPIDelegateKernel* MaybeGetCachedDelegateKernel(
        const TfLiteDelegateParams* delegate_params);
  };

  // Implements TfLiteDelegate::Prepare. Please refer to TFLiteDelegate
  // documentation for more info.
  static TfLiteStatus DoPrepare(TfLiteContext* context,
                                TfLiteDelegate* delegate);

  // Copy the data from delegate buffer handle into raw memory of the given
  // 'tensor'. The delegate is allowed to allocate the raw
  // bytes as long as it follows the rules for kTfLiteDynamic tensors.
  static TfLiteStatus DoCopyFromBufferHandle(TfLiteContext* context,
                                             TfLiteDelegate* delegate,
                                             TfLiteBufferHandle buffer_handle,
                                             TfLiteTensor* tensor);

  // Copy the data from raw memory of the given 'tensor' to delegate buffer
  // handle. Currently this function is not supported, and calling the function
  // will result in an error.
  static TfLiteStatus DoCopyToBufferHandle(TfLiteContext* context,
                                           TfLiteDelegate* delegate,
                                           TfLiteBufferHandle buffer_handle,
                                           TfLiteTensor* tensor);

  // Free the Delegate Buffer Handle. Note: This only frees the handle, but
  // this doesn't release the underlying resource (e.g. textures). The
  // resources are either owned by application layer or the delegate.
  static void DoFreeBufferHandle(TfLiteContext* context,
                                 TfLiteDelegate* delegate,
                                 TfLiteBufferHandle* handle);

  // Returns the nodes that can be delegated via NNAPI to the accelerator
  // specified in the delegate options and information about the way the
  // graph will be partitioned if the supported nodes will be delegated.
  // Partition information is composed by the number of partitions and
  // the delegate parameters associated to each partition.
  // The method also caches in delegate->data the NNAPIDelegateKernel instances
  // that have been created during the device evaluation.
  // All arguments are expected to be non-null.
  static TfLiteStatus GetNodesSupportedByAccelerator(
      TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
      const std::vector<int>& supported_nodes,
      std::vector<int>* device_supported_nodes, int* num_partitions,
      TfLiteDelegateParams** params_array, int* nnapi_errno);

  // Alters the given array of nodes_to_delegate to limit the number of NNAPI
  // owned partitions to be less or equal than num_partitions. If num_partitions
  // is less or equal to zero the input is left unaltered.
  // The nodes_to_delegate array is expected to contain at element 0 the number
  // of nodes to delegate and in remaining elements the set of nodes
  // that would be delegated to NNAPI if this function wouldn't be
  // called. It will be altered storing in the first element the count of
  // nodes to actually delegate and in the remainder of the array the indexes.
  // The params_array params might be altered during the function's execution.
  static TfLiteStatus LimitDelegatedPartitions(
      int max_partitions,
      std::vector<TfLiteDelegateParams> partition_params_array,
      std::vector<int>* nodes_to_delegate);

  // Shared initialization logic used by all constructors.
  void StatefulNnApiDelegateConstructorImpl(const Options& options);

  // Delegate data presented through TfLiteDelegate::data_.
  Data delegate_data_;
};
382 
// DEPRECATED: Please use StatefulNnApiDelegate class instead.
//
// Returns a singleton delegate that can be used to use the NN API.
// e.g.
//   TfLiteDelegate* delegate = NnApiDelegate();
//   interpreter->ModifyGraphWithDelegate(delegate);
// NnApiDelegate() returns a singleton, so you should not free this
// pointer or worry about its lifetime.
TfLiteDelegate* NnApiDelegate();
392 
393 }  // namespace tflite
394 
395 #endif  // TENSORFLOW_LITE_DELEGATES_NNAPI_NNAPI_DELEGATE_H_
396