/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

16 #ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
17 #define TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
18 
19 #include "tensorflow/lite/c/common.h"
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif  // __cplusplus
24 
// Option bits for the XNNPACK delegate. Each flag occupies a distinct bit, so
// any combination can be OR-ed together into a single flags word.

// Enable XNNPACK acceleration for signed quantized 8-bit inference.
// This includes operators with channel-wise quantized weights.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QS8 0x00000001
// Enable XNNPACK acceleration for unsigned quantized 8-bit inference.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QU8 0x00000002
// Force FP16 inference for FP32 operators.
#define TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 0x00000004

// Opaque weights-cache type; only pointers to it are used in this header.
struct TfLiteXNNPackDelegateWeightsCache;

// Configuration passed to `TfLiteXNNPackDelegateCreate`.
typedef struct {
  // Number of threads to use in the thread pool.
  // 0 or negative value means no thread pool used.
  int32_t num_threads;
  // Bitfield with any combination of the following binary options:
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QS8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QU8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16
  uint32_t flags;
  // Cache for packed weights; can be shared between multiple delegate
  // instances.
  struct TfLiteXNNPackDelegateWeightsCache* weights_cache;
} TfLiteXNNPackDelegateOptions;

49 // Returns a structure with the default XNNPack delegate options.
50 TFL_CAPI_EXPORT TfLiteXNNPackDelegateOptions
51 TfLiteXNNPackDelegateOptionsDefault();
52 
53 // Creates a new delegate instance that need to be destroyed with
54 // `TfLiteXNNPackDelegateDelete` when delegate is no longer used by TFLite.
55 // When `options` is set to `nullptr`, default values are used (see
56 // implementation of TfLiteXNNPackDelegateOptionsDefault in the .cc file for
57 // details).
58 TFL_CAPI_EXPORT TfLiteDelegate* TfLiteXNNPackDelegateCreate(
59     const TfLiteXNNPackDelegateOptions* options);
60 
61 // Returns the pthreadpool_t object used for parallelization in XNNPACK.
62 // Can return NULL if the XNNPack delegate is single-threaded.
63 //
64 // WARNING: This API is experimental and subject to change.
65 TFL_CAPI_EXPORT void* TfLiteXNNPackDelegateGetThreadPool(
66     TfLiteDelegate* delegate);
67 
68 // Destroys a delegate created with `TfLiteXNNPackDelegateCreate` call.
69 TFL_CAPI_EXPORT void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate);
70 
71 // Creates a new weights cache that can be shared with multiple delegate
72 // instances. Prefer TfLiteXNNPackDelegateWeightsCacheCreateWithSize which can
73 // reduce memory bandwidth.
74 TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
75 TfLiteXNNPackDelegateWeightsCacheCreate();
76 // Creates a new weights cache with a specified initial size that can be shared
77 // with multiple delegate instances. The weights cache can hold up to size bytes
78 // without growing.
79 TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
80 TfLiteXNNPackDelegateWeightsCacheCreateWithSize(size_t size);
81 // Soft-finalize a weights cache. Extra space will be left in the weights cache
82 // to allow for cache "insertion" only if it is a cache hit. This has memory
83 // overhead compared to TfLiteXNNPackDelegateWeightsCacheFinalizeHard. Use this
84 // if the number of interpreter instances using XNNPACK delegate is not fixed
85 // (e.g. created based on workload in a server daemon).
86 // Returns true on success, false on error.
87 TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeSoft(
88     struct TfLiteXNNPackDelegateWeightsCache* cache);
89 // Hard-finalize a weights cache, cache is effectively frozen and no more cache
90 // operations are allowed. Memory is resized to smallest possible. Use this if
91 // the number of interpreter instances using XNNPACK delegate can be fixed and
92 // all creation of instances can happen up front. This has the lowest memory
93 // usage.
94 // Returns true on success, false on error.
95 TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeHard(
96     struct TfLiteXNNPackDelegateWeightsCache* cache);
97 // Destroys a weights cache created with
98 // `TfLiteXNNPackDelegateWeightsCacheCreate` call.
99 TFL_CAPI_EXPORT void TfLiteXNNPackDelegateWeightsCacheDelete(
100     struct TfLiteXNNPackDelegateWeightsCache* cache);
101 
102 #ifdef __cplusplus
103 }
104 #endif  // __cplusplus
105 
106 #endif  // TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
107