/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

// Enable XNNPACK acceleration for signed quantized 8-bit inference.
// This includes operators with channel-wise quantized weights.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QS8 0x00000001
// Enable XNNPACK acceleration for unsigned quantized 8-bit inference.
#define TFLITE_XNNPACK_DELEGATE_FLAG_QU8 0x00000002
// Force FP16 inference for FP32 operators.
#define TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16 0x00000004

struct TfLiteXNNPackDelegateWeightsCache;

typedef struct {
  // Number of threads to use in the thread pool.
  // A value of 0 or less means that no thread pool is used.
  int32_t num_threads;
  // Bitfield with any combination of the following binary options:
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QS8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_QU8
  // - TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16
  uint32_t flags;
  // Cache for packed weights; can be shared between multiple delegate
  // instances.
  struct TfLiteXNNPackDelegateWeightsCache* weights_cache;
} TfLiteXNNPackDelegateOptions;

// Returns a structure with the default XNNPack delegate options.
TFL_CAPI_EXPORT TfLiteXNNPackDelegateOptions
TfLiteXNNPackDelegateOptionsDefault();

// Creates a new delegate instance that must be destroyed with
// `TfLiteXNNPackDelegateDelete` when the delegate is no longer used by TFLite.
// When `options` is set to `nullptr`, default values are used (see the
// implementation of TfLiteXNNPackDelegateOptionsDefault in the .cc file for
// details).
TFL_CAPI_EXPORT TfLiteDelegate* TfLiteXNNPackDelegateCreate(
    const TfLiteXNNPackDelegateOptions* options);

// Returns the pthreadpool_t object used for parallelization in XNNPACK.
// May return NULL if the XNNPack delegate is single-threaded.
//
// WARNING: This API is experimental and subject to change.
TFL_CAPI_EXPORT void* TfLiteXNNPackDelegateGetThreadPool(
    TfLiteDelegate* delegate);

// Destroys a delegate created with a `TfLiteXNNPackDelegateCreate` call.
TFL_CAPI_EXPORT void TfLiteXNNPackDelegateDelete(TfLiteDelegate* delegate);
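
// A minimal C++ usage sketch of the lifecycle above. The model path, thread
// count, and flag choice are illustrative assumptions, not requirements;
// error handling is omitted for brevity. Note that the delegate must outlive
// every interpreter that uses it.
//
//   std::unique_ptr<tflite::FlatBufferModel> model =
//       tflite::FlatBufferModel::BuildFromFile("model.tflite");
//   tflite::ops::builtin::BuiltinOpResolver resolver;
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
//
//   TfLiteXNNPackDelegateOptions options =
//       TfLiteXNNPackDelegateOptionsDefault();
//   options.num_threads = 2;
//   options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_QS8;
//   TfLiteDelegate* delegate = TfLiteXNNPackDelegateCreate(&options);
//
//   interpreter->ModifyGraphWithDelegate(delegate);
//   interpreter->AllocateTensors();
//   interpreter->Invoke();
//
//   interpreter.reset();                    // Destroy the interpreter first,
//   TfLiteXNNPackDelegateDelete(delegate);  // then the delegate.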

// Creates a new weights cache that can be shared between multiple delegate
// instances. Prefer TfLiteXNNPackDelegateWeightsCacheCreateWithSize, which can
// reduce memory bandwidth.
TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
TfLiteXNNPackDelegateWeightsCacheCreate();

// Creates a new weights cache with a specified initial size that can be shared
// between multiple delegate instances. The weights cache can hold up to `size`
// bytes without growing.
TFL_CAPI_EXPORT struct TfLiteXNNPackDelegateWeightsCache*
TfLiteXNNPackDelegateWeightsCacheCreateWithSize(size_t size);

// Soft-finalizes a weights cache. Extra space is left in the weights cache so
// that cache "insertions" can still be performed, but only when they are cache
// hits; genuinely new weights can no longer be added. This has memory overhead
// compared to TfLiteXNNPackDelegateWeightsCacheFinalizeHard. Use this if the
// number of interpreter instances using the XNNPACK delegate is not fixed
// (e.g. instances are created on demand in a server daemon).
// Returns true on success, false on error.
TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeSoft(
    struct TfLiteXNNPackDelegateWeightsCache* cache);

// Hard-finalizes a weights cache: the cache is effectively frozen and no
// further cache operations are allowed. Memory is resized to the smallest
// possible footprint. Use this if the number of interpreter instances using
// the XNNPACK delegate is fixed and all instances can be created up front.
// This has the lowest memory usage.
// Returns true on success, false on error.
TFL_CAPI_EXPORT bool TfLiteXNNPackDelegateWeightsCacheFinalizeHard(
    struct TfLiteXNNPackDelegateWeightsCache* cache);

// Destroys a weights cache created with a
// `TfLiteXNNPackDelegateWeightsCacheCreate` call.
TFL_CAPI_EXPORT void TfLiteXNNPackDelegateWeightsCacheDelete(
    struct TfLiteXNNPackDelegateWeightsCache* cache);

#ifdef __cplusplus
}
#endif // __cplusplus

#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_XNNPACK_DELEGATE_H_
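
// A minimal sketch of sharing one weights cache between two delegate
// instances running the same model, so that packed weights are reused.
// `interpreter1` and `interpreter2` are assumed to already be built from that
// model; error handling is omitted.
//
//   TfLiteXNNPackDelegateWeightsCache* cache =
//       TfLiteXNNPackDelegateWeightsCacheCreate();
//
//   TfLiteXNNPackDelegateOptions options =
//       TfLiteXNNPackDelegateOptionsDefault();
//   options.weights_cache = cache;
//
//   TfLiteDelegate* delegate1 = TfLiteXNNPackDelegateCreate(&options);
//   TfLiteDelegate* delegate2 = TfLiteXNNPackDelegateCreate(&options);
//   interpreter1->ModifyGraphWithDelegate(delegate1);  // Packs weights.
//   interpreter2->ModifyGraphWithDelegate(delegate2);  // Hits the cache.
//
//   // All instances are created up front here, so hard finalization gives
//   // the lowest memory usage; use the soft variant instead if more
//   // instances may be created later.
//   TfLiteXNNPackDelegateWeightsCacheFinalizeHard(cache);
//
//   // ... run inference on both interpreters ...
//
//   // Destroy the interpreters and delegates before the cache they share.
//   TfLiteXNNPackDelegateWeightsCacheDelete(cache);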