/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_

#include <stdint.h>

#include "tensorflow/lite/c/common.h"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

// Encapsulated compilation/runtime tradeoffs.
enum TfLiteGpuInferenceUsage {
  // The delegate will be used only once; therefore, bootstrap/init time
  // should be taken into account.
  TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = 0,

  // Prefer maximizing throughput. The same delegate will be used repeatedly
  // on multiple inputs.
  TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = 1,
};

enum TfLiteGpuInferencePriority {
  // AUTO priority is needed when a single priority is the most important
  // factor. For example,
  // priority1 = MIN_LATENCY would result in the configuration that achieves
  // maximum performance.
  TFLITE_GPU_INFERENCE_PRIORITY_AUTO = 0,
  TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = 1,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = 2,
  TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = 3,
};

// Used to toggle experimental flags used in the delegate. Note that this is a
// bitmask, so the values should be 1, 2, 4, 8, ...etc.
enum TfLiteGpuExperimentalFlags {
  TFLITE_GPU_EXPERIMENTAL_FLAGS_NONE = 0,
  // Enables inference on quantized models with the delegate.
  // NOTE: This is enabled in TfLiteGpuDelegateOptionsV2Default.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT = 1 << 0,
  // Enforces execution with the provided backend.
  TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY = 1 << 1,
  TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY = 1 << 2
};

// IMPORTANT: Always use the TfLiteGpuDelegateOptionsV2Default() method to
// create a new instance of TfLiteGpuDelegateOptionsV2; otherwise, every newly
// added option may break inference.
typedef struct {
  // When set to zero, computations are carried out in maximal possible
  // precision. Otherwise, the GPU may quantize tensors, downcast values, or
  // process in FP16 to increase performance. For most models the precision
  // loss is acceptable.
  // [OBSOLETE]: to be removed
  int32_t is_precision_loss_allowed;

  // Preference is defined in TfLiteGpuInferenceUsage.
  int32_t inference_preference;

  // Ordered priorities provide better control over desired semantics,
  // where priority(n) is more important than priority(n+1); therefore,
  // each time the inference engine needs to make a decision, it uses the
  // ordered priorities to do so.
  // For example:
  //   MAX_PRECISION at priority1 would not allow precision to be decreased,
  //   but moving it to priority2 or priority3 would allow F16 calculation.
  //
  // Priority is defined in TfLiteGpuInferencePriority.
  // AUTO priority can only be used when higher priorities are fully specified.
  // For example:
  //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
  //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
  //            priority3 = AUTO
  //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
  //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
  //            priority3 = MAX_PRECISION
  // Invalid priorities will result in an error.
  int32_t inference_priority1;
  int32_t inference_priority2;
  int32_t inference_priority3;

  // Bitmask flags. See the comments in TfLiteGpuExperimentalFlags.
  int64_t experimental_flags;

  // A graph could have multiple partitions that can be delegated to the GPU.
  // This limits the maximum number of partitions to be delegated. By default,
  // it's set to 1 in TfLiteGpuDelegateOptionsV2Default().
  int32_t max_delegated_partitions;
} TfLiteGpuDelegateOptionsV2;

// Populates TfLiteGpuDelegateOptionsV2 as follows:
//   is_precision_loss_allowed = false
//   inference_preference = TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER
//   priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
//   priority2 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   priority3 = TFLITE_GPU_INFERENCE_PRIORITY_AUTO
//   experimental_flags = TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT
//   max_delegated_partitions = 1
TFL_CAPI_EXPORT TfLiteGpuDelegateOptionsV2 TfLiteGpuDelegateOptionsV2Default();

// Creates a new delegate instance that needs to be destroyed with
// TfLiteGpuDelegateV2Delete when the delegate is no longer used by TFLite.
//
// This delegate encapsulates multiple GPU-acceleration APIs under the hood to
// make use of the fastest one available on a device.
//
// When `options` is set to `nullptr`, the default options are used.
TFL_CAPI_EXPORT TfLiteDelegate* TfLiteGpuDelegateV2Create(
    const TfLiteGpuDelegateOptionsV2* options);

// Destroys a delegate created with the `TfLiteGpuDelegateV2Create` call.
TFL_CAPI_EXPORT void TfLiteGpuDelegateV2Delete(TfLiteDelegate* delegate);
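
// A minimal usage sketch of the API above. Illustrative only: it assumes a
// `tflite::Interpreter` named `interpreter` that has been built elsewhere
// (the interpreter is not part of this header), and it elides error handling.
//
//   TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
//   options.inference_preference =
//       TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
//   // MIN_LATENCY at priority1 with AUTO below it is a valid combination.
//   options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
//   TfLiteDelegate* delegate = TfLiteGpuDelegateV2Create(&options);
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // Handle the error.
//   }
//   if (interpreter->Invoke() != kTfLiteOk) {
//     // Handle the error.
//   }
//   TfLiteGpuDelegateV2Delete(delegate);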

#ifdef __cplusplus
}
#endif  // __cplusplus

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_DELEGATE_H_