1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include <string>
16 #include <utility>
17 
18 #include "tensorflow/lite/tools/delegates/delegate_provider.h"
19 #include "tensorflow/lite/tools/evaluation/utils.h"
20 #if TFLITE_SUPPORTS_GPU_DELEGATE
21 #include "tensorflow/lite/delegates/gpu/delegate.h"
22 #elif defined(__APPLE__)
23 #include "TargetConditionals.h"
24 #if (TARGET_OS_IPHONE && !TARGET_IPHONE_SIMULATOR) || \
25     (TARGET_OS_OSX && TARGET_CPU_ARM64)
26 // Only enable metal delegate when using a real iPhone device or Apple Silicon.
27 #define REAL_IPHONE_DEVICE
28 #include "tensorflow/lite/delegates/gpu/metal_delegate.h"
29 #endif
30 #endif
31 
32 namespace tflite {
33 namespace tools {
34 
35 class GpuDelegateProvider : public DelegateProvider {
36  public:
GpuDelegateProvider()37   GpuDelegateProvider() {
38     default_params_.AddParam("use_gpu", ToolParam::Create<bool>(false));
39 #if TFLITE_SUPPORTS_GPU_DELEGATE || defined(REAL_IPHONE_DEVICE)
40     default_params_.AddParam("gpu_precision_loss_allowed",
41                              ToolParam::Create<bool>(true));
42     default_params_.AddParam("gpu_experimental_enable_quant",
43                              ToolParam::Create<bool>(true));
44 #endif
45 #if TFLITE_SUPPORTS_GPU_DELEGATE
46     default_params_.AddParam("gpu_inference_for_sustained_speed",
47                              ToolParam::Create<bool>(false));
48     default_params_.AddParam("gpu_backend", ToolParam::Create<std::string>(""));
49 #endif
50 #if defined(REAL_IPHONE_DEVICE)
51     default_params_.AddParam("gpu_wait_type",
52                              ToolParam::Create<std::string>(""));
53 #endif
54   }
55 
56   std::vector<Flag> CreateFlags(ToolParams* params) const final;
57 
58   void LogParams(const ToolParams& params, bool verbose) const final;
59 
60   TfLiteDelegatePtr CreateTfLiteDelegate(const ToolParams& params) const final;
61   std::pair<TfLiteDelegatePtr, int> CreateRankedTfLiteDelegate(
62       const ToolParams& params) const final;
63 
GetName() const64   std::string GetName() const final { return "GPU"; }
65 };
66 REGISTER_DELEGATE_PROVIDER(GpuDelegateProvider);
67 
CreateFlags(ToolParams * params) const68 std::vector<Flag> GpuDelegateProvider::CreateFlags(ToolParams* params) const {
69   std::vector<Flag> flags = {
70     CreateFlag<bool>("use_gpu", params, "use gpu"),
71 #if TFLITE_SUPPORTS_GPU_DELEGATE || defined(REAL_IPHONE_DEVICE)
72     CreateFlag<bool>("gpu_precision_loss_allowed", params,
73                      "Allow to process computation in lower precision than "
74                      "FP32 in GPU. By default, it's enabled."),
75     CreateFlag<bool>("gpu_experimental_enable_quant", params,
76                      "Whether to enable the GPU delegate to run quantized "
77                      "models or not. By default, it's enabled."),
78 #endif
79 #if TFLITE_SUPPORTS_GPU_DELEGATE
80     CreateFlag<bool>("gpu_inference_for_sustained_speed", params,
81                      "Whether to prefer maximizing the throughput. This mode "
82                      "will help when the same delegate will be used repeatedly "
83                      "on multiple inputs. This is supported on non-iOS "
84                      "platforms. By default, it's disabled."),
85     CreateFlag<std::string>(
86         "gpu_backend", params,
87         "Force the GPU delegate to use a particular backend for execution, and "
88         "fail if unsuccessful. Should be one of: cl, gl"),
89 #endif
90 #if defined(REAL_IPHONE_DEVICE)
91     CreateFlag<std::string>(
92         "gpu_wait_type", params,
93         "GPU wait type. Should be one of the following: passive, active, "
94         "do_not_wait, aggressive"),
95 #endif
96   };
97   return flags;
98 }
99 
LogParams(const ToolParams & params,bool verbose) const100 void GpuDelegateProvider::LogParams(const ToolParams& params,
101                                     bool verbose) const {
102   LOG_TOOL_PARAM(params, bool, "use_gpu", "Use gpu", verbose);
103 #if TFLITE_SUPPORTS_GPU_DELEGATE || defined(REAL_IPHONE_DEVICE)
104   LOG_TOOL_PARAM(params, bool, "gpu_precision_loss_allowed",
105                  "Allow lower precision in gpu", verbose);
106   LOG_TOOL_PARAM(params, bool, "gpu_experimental_enable_quant",
107                  "Enable running quant models in gpu", verbose);
108 #endif
109 #if TFLITE_SUPPORTS_GPU_DELEGATE
110   LOG_TOOL_PARAM(params, bool, "gpu_inference_for_sustained_speed",
111                  "Prefer maximizing the throughput in gpu", verbose);
112   LOG_TOOL_PARAM(params, std::string, "gpu_backend", "GPU backend", verbose);
113 #endif
114 #if defined(REAL_IPHONE_DEVICE)
115   LOG_TOOL_PARAM(params, std::string, "gpu_wait_type", "GPU delegate wait type",
116                  verbose);
117 #endif
118 }
119 
CreateTfLiteDelegate(const ToolParams & params) const120 TfLiteDelegatePtr GpuDelegateProvider::CreateTfLiteDelegate(
121     const ToolParams& params) const {
122   TfLiteDelegatePtr delegate(nullptr, [](TfLiteDelegate*) {});
123 
124   if (params.Get<bool>("use_gpu")) {
125 #if TFLITE_SUPPORTS_GPU_DELEGATE
126     TfLiteGpuDelegateOptionsV2 gpu_opts = TfLiteGpuDelegateOptionsV2Default();
127     if (params.Get<bool>("gpu_precision_loss_allowed")) {
128       gpu_opts.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
129       gpu_opts.inference_priority2 =
130           TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE;
131       gpu_opts.inference_priority3 =
132           TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
133     }
134     if (params.Get<bool>("gpu_experimental_enable_quant")) {
135       gpu_opts.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_QUANT;
136     }
137     if (params.Get<bool>("gpu_inference_for_sustained_speed")) {
138       gpu_opts.inference_preference =
139           TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
140     }
141     std::string gpu_backend = params.Get<std::string>("gpu_backend");
142     if (!gpu_backend.empty()) {
143       if (gpu_backend == "cl") {
144         gpu_opts.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_CL_ONLY;
145       } else if (gpu_backend == "gl") {
146         gpu_opts.experimental_flags |= TFLITE_GPU_EXPERIMENTAL_FLAGS_GL_ONLY;
147       }
148     }
149     gpu_opts.max_delegated_partitions =
150         params.Get<int>("max_delegated_partitions");
151 
152     // Serialization.
153     std::string serialize_dir =
154         params.Get<std::string>("delegate_serialize_dir");
155     std::string serialize_token =
156         params.Get<std::string>("delegate_serialize_token");
157     if (!serialize_dir.empty() && !serialize_token.empty()) {
158       gpu_opts.experimental_flags =
159           gpu_opts.experimental_flags |
160           TFLITE_GPU_EXPERIMENTAL_FLAGS_ENABLE_SERIALIZATION;
161       gpu_opts.serialization_dir = serialize_dir.c_str();
162       gpu_opts.model_token = serialize_token.c_str();
163     }
164 
165     delegate = evaluation::CreateGPUDelegate(&gpu_opts);
166 #elif defined(REAL_IPHONE_DEVICE)
167     TFLGpuDelegateOptions gpu_opts = {0};
168     gpu_opts.allow_precision_loss =
169         params.Get<bool>("gpu_precision_loss_allowed");
170     gpu_opts.enable_quantization =
171         params.Get<bool>("gpu_experimental_enable_quant");
172 
173     std::string string_gpu_wait_type = params.Get<std::string>("gpu_wait_type");
174     if (!string_gpu_wait_type.empty()) {
175       TFLGpuDelegateWaitType wait_type = TFLGpuDelegateWaitTypePassive;
176       if (string_gpu_wait_type == "passive") {
177         wait_type = TFLGpuDelegateWaitTypePassive;
178       } else if (string_gpu_wait_type == "active") {
179         wait_type = TFLGpuDelegateWaitTypeActive;
180       } else if (string_gpu_wait_type == "do_not_wait") {
181         wait_type = TFLGpuDelegateWaitTypeDoNotWait;
182       } else if (string_gpu_wait_type == "aggressive") {
183         wait_type = TFLGpuDelegateWaitTypeAggressive;
184       }
185       gpu_opts.wait_type = wait_type;
186     }
187     delegate = TfLiteDelegatePtr(TFLGpuDelegateCreate(&gpu_opts),
188                                  &TFLGpuDelegateDelete);
189 #else
190     TFLITE_LOG(WARN) << "The GPU delegate compile options are only supported "
191                         "on Android or iOS platforms or when the tool was "
192                         "built with -DCL_DELEGATE_NO_GL.";
193     delegate = evaluation::CreateGPUDelegate();
194 #endif
195 
196     if (!delegate.get()) {
197       TFLITE_LOG(WARN) << "GPU acceleration is unsupported on this platform.";
198     }
199   }
200   return delegate;
201 }
202 
203 std::pair<TfLiteDelegatePtr, int>
CreateRankedTfLiteDelegate(const ToolParams & params) const204 GpuDelegateProvider::CreateRankedTfLiteDelegate(
205     const ToolParams& params) const {
206   auto ptr = CreateTfLiteDelegate(params);
207   return std::make_pair(std::move(ptr), params.GetPosition<bool>("use_gpu"));
208 }
209 }  // namespace tools
210 }  // namespace tflite
211