1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
17
#include <algorithm>
#include <iomanip>
#include <memory>
#include <random>
#include <sstream>
#include <utility>
23
24 #include "tensorflow/core/util/stats_calculator.h"
25 #include "tensorflow/lite/c/common.h"
26 #if defined(__ANDROID__)
27 #include "tensorflow/lite/delegates/gpu/delegate.h"
28 #include "tensorflow/lite/nnapi/nnapi_util.h"
29 #endif
30 #include "tensorflow/lite/profiling/time.h"
31 #include "tensorflow/lite/tools/benchmark/benchmark_params.h"
32 #include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
33 #include "tensorflow/lite/tools/command_line_flags.h"
34 #include "tensorflow/lite/tools/logging.h"
35
36 #if (defined(ANDROID) || defined(__ANDROID__)) && \
37 (defined(__arm__) || defined(__aarch64__))
38 #define TFLITE_ENABLE_HEXAGON
39 #endif
40
41 namespace tflite {
42 namespace benchmark {
43
PerfOptionName(const BenchmarkParams & params) const44 std::string MultiRunStatsRecorder::PerfOptionName(
45 const BenchmarkParams& params) const {
46 #if defined(__ANDROID__)
47 if (params.Get<bool>("use_nnapi")) {
48 const std::string accelerator =
49 params.Get<std::string>("nnapi_accelerator_name");
50 return accelerator.empty() ? "nnapi(w/o accel name)"
51 : "nnapi(" + accelerator + ")";
52 }
53 #endif
54
55 if (params.Get<bool>("use_gpu")) {
56 #if defined(__ANDROID__)
57 if (params.Get<bool>("gpu_precision_loss_allowed")) {
58 return "gpu-fp16";
59 } else {
60 return "gpu-default";
61 }
62 #else
63 return "gpu-default";
64 #endif
65 }
66
67 #if defined(TFLITE_ENABLE_HEXAGON)
68 if (params.Get<bool>("use_hexagon")) {
69 return "dsp w/ hexagon";
70 }
71 #endif
72
73 // Handle cases run on CPU
74 // Note: could use std::to_string to convert an integer to string but it
75 // requires C++11.
76 std::stringstream sstm;
77 sstm << "cpu w/ " << params.Get<int32_t>("num_threads") << " threads";
78
79 // Handle cases run on CPU w/ the xnnpack delegate
80 if (params.Get<bool>("use_xnnpack")) {
81 sstm << " (xnnpack)";
82 }
83
84 return sstm.str();
85 }
86
OutputStats()87 void MultiRunStatsRecorder::OutputStats() {
88 // Make a 80-character-long header.
89 TFLITE_LOG(INFO) << "\n==============Summary of All Runs w/ Different "
90 "Performance Options==============";
91 std::sort(results_.begin(), results_.end(), EachRunStatsEntryComparator());
92
93 for (const auto& run_stats : results_) {
94 const auto perf_option_name = PerfOptionName(*run_stats.params);
95 std::stringstream stream;
96 stream << std::setw(26) << perf_option_name << ": ";
97 if (!run_stats.completed) {
98 stream << " failed!";
99 } else {
100 run_stats.metrics.inference_time_us().OutputToStream(&stream);
101 // NOTE: As of 2019/11/07, the memory usage is collected in an
102 // OS-process-wide way and this program performs multiple runs in a single
103 // OS process, therefore, the memory usage information of each run becomes
104 // incorrect, hence no output here.
105 }
106 TFLITE_LOG(INFO) << stream.str();
107 }
108 }
109
BenchmarkPerformanceOptions(BenchmarkModel * single_option_run,std::unique_ptr<MultiRunStatsRecorder> all_run_stats)110 BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
111 BenchmarkModel* single_option_run,
112 std::unique_ptr<MultiRunStatsRecorder> all_run_stats)
113 : BenchmarkPerformanceOptions(DefaultParams(), single_option_run,
114 std::move(all_run_stats)) {}
115
BenchmarkPerformanceOptions(BenchmarkParams params,BenchmarkModel * single_option_run,std::unique_ptr<MultiRunStatsRecorder> all_run_stats)116 BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
117 BenchmarkParams params, BenchmarkModel* single_option_run,
118 std::unique_ptr<MultiRunStatsRecorder> all_run_stats)
119 : params_(std::move(params)),
120 single_option_run_(single_option_run),
121 single_option_run_params_(single_option_run->mutable_params()),
122 all_run_stats_(std::move(all_run_stats)) {
123 single_option_run_->AddListener(all_run_stats_.get());
124 }
125
DefaultParams()126 BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() {
127 BenchmarkParams params;
128 params.AddParam("perf_options_list",
129 BenchmarkParam::Create<std::string>("all"));
130 params.AddParam("option_benchmark_run_delay",
131 BenchmarkParam::Create<float>(-1.0f));
132 params.AddParam("random_shuffle_benchmark_runs",
133 BenchmarkParam::Create<bool>(true));
134 return params;
135 }
136
GetFlags()137 std::vector<Flag> BenchmarkPerformanceOptions::GetFlags() {
138 return {
139 CreateFlag<std::string>(
140 "perf_options_list", ¶ms_,
141 "A comma-separated list of TFLite performance options to benchmark. "
142 "By default, all performance options are benchmarked. Note if it's "
143 "set to 'none', then the tool simply benchmark the model against the "
144 "specified benchmark parameters."),
145 CreateFlag<float>("option_benchmark_run_delay", ¶ms_,
146 "The delay between two consecutive runs of "
147 "benchmarking performance options in seconds."),
148 CreateFlag<bool>(
149 "random_shuffle_benchmark_runs", ¶ms_,
150 "Whether to perform all benchmark runs, each of which has different "
151 "performance options, in a random order. It is enabled by default."),
152 };
153 }
154
ParseFlags(int * argc,char ** argv)155 bool BenchmarkPerformanceOptions::ParseFlags(int* argc, char** argv) {
156 auto flag_list = GetFlags();
157 const bool parse_result =
158 Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
159 if (!parse_result) {
160 std::string usage = Flags::Usage(argv[0], flag_list);
161 TFLITE_LOG(ERROR) << usage;
162 return false;
163 }
164
165 // Parse the value of --perf_options_list to find performance options to be
166 // benchmarked.
167 return ParsePerfOptions();
168 }
169
ParsePerfOptions()170 bool BenchmarkPerformanceOptions::ParsePerfOptions() {
171 const auto& perf_options_list = params_.Get<std::string>("perf_options_list");
172 if (!util::SplitAndParse(perf_options_list, ',', &perf_options_)) {
173 TFLITE_LOG(ERROR) << "Cannot parse --perf_options_list: '"
174 << perf_options_list
175 << "'. Please double-check its value.";
176 perf_options_.clear();
177 return false;
178 }
179
180 const auto valid_options = GetValidPerfOptions();
181 bool is_valid = true;
182 for (const auto& option : perf_options_) {
183 if (std::find(valid_options.begin(), valid_options.end(), option) ==
184 valid_options.end()) {
185 is_valid = false;
186 break;
187 }
188 }
189 if (!is_valid) {
190 std::string valid_options_str;
191 for (int i = 0; i < valid_options.size() - 1; ++i) {
192 valid_options_str += (valid_options[i] + ", ");
193 }
194 valid_options_str += valid_options.back();
195 TFLITE_LOG(ERROR)
196 << "There are invalid perf options in --perf_options_list: '"
197 << perf_options_list << "'. Valid perf options are: ["
198 << valid_options_str << "]";
199 perf_options_.clear();
200 return false;
201 }
202
203 if (HasOption("none") && perf_options_.size() > 1) {
204 TFLITE_LOG(ERROR) << "The 'none' option can not be used together with "
205 "other perf options in --perf_options_list!";
206 perf_options_.clear();
207 return false;
208 }
209 return true;
210 }
211
GetValidPerfOptions() const212 std::vector<std::string> BenchmarkPerformanceOptions::GetValidPerfOptions()
213 const {
214 std::vector<std::string> valid_options = {"all", "cpu", "gpu", "nnapi",
215 "none"};
216 #if defined(TFLITE_ENABLE_HEXAGON)
217 valid_options.emplace_back("dsp");
218 #endif
219 return valid_options;
220 }
221
HasOption(const std::string & option) const222 bool BenchmarkPerformanceOptions::HasOption(const std::string& option) const {
223 return std::find(perf_options_.begin(), perf_options_.end(), option) !=
224 perf_options_.end();
225 }
226
ResetPerformanceOptions()227 void BenchmarkPerformanceOptions::ResetPerformanceOptions() {
228 single_option_run_params_->Set<int32_t>("num_threads", 1);
229 single_option_run_params_->Set<bool>("use_gpu", false);
230 #if defined(__ANDROID__)
231 single_option_run_params_->Set<bool>("gpu_precision_loss_allowed", true);
232 single_option_run_params_->Set<bool>("use_nnapi", false);
233 single_option_run_params_->Set<std::string>("nnapi_accelerator_name", "");
234 single_option_run_params_->Set<bool>("disable_nnapi_cpu", false);
235 single_option_run_params_->Set<int>("max_delegated_partitions", 0);
236 single_option_run_params_->Set<bool>("nnapi_allow_fp16", false);
237 #endif
238 #if defined(TFLITE_ENABLE_HEXAGON)
239 single_option_run_params_->Set<bool>("use_hexagon", false);
240 #endif
241 single_option_run_params_->Set<bool>("use_xnnpack", false);
242 }
243
CreatePerformanceOptions()244 void BenchmarkPerformanceOptions::CreatePerformanceOptions() {
245 TFLITE_LOG(INFO) << "The list of TFLite runtime options to be benchmarked: ["
246 << params_.Get<std::string>("perf_options_list") << "]";
247
248 if (HasOption("none")) {
249 // Just add an empty BenchmarkParams instance.
250 BenchmarkParams params;
251 all_run_params_.emplace_back(std::move(params));
252 // As 'none' is exclusive to others, simply return here.
253 return;
254 }
255
256 const bool benchmark_all = HasOption("all");
257
258 if (benchmark_all || HasOption("cpu")) {
259 const std::vector<int> num_threads = {1, 2, 4};
260 for (const int count : num_threads) {
261 BenchmarkParams params;
262 params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(count));
263 all_run_params_.emplace_back(std::move(params));
264
265 BenchmarkParams xnnpack_params;
266 xnnpack_params.AddParam("use_xnnpack",
267 BenchmarkParam::Create<bool>(true));
268 xnnpack_params.AddParam("num_threads",
269 BenchmarkParam::Create<int32_t>(count));
270 all_run_params_.emplace_back(std::move(xnnpack_params));
271 }
272 }
273
274 if (benchmark_all || HasOption("gpu")) {
275 #if defined(__ANDROID__)
276 const std::vector<bool> allow_precision_loss = {true, false};
277 for (const auto precision_loss : allow_precision_loss) {
278 BenchmarkParams params;
279 params.AddParam("use_gpu", BenchmarkParam::Create<bool>(true));
280 params.AddParam("gpu_precision_loss_allowed",
281 BenchmarkParam::Create<bool>(precision_loss));
282 all_run_params_.emplace_back(std::move(params));
283 }
284 #else
285 BenchmarkParams params;
286 params.AddParam("use_gpu", BenchmarkParam::Create<bool>(true));
287 all_run_params_.emplace_back(std::move(params));
288 #endif
289 }
290
291 #if defined(__ANDROID__)
292 if (benchmark_all || HasOption("nnapi")) {
293 std::string nnapi_accelerators = nnapi::GetStringDeviceNamesList();
294 if (!nnapi_accelerators.empty()) {
295 std::vector<std::string> device_names;
296 util::SplitAndParse(nnapi_accelerators, ',', &device_names);
297 for (const auto name : device_names) {
298 BenchmarkParams params;
299 params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(true));
300 params.AddParam("nnapi_accelerator_name",
301 BenchmarkParam::Create<std::string>(name));
302 params.AddParam("disable_nnapi_cpu",
303 BenchmarkParam::Create<bool>(false));
304 params.AddParam("max_delegated_partitions",
305 BenchmarkParam::Create<int>(0));
306 all_run_params_.emplace_back(std::move(params));
307 }
308 }
309 // Explicitly test the case when there's no "nnapi_accelerator_name"
310 // parameter as the nnpai execution is different from the case when
311 // an accelerator name is explicitly specified.
312 BenchmarkParams params;
313 params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(true));
314 all_run_params_.emplace_back(std::move(params));
315 }
316 #endif
317
318 #if defined(TFLITE_ENABLE_HEXAGON)
319 if (benchmark_all || HasOption("dsp")) {
320 BenchmarkParams params;
321 params.AddParam("use_hexagon", BenchmarkParam::Create<bool>(true));
322 all_run_params_.emplace_back(std::move(params));
323 }
324 #endif
325 }
326
Run()327 void BenchmarkPerformanceOptions::Run() {
328 CreatePerformanceOptions();
329
330 if (params_.Get<bool>("random_shuffle_benchmark_runs")) {
331 std::random_shuffle(all_run_params_.begin(), all_run_params_.end());
332 }
333
334 // We need to clean *internally* created benchmark listeners, like the
335 // profiling listener etc. in each Run() invoke because such listeners may be
336 // reset and become invalid in the next Run(). As a result, we record the
337 // number of externally-added listeners here to prevent they're cleared later.
338 const int num_external_listeners = single_option_run_->NumListeners();
339
340 // Now perform all runs, each with different performance-affecting parameters.
341 for (const auto& run_params : all_run_params_) {
342 // If the run_params is empty, then it means "none" is set for
343 // --perf_options_list.
344 if (!run_params.Empty()) {
345 // Reset all performance-related options before any runs.
346 ResetPerformanceOptions();
347 single_option_run_params_->Set(run_params);
348 }
349 util::SleepForSeconds(params_.Get<float>("option_benchmark_run_delay"));
350
351 // Clear internally created listeners before each run but keep externally
352 // created ones.
353 single_option_run_->RemoveListeners(num_external_listeners);
354
355 all_run_stats_->MarkBenchmarkStart(*single_option_run_params_);
356 single_option_run_->Run();
357 }
358
359 all_run_stats_->OutputStats();
360 }
361
Run(int argc,char ** argv)362 void BenchmarkPerformanceOptions::Run(int argc, char** argv) {
363 // Parse flags that are supported by this particular binary first.
364 if (!ParseFlags(&argc, argv)) return;
365
366 // Then parse flags for single-option runs to get information like parameters
367 // of the input model etc.
368 if (single_option_run_->ParseFlags(&argc, argv) != kTfLiteOk) return;
369
370 // Now, the remaining are unrecognized flags and we simply print them out.
371 for (int i = 1; i < argc; ++i) {
372 TFLITE_LOG(WARN) << "WARNING: unrecognized commandline flag: " << argv[i];
373 }
374
375 Run();
376 }
377 } // namespace benchmark
378 } // namespace tflite
379