• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
17 
#include <algorithm>
#include <iomanip>
#include <memory>
#include <random>
#include <sstream>
#include <utility>
23 
24 #include "tensorflow/core/util/stats_calculator.h"
25 #include "tensorflow/lite/c/common.h"
26 #if defined(__ANDROID__)
27 #include "tensorflow/lite/delegates/gpu/delegate.h"
28 #include "tensorflow/lite/nnapi/nnapi_util.h"
29 #endif
30 #include "tensorflow/lite/profiling/time.h"
31 #include "tensorflow/lite/tools/benchmark/benchmark_params.h"
32 #include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
33 #include "tensorflow/lite/tools/command_line_flags.h"
34 #include "tensorflow/lite/tools/logging.h"
35 
36 #if (defined(ANDROID) || defined(__ANDROID__)) && \
37     (defined(__arm__) || defined(__aarch64__))
38 #define TFLITE_ENABLE_HEXAGON
39 #endif
40 
41 namespace tflite {
42 namespace benchmark {
43 
PerfOptionName(const BenchmarkParams & params) const44 std::string MultiRunStatsRecorder::PerfOptionName(
45     const BenchmarkParams& params) const {
46 #if defined(__ANDROID__)
47   if (params.Get<bool>("use_nnapi")) {
48     const std::string accelerator =
49         params.Get<std::string>("nnapi_accelerator_name");
50     return accelerator.empty() ? "nnapi(w/o accel name)"
51                                : "nnapi(" + accelerator + ")";
52   }
53 #endif
54 
55   if (params.Get<bool>("use_gpu")) {
56 #if defined(__ANDROID__)
57     if (params.Get<bool>("gpu_precision_loss_allowed")) {
58       return "gpu-fp16";
59     } else {
60       return "gpu-default";
61     }
62 #else
63     return "gpu-default";
64 #endif
65   }
66 
67 #if defined(TFLITE_ENABLE_HEXAGON)
68   if (params.Get<bool>("use_hexagon")) {
69     return "dsp w/ hexagon";
70   }
71 #endif
72 
73   // Handle cases run on CPU
74   // Note: could use std::to_string to convert an integer to string but it
75   // requires C++11.
76   std::stringstream sstm;
77   sstm << "cpu w/ " << params.Get<int32_t>("num_threads") << " threads";
78 
79   // Handle cases run on CPU w/ the xnnpack delegate
80   if (params.Get<bool>("use_xnnpack")) {
81     sstm << " (xnnpack)";
82   }
83 
84   return sstm.str();
85 }
86 
OutputStats()87 void MultiRunStatsRecorder::OutputStats() {
88   // Make a 80-character-long header.
89   TFLITE_LOG(INFO) << "\n==============Summary of All Runs w/ Different "
90                       "Performance Options==============";
91   std::sort(results_.begin(), results_.end(), EachRunStatsEntryComparator());
92 
93   for (const auto& run_stats : results_) {
94     const auto perf_option_name = PerfOptionName(*run_stats.params);
95     std::stringstream stream;
96     stream << std::setw(26) << perf_option_name << ": ";
97     if (!run_stats.completed) {
98       stream << " failed!";
99     } else {
100       run_stats.metrics.inference_time_us().OutputToStream(&stream);
101       // NOTE: As of 2019/11/07, the memory usage is collected in an
102       // OS-process-wide way and this program performs multiple runs in a single
103       // OS process, therefore, the memory usage information of each run becomes
104       // incorrect, hence no output here.
105     }
106     TFLITE_LOG(INFO) << stream.str();
107   }
108 }
109 
BenchmarkPerformanceOptions(BenchmarkModel * single_option_run,std::unique_ptr<MultiRunStatsRecorder> all_run_stats)110 BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
111     BenchmarkModel* single_option_run,
112     std::unique_ptr<MultiRunStatsRecorder> all_run_stats)
113     : BenchmarkPerformanceOptions(DefaultParams(), single_option_run,
114                                   std::move(all_run_stats)) {}
115 
BenchmarkPerformanceOptions(BenchmarkParams params,BenchmarkModel * single_option_run,std::unique_ptr<MultiRunStatsRecorder> all_run_stats)116 BenchmarkPerformanceOptions::BenchmarkPerformanceOptions(
117     BenchmarkParams params, BenchmarkModel* single_option_run,
118     std::unique_ptr<MultiRunStatsRecorder> all_run_stats)
119     : params_(std::move(params)),
120       single_option_run_(single_option_run),
121       single_option_run_params_(single_option_run->mutable_params()),
122       all_run_stats_(std::move(all_run_stats)) {
123   single_option_run_->AddListener(all_run_stats_.get());
124 }
125 
DefaultParams()126 BenchmarkParams BenchmarkPerformanceOptions::DefaultParams() {
127   BenchmarkParams params;
128   params.AddParam("perf_options_list",
129                   BenchmarkParam::Create<std::string>("all"));
130   params.AddParam("option_benchmark_run_delay",
131                   BenchmarkParam::Create<float>(-1.0f));
132   params.AddParam("random_shuffle_benchmark_runs",
133                   BenchmarkParam::Create<bool>(true));
134   return params;
135 }
136 
GetFlags()137 std::vector<Flag> BenchmarkPerformanceOptions::GetFlags() {
138   return {
139       CreateFlag<std::string>(
140           "perf_options_list", &params_,
141           "A comma-separated list of TFLite performance options to benchmark. "
142           "By default, all performance options are benchmarked. Note if it's "
143           "set to 'none', then the tool simply benchmark the model against the "
144           "specified benchmark parameters."),
145       CreateFlag<float>("option_benchmark_run_delay", &params_,
146                         "The delay between two consecutive runs of "
147                         "benchmarking performance options in seconds."),
148       CreateFlag<bool>(
149           "random_shuffle_benchmark_runs", &params_,
150           "Whether to perform all benchmark runs, each of which has different "
151           "performance options, in a random order. It is enabled by default."),
152   };
153 }
154 
ParseFlags(int * argc,char ** argv)155 bool BenchmarkPerformanceOptions::ParseFlags(int* argc, char** argv) {
156   auto flag_list = GetFlags();
157   const bool parse_result =
158       Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
159   if (!parse_result) {
160     std::string usage = Flags::Usage(argv[0], flag_list);
161     TFLITE_LOG(ERROR) << usage;
162     return false;
163   }
164 
165   // Parse the value of --perf_options_list to find performance options to be
166   // benchmarked.
167   return ParsePerfOptions();
168 }
169 
ParsePerfOptions()170 bool BenchmarkPerformanceOptions::ParsePerfOptions() {
171   const auto& perf_options_list = params_.Get<std::string>("perf_options_list");
172   if (!util::SplitAndParse(perf_options_list, ',', &perf_options_)) {
173     TFLITE_LOG(ERROR) << "Cannot parse --perf_options_list: '"
174                       << perf_options_list
175                       << "'. Please double-check its value.";
176     perf_options_.clear();
177     return false;
178   }
179 
180   const auto valid_options = GetValidPerfOptions();
181   bool is_valid = true;
182   for (const auto& option : perf_options_) {
183     if (std::find(valid_options.begin(), valid_options.end(), option) ==
184         valid_options.end()) {
185       is_valid = false;
186       break;
187     }
188   }
189   if (!is_valid) {
190     std::string valid_options_str;
191     for (int i = 0; i < valid_options.size() - 1; ++i) {
192       valid_options_str += (valid_options[i] + ", ");
193     }
194     valid_options_str += valid_options.back();
195     TFLITE_LOG(ERROR)
196         << "There are invalid perf options in --perf_options_list: '"
197         << perf_options_list << "'. Valid perf options are: ["
198         << valid_options_str << "]";
199     perf_options_.clear();
200     return false;
201   }
202 
203   if (HasOption("none") && perf_options_.size() > 1) {
204     TFLITE_LOG(ERROR) << "The 'none' option can not be used together with "
205                          "other perf options in --perf_options_list!";
206     perf_options_.clear();
207     return false;
208   }
209   return true;
210 }
211 
GetValidPerfOptions() const212 std::vector<std::string> BenchmarkPerformanceOptions::GetValidPerfOptions()
213     const {
214   std::vector<std::string> valid_options = {"all", "cpu", "gpu", "nnapi",
215                                             "none"};
216 #if defined(TFLITE_ENABLE_HEXAGON)
217   valid_options.emplace_back("dsp");
218 #endif
219   return valid_options;
220 }
221 
HasOption(const std::string & option) const222 bool BenchmarkPerformanceOptions::HasOption(const std::string& option) const {
223   return std::find(perf_options_.begin(), perf_options_.end(), option) !=
224          perf_options_.end();
225 }
226 
ResetPerformanceOptions()227 void BenchmarkPerformanceOptions::ResetPerformanceOptions() {
228   single_option_run_params_->Set<int32_t>("num_threads", 1);
229   single_option_run_params_->Set<bool>("use_gpu", false);
230 #if defined(__ANDROID__)
231   single_option_run_params_->Set<bool>("gpu_precision_loss_allowed", true);
232   single_option_run_params_->Set<bool>("use_nnapi", false);
233   single_option_run_params_->Set<std::string>("nnapi_accelerator_name", "");
234   single_option_run_params_->Set<bool>("disable_nnapi_cpu", false);
235   single_option_run_params_->Set<int>("max_delegated_partitions", 0);
236   single_option_run_params_->Set<bool>("nnapi_allow_fp16", false);
237 #endif
238 #if defined(TFLITE_ENABLE_HEXAGON)
239   single_option_run_params_->Set<bool>("use_hexagon", false);
240 #endif
241   single_option_run_params_->Set<bool>("use_xnnpack", false);
242 }
243 
CreatePerformanceOptions()244 void BenchmarkPerformanceOptions::CreatePerformanceOptions() {
245   TFLITE_LOG(INFO) << "The list of TFLite runtime options to be benchmarked: ["
246                    << params_.Get<std::string>("perf_options_list") << "]";
247 
248   if (HasOption("none")) {
249     // Just add an empty BenchmarkParams instance.
250     BenchmarkParams params;
251     all_run_params_.emplace_back(std::move(params));
252     // As 'none' is exclusive to others, simply return here.
253     return;
254   }
255 
256   const bool benchmark_all = HasOption("all");
257 
258   if (benchmark_all || HasOption("cpu")) {
259     const std::vector<int> num_threads = {1, 2, 4};
260     for (const int count : num_threads) {
261       BenchmarkParams params;
262       params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(count));
263       all_run_params_.emplace_back(std::move(params));
264 
265       BenchmarkParams xnnpack_params;
266       xnnpack_params.AddParam("use_xnnpack",
267                               BenchmarkParam::Create<bool>(true));
268       xnnpack_params.AddParam("num_threads",
269                               BenchmarkParam::Create<int32_t>(count));
270       all_run_params_.emplace_back(std::move(xnnpack_params));
271     }
272   }
273 
274   if (benchmark_all || HasOption("gpu")) {
275 #if defined(__ANDROID__)
276     const std::vector<bool> allow_precision_loss = {true, false};
277     for (const auto precision_loss : allow_precision_loss) {
278       BenchmarkParams params;
279       params.AddParam("use_gpu", BenchmarkParam::Create<bool>(true));
280       params.AddParam("gpu_precision_loss_allowed",
281                       BenchmarkParam::Create<bool>(precision_loss));
282       all_run_params_.emplace_back(std::move(params));
283     }
284 #else
285     BenchmarkParams params;
286     params.AddParam("use_gpu", BenchmarkParam::Create<bool>(true));
287     all_run_params_.emplace_back(std::move(params));
288 #endif
289   }
290 
291 #if defined(__ANDROID__)
292   if (benchmark_all || HasOption("nnapi")) {
293     std::string nnapi_accelerators = nnapi::GetStringDeviceNamesList();
294     if (!nnapi_accelerators.empty()) {
295       std::vector<std::string> device_names;
296       util::SplitAndParse(nnapi_accelerators, ',', &device_names);
297       for (const auto name : device_names) {
298         BenchmarkParams params;
299         params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(true));
300         params.AddParam("nnapi_accelerator_name",
301                         BenchmarkParam::Create<std::string>(name));
302         params.AddParam("disable_nnapi_cpu",
303                         BenchmarkParam::Create<bool>(false));
304         params.AddParam("max_delegated_partitions",
305                         BenchmarkParam::Create<int>(0));
306         all_run_params_.emplace_back(std::move(params));
307       }
308     }
309     // Explicitly test the case when there's no "nnapi_accelerator_name"
310     // parameter as the nnpai execution is different from the case when
311     // an accelerator name is explicitly specified.
312     BenchmarkParams params;
313     params.AddParam("use_nnapi", BenchmarkParam::Create<bool>(true));
314     all_run_params_.emplace_back(std::move(params));
315   }
316 #endif
317 
318 #if defined(TFLITE_ENABLE_HEXAGON)
319   if (benchmark_all || HasOption("dsp")) {
320     BenchmarkParams params;
321     params.AddParam("use_hexagon", BenchmarkParam::Create<bool>(true));
322     all_run_params_.emplace_back(std::move(params));
323   }
324 #endif
325 }
326 
Run()327 void BenchmarkPerformanceOptions::Run() {
328   CreatePerformanceOptions();
329 
330   if (params_.Get<bool>("random_shuffle_benchmark_runs")) {
331     std::random_shuffle(all_run_params_.begin(), all_run_params_.end());
332   }
333 
334   // We need to clean *internally* created benchmark listeners, like the
335   // profiling listener etc. in each Run() invoke because such listeners may be
336   // reset and become invalid in the next Run(). As a result, we record the
337   // number of externally-added listeners here to prevent they're cleared later.
338   const int num_external_listeners = single_option_run_->NumListeners();
339 
340   // Now perform all runs, each with different performance-affecting parameters.
341   for (const auto& run_params : all_run_params_) {
342     // If the run_params is empty, then it means "none" is set for
343     // --perf_options_list.
344     if (!run_params.Empty()) {
345       // Reset all performance-related options before any runs.
346       ResetPerformanceOptions();
347       single_option_run_params_->Set(run_params);
348     }
349     util::SleepForSeconds(params_.Get<float>("option_benchmark_run_delay"));
350 
351     // Clear internally created listeners before each run but keep externally
352     // created ones.
353     single_option_run_->RemoveListeners(num_external_listeners);
354 
355     all_run_stats_->MarkBenchmarkStart(*single_option_run_params_);
356     single_option_run_->Run();
357   }
358 
359   all_run_stats_->OutputStats();
360 }
361 
Run(int argc,char ** argv)362 void BenchmarkPerformanceOptions::Run(int argc, char** argv) {
363   // Parse flags that are supported by this particular binary first.
364   if (!ParseFlags(&argc, argv)) return;
365 
366   // Then parse flags for single-option runs to get information like parameters
367   // of the input model etc.
368   if (single_option_run_->ParseFlags(&argc, argv) != kTfLiteOk) return;
369 
370   // Now, the remaining are unrecognized flags and we simply print them out.
371   for (int i = 1; i < argc; ++i) {
372     TFLITE_LOG(WARN) << "WARNING: unrecognized commandline flag: " << argv[i];
373   }
374 
375   Run();
376 }
377 }  // namespace benchmark
378 }  // namespace tflite
379