/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_model.h"

#include <iostream>
#include <sstream>

#include "tensorflow/lite/profiling/memory_info.h"
#include "tensorflow/lite/profiling/time.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/logging.h"

namespace tflite {
namespace benchmark {
using tensorflow::Stat;
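
// Default interval, in milliseconds, between two consecutive memory footprint
// checks when --report_peak_memory_footprint is enabled.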
constexpr int kMemoryCheckIntervalMs = 50;

BenchmarkParams BenchmarkModel::DefaultParams() {
  BenchmarkParams params;
  params.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));
  params.AddParam("min_secs", BenchmarkParam::Create<float>(1.0f));
  params.AddParam("max_secs", BenchmarkParam::Create<float>(150.0f));
  params.AddParam("run_delay", BenchmarkParam::Create<float>(-1.0f));
  params.AddParam("run_frequency", BenchmarkParam::Create<float>(-1.0f));
  params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(-1));
  params.AddParam("use_caching", BenchmarkParam::Create<bool>(false));
  params.AddParam("benchmark_name", BenchmarkParam::Create<std::string>(""));
  params.AddParam("output_prefix", BenchmarkParam::Create<std::string>(""));
  params.AddParam("warmup_runs", BenchmarkParam::Create<int32_t>(1));
  params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
  params.AddParam("verbose", BenchmarkParam::Create<bool>(false));
  params.AddParam("dry_run", BenchmarkParam::Create<bool>(false));
  params.AddParam("report_peak_memory_footprint",
                  BenchmarkParam::Create<bool>(false));
  params.AddParam("memory_footprint_check_interval_ms",
                  BenchmarkParam::Create<int32_t>(kMemoryCheckIntervalMs));
  return params;
}
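
// The defaults above can be overridden either via the command-line flags
// defined in GetFlags() below, or programmatically before Run(), e.g.
// (a sketch): params_.Set<int32_t>("num_runs", 100);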

BenchmarkModel::BenchmarkModel() : params_(DefaultParams()) {}

void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  auto inference_us = results.inference_time_us();
  auto init_us = results.startup_latency_us();
  auto warmup_us = results.warmup_time_us();
  auto init_mem_usage = results.init_mem_usage();
  auto overall_mem_usage = results.overall_mem_usage();
  TFLITE_LOG(INFO) << "Inference timings in us: "
                   << "Init: " << init_us << ", "
                   << "First inference: " << warmup_us.first() << ", "
                   << "Warmup (avg): " << warmup_us.avg() << ", "
                   << "Inference (avg): " << inference_us.avg();

  if (!init_mem_usage.IsSupported()) return;
  TFLITE_LOG(INFO)
      << "Note: as the benchmark tool itself affects the memory footprint, "
         "the following is only an APPROXIMATION of the model's actual memory "
         "footprint at runtime. Use the information at your discretion.";
  TFLITE_LOG(INFO) << "Memory footprint delta from the start of the tool (MB): "
                   << "init=" << init_mem_usage.max_rss_kb / 1024.0
                   << " overall=" << overall_mem_usage.max_rss_kb / 1024.0;

  auto peak_mem_mb = results.peak_mem_mb();
  if (peak_mem_mb > 0) {
    TFLITE_LOG(INFO)
        << "Overall peak memory footprint (MB) via periodic monitoring: "
        << peak_mem_mb;
  }
}

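// Each CreateFlag<T>(name, &params_, description) call below binds a
// command-line flag to the BenchmarkParam of the same name in params_, so
// values parsed from the flags overwrite the defaults from DefaultParams().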
std::vector<Flag> BenchmarkModel::GetFlags() {
  return {
      CreateFlag<int32_t>(
          "num_runs", &params_,
          "expected number of runs, see also min_secs, max_secs"),
      CreateFlag<float>(
          "min_secs", &params_,
          "minimum number of seconds to rerun for, potentially making the "
          "actual number of runs greater than num_runs"),
      CreateFlag<float>(
          "max_secs", &params_,
          "maximum number of seconds to rerun for, potentially making the "
          "actual number of runs less than num_runs. Note if --max_secs "
          "is exceeded in the middle of a run, the benchmark will continue to "
          "the end of the run but will not start the next run."),
      CreateFlag<float>("run_delay", &params_, "delay between runs in seconds"),
      CreateFlag<float>(
          "run_frequency", &params_,
          "Execute at a fixed frequency, instead of a fixed delay. "
          "Note if the targeted rate per second cannot be reached, the "
          "benchmark will start the next run immediately, trying its best to "
          "catch up. If set, this will override run_delay."),
      CreateFlag<int32_t>("num_threads", &params_, "number of threads"),
      CreateFlag<bool>(
          "use_caching", &params_,
          "Enable caching of prepacked weights matrices in matrix "
          "multiplication routines. Currently implies the use of the Ruy "
          "library."),
      CreateFlag<std::string>("benchmark_name", &params_, "benchmark name"),
      CreateFlag<std::string>("output_prefix", &params_,
                              "benchmark output prefix"),
      CreateFlag<int32_t>(
          "warmup_runs", &params_,
          "minimum number of runs performed on initialization, to "
          "allow performance characteristics to settle, see also "
          "warmup_min_secs"),
      CreateFlag<float>(
          "warmup_min_secs", &params_,
          "minimum number of seconds to rerun for, potentially making the "
          "actual number of warm-up runs greater than warmup_runs"),
      CreateFlag<bool>("verbose", &params_,
                       "Whether to log parameters whose values are not set. "
                       "By default, only parameters whose values are set via "
                       "command-line flags are logged."),
      CreateFlag<bool>("dry_run", &params_,
                       "Whether to run the tool by just loading the model, "
                       "allocating tensors, etc., without actually invoking "
                       "any op kernels."),
      CreateFlag<bool>(
          "report_peak_memory_footprint", &params_,
          "Report the peak memory footprint by periodically checking the "
          "memory footprint. Internally, a separate thread will be spawned for "
          "this periodic check. Therefore, the performance benchmark result "
          "could be affected."),
      CreateFlag<int32_t>("memory_footprint_check_interval_ms", &params_,
                          "The interval in milliseconds between two "
                          "consecutive memory footprint checks. This is only "
                          "used when --report_peak_memory_footprint is set to "
                          "true.")};
}

void BenchmarkModel::LogParams() {
  const bool verbose = params_.Get<bool>("verbose");
  TFLITE_LOG(INFO) << "Log parameter values verbosely: [" << verbose << "]";

  LOG_BENCHMARK_PARAM(int32_t, "num_runs", "Min num runs", verbose);
  LOG_BENCHMARK_PARAM(float, "min_secs", "Min runs duration (seconds)",
                      verbose);
  LOG_BENCHMARK_PARAM(float, "max_secs", "Max runs duration (seconds)",
                      verbose);
  LOG_BENCHMARK_PARAM(float, "run_delay", "Inter-run delay (seconds)", verbose);
  LOG_BENCHMARK_PARAM(float, "run_frequency",
                      "Number of prorated runs per second", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "num_threads", "Num threads", verbose);
  LOG_BENCHMARK_PARAM(bool, "use_caching", "Use caching", verbose);
  LOG_BENCHMARK_PARAM(std::string, "benchmark_name", "Benchmark name", verbose);
  LOG_BENCHMARK_PARAM(std::string, "output_prefix", "Output prefix", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "warmup_runs", "Min warmup runs", verbose);
  LOG_BENCHMARK_PARAM(float, "warmup_min_secs",
                      "Min warmup runs duration (seconds)", verbose);
  LOG_BENCHMARK_PARAM(bool, "dry_run", "Run w/o invoking kernels", verbose);
  LOG_BENCHMARK_PARAM(bool, "report_peak_memory_footprint",
                      "Report the peak memory footprint", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "memory_footprint_check_interval_ms",
                      "Memory footprint check interval (ms)", verbose);
}

TfLiteStatus BenchmarkModel::PrepareInputData() { return kTfLiteOk; }

TfLiteStatus BenchmarkModel::ResetInputsAndOutputs() { return kTfLiteOk; }

Stat<int64_t> BenchmarkModel::Run(int min_num_times, float min_secs,
                                  float max_secs, RunType run_type,
                                  TfLiteStatus* invoke_status) {
  Stat<int64_t> run_stats;
  TFLITE_LOG(INFO) << "Running benchmark for at least " << min_num_times
                   << " iterations and at least " << min_secs << " seconds but"
                   << " terminate if exceeding " << max_secs << " seconds.";
  int64_t now_us = profiling::time::NowMicros();
  int64_t min_finish_us = now_us + static_cast<int64_t>(min_secs * 1.e6f);
  int64_t max_finish_us = now_us + static_cast<int64_t>(max_secs * 1.e6f);

  *invoke_status = kTfLiteOk;
  float inter_run_sleep_time = params_.Get<float>("run_delay");
  auto run_frequency = params_.Get<float>("run_frequency");
  double manual_inter_run_gap = 1.0 / run_frequency;
  // float doesn't have sufficient precision for storing this number
  double next_run_finish_time = now_us * 1e-6 + manual_inter_run_gap;
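  // The loop below performs at least min_num_times runs and keeps running
  // until min_secs have elapsed, but stops starting new runs once
  // max_finish_us has passed.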
  for (int run = 0; (run < min_num_times || now_us < min_finish_us) &&
                    now_us <= max_finish_us;
       run++) {
    ResetInputsAndOutputs();
    listeners_.OnSingleRunStart(run_type);
    int64_t start_us = profiling::time::NowMicros();
    TfLiteStatus status = RunImpl();
    int64_t end_us = profiling::time::NowMicros();
    listeners_.OnSingleRunEnd();

    run_stats.UpdateStat(end_us - start_us);
    if (run_frequency > 0) {
      inter_run_sleep_time =
          next_run_finish_time - profiling::time::NowMicros() * 1e-6;
      next_run_finish_time += manual_inter_run_gap;
    }
    // Note when "inter_run_sleep_time" is negative or 0.0,
    // the function will return immediately.
    util::SleepForSeconds(inter_run_sleep_time);
    now_us = profiling::time::NowMicros();

    if (status != kTfLiteOk) {
      *invoke_status = status;
    }
  }

  std::stringstream stream;
  run_stats.OutputToStream(&stream);
  TFLITE_LOG(INFO) << stream.str() << std::endl;

  return run_stats;
}

TfLiteStatus BenchmarkModel::ValidateParams() {
  if (params_.Get<bool>("report_peak_memory_footprint")) {
    const int32_t interval =
        params_.Get<int32_t>("memory_footprint_check_interval_ms");
    if (interval <= 0) {
      TFLITE_LOG(WARN) << "--memory_footprint_check_interval_ms is set to "
                       << interval
                       << " (ms). This value is invalid, so it will be reset "
                          "to the default value "
                       << kMemoryCheckIntervalMs << " (ms).";
      params_.Set<int32_t>("memory_footprint_check_interval_ms",
                           kMemoryCheckIntervalMs);
    }
  }
  return kTfLiteOk;
}

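// Typical driver usage (a sketch; "MyBenchmark" stands for a concrete
// subclass, e.g. BenchmarkTfLiteModel, that implements Init()/RunImpl()):
//
//   int main(int argc, char** argv) {
//     tflite::benchmark::MyBenchmark benchmark;
//     return benchmark.Run(argc, argv) == kTfLiteOk ? 0 : 1;
//   }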
TfLiteStatus BenchmarkModel::Run(int argc, char** argv) {
  TF_LITE_ENSURE_STATUS(ParseFlags(argc, argv));
  return Run();
}

TfLiteStatus BenchmarkModel::Run() {
  TF_LITE_ENSURE_STATUS(ValidateParams());

  LogParams();

  auto peak_memory_reporter = MayCreateMemoryUsageMonitor();
  if (peak_memory_reporter != nullptr) peak_memory_reporter->Start();
  const double model_size_mb = MayGetModelFileSize() / 1e6;
  const auto start_mem_usage = profiling::memory::GetMemoryUsage();
  int64_t initialization_start_us = profiling::time::NowMicros();
  TF_LITE_ENSURE_STATUS(Init());
  const auto init_end_mem_usage = profiling::memory::GetMemoryUsage();
  int64_t initialization_end_us = profiling::time::NowMicros();
  int64_t startup_latency_us = initialization_end_us - initialization_start_us;
  const auto init_mem_usage = init_end_mem_usage - start_mem_usage;

  if (model_size_mb > 0) {
    TFLITE_LOG(INFO) << "The input model file size (MB): " << model_size_mb;
  }
  TFLITE_LOG(INFO) << "Initialized session in " << startup_latency_us / 1e3
                   << "ms.";

  TF_LITE_ENSURE_STATUS(PrepareInputData());

  TfLiteStatus status = kTfLiteOk;
  uint64_t input_bytes = ComputeInputBytes();

  // Overwrite certain parameters when --dry_run=true is set.
  if (params_.Get<bool>("dry_run")) {
    params_.Set("warmup_runs", 0);
    params_.Set("warmup_min_secs", -1.0f);
    params_.Set("num_runs", 0);
    params_.Set("min_secs", -1.0f);
  }

  listeners_.OnBenchmarkStart(params_);
  Stat<int64_t> warmup_time_us =
      Run(params_.Get<int32_t>("warmup_runs"),
          params_.Get<float>("warmup_min_secs"), params_.Get<float>("max_secs"),
          WARMUP, &status);
  if (status != kTfLiteOk) {
    return status;
  }

  Stat<int64_t> inference_time_us =
      Run(params_.Get<int32_t>("num_runs"), params_.Get<float>("min_secs"),
          params_.Get<float>("max_secs"), REGULAR, &status);
  const auto overall_mem_usage =
      profiling::memory::GetMemoryUsage() - start_mem_usage;

  float peak_mem_mb = profiling::memory::MemoryUsageMonitor::kInvalidMemUsageMB;
  if (peak_memory_reporter != nullptr) {
    peak_memory_reporter->Stop();
    peak_mem_mb = peak_memory_reporter->GetPeakMemUsageInMB();
  }

  listeners_.OnBenchmarkEnd({model_size_mb, startup_latency_us, input_bytes,
                             warmup_time_us, inference_time_us, init_mem_usage,
                             overall_mem_usage, peak_mem_mb});
  return status;
}

TfLiteStatus BenchmarkModel::ParseFlags(int* argc, char** argv) {
  auto flag_list = GetFlags();
  const bool parse_result =
      Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
  // The "--help" flag is added in tools/delegates/default_execution_provider.cc.
  // As this is an optional dependency, we first need to check whether the
  // "help" param exists at all.
  if (!parse_result ||
      (params_.HasParam("help") && params_.Get<bool>("help"))) {
    std::string usage = Flags::Usage(argv[0], flag_list);
    TFLITE_LOG(ERROR) << usage;
    // Intentionally return kTfLiteError when "--help=true" is specified so
    // that the caller can check the return value and decide to stop the
    // execution.
    return kTfLiteError;
  }

  std::string unconsumed_args =
      Flags::ArgsToString(*argc, const_cast<const char**>(argv));
  if (!unconsumed_args.empty()) {
    TFLITE_LOG(WARN) << "Unconsumed cmdline flags: " << unconsumed_args;
  }

  return kTfLiteOk;
}

std::unique_ptr<profiling::memory::MemoryUsageMonitor>
BenchmarkModel::MayCreateMemoryUsageMonitor() const {
  if (!params_.Get<bool>("report_peak_memory_footprint")) return nullptr;

  return std::unique_ptr<profiling::memory::MemoryUsageMonitor>(
      new profiling::memory::MemoryUsageMonitor(
          params_.Get<int32_t>("memory_footprint_check_interval_ms")));
}

}  // namespace benchmark
}  // namespace tflite