1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/tools/benchmark/benchmark_model.h"
17
18 #include <iostream>
19 #include <sstream>
20
21 #include "tensorflow/lite/profiling/memory_info.h"
22 #include "tensorflow/lite/profiling/time.h"
23 #include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
24 #include "tensorflow/lite/tools/logging.h"
25
26 namespace tflite {
27 namespace benchmark {
28 using tensorflow::Stat;
29
30 constexpr int kMemoryCheckIntervalMs = 50;
31
// Builds the canonical set of benchmark parameters with their default values.
// Each entry here has a matching command-line flag in GetFlags(), which can
// override the default.
BenchmarkParams BenchmarkModel::DefaultParams() {
  BenchmarkParams params;
  // Run-count / duration controls: the benchmark performs at least "num_runs"
  // iterations and runs for at least "min_secs", but stops starting new
  // iterations once "max_secs" has elapsed.
  params.AddParam("num_runs", BenchmarkParam::Create<int32_t>(50));
  params.AddParam("min_secs", BenchmarkParam::Create<float>(1.0f));
  params.AddParam("max_secs", BenchmarkParam::Create<float>(150.0f));
  // Inter-run pacing: a fixed delay in seconds, or a fixed frequency which
  // overrides the delay when positive. Negative values disable pacing.
  params.AddParam("run_delay", BenchmarkParam::Create<float>(-1.0f));
  params.AddParam("run_frequency", BenchmarkParam::Create<float>(-1.0f));
  // -1 means "let the runtime pick the number of threads".
  params.AddParam("num_threads", BenchmarkParam::Create<int32_t>(-1));
  params.AddParam("use_caching", BenchmarkParam::Create<bool>(false));
  params.AddParam("benchmark_name", BenchmarkParam::Create<std::string>(""));
  params.AddParam("output_prefix", BenchmarkParam::Create<std::string>(""));
  // Warmup: at least "warmup_runs" iterations and "warmup_min_secs" seconds
  // before the timed runs begin.
  params.AddParam("warmup_runs", BenchmarkParam::Create<int32_t>(1));
  params.AddParam("warmup_min_secs", BenchmarkParam::Create<float>(0.5f));
  params.AddParam("verbose", BenchmarkParam::Create<bool>(false));
  // dry_run loads the model and allocates tensors but skips kernel invocation
  // (see Run(), which zeroes the run counts when this is set).
  params.AddParam("dry_run", BenchmarkParam::Create<bool>(false));
  // Peak-memory monitoring spawns a separate polling thread; see
  // MayCreateMemoryUsageMonitor().
  params.AddParam("report_peak_memory_footprint",
                  BenchmarkParam::Create<bool>(false));
  params.AddParam("memory_footprint_check_interval_ms",
                  BenchmarkParam::Create<int32_t>(kMemoryCheckIntervalMs));
  return params;
}
53
BenchmarkModel()54 BenchmarkModel::BenchmarkModel() : params_(DefaultParams()) {}
55
OnBenchmarkEnd(const BenchmarkResults & results)56 void BenchmarkLoggingListener::OnBenchmarkEnd(const BenchmarkResults& results) {
57 auto inference_us = results.inference_time_us();
58 auto init_us = results.startup_latency_us();
59 auto warmup_us = results.warmup_time_us();
60 auto init_mem_usage = results.init_mem_usage();
61 auto overall_mem_usage = results.overall_mem_usage();
62 TFLITE_LOG(INFO) << "Inference timings in us: "
63 << "Init: " << init_us << ", "
64 << "First inference: " << warmup_us.first() << ", "
65 << "Warmup (avg): " << warmup_us.avg() << ", "
66 << "Inference (avg): " << inference_us.avg();
67
68 if (!init_mem_usage.IsSupported()) return;
69 TFLITE_LOG(INFO)
70 << "Note: as the benchmark tool itself affects memory footprint, the "
71 "following is only APPROXIMATE to the actual memory footprint of the "
72 "model at runtime. Take the information at your discretion.";
73 TFLITE_LOG(INFO) << "Memory footprint delta from the start of the tool (MB): "
74 << "init=" << init_mem_usage.max_rss_kb / 1024.0
75 << " overall=" << overall_mem_usage.max_rss_kb / 1024.0;
76
77 auto peak_mem_mb = results.peak_mem_mb();
78 if (peak_mem_mb > 0) {
79 TFLITE_LOG(INFO)
80 << "Overall peak memory footprint (MB) via periodic monitoring: "
81 << peak_mem_mb;
82 }
83 }
84
// Returns the command-line flag definitions, one per entry in DefaultParams().
// Each flag parses its value directly into params_; the help strings below are
// the user-facing documentation printed by --help.
std::vector<Flag> BenchmarkModel::GetFlags() {
  return {
      CreateFlag<int32_t>(
          "num_runs", &params_,
          "expected number of runs, see also min_secs, max_secs"),
      CreateFlag<float>(
          "min_secs", &params_,
          "minimum number of seconds to rerun for, potentially making the "
          "actual number of runs to be greater than num_runs"),
      CreateFlag<float>(
          "max_secs", &params_,
          "maximum number of seconds to rerun for, potentially making the "
          "actual number of runs to be less than num_runs. Note if --max-secs "
          "is exceeded in the middle of a run, the benchmark will continue to "
          "the end of the run but will not start the next run."),
      CreateFlag<float>("run_delay", &params_, "delay between runs in seconds"),
      CreateFlag<float>(
          "run_frequency", &params_,
          "Execute at a fixed frequency, instead of a fixed delay."
          "Note if the targeted rate per second cannot be reached, the "
          "benchmark would start the next run immediately, trying its best to "
          "catch up. If set, this will override run_delay."),
      CreateFlag<int32_t>("num_threads", &params_, "number of threads"),
      CreateFlag<bool>(
          "use_caching", &params_,
          "Enable caching of prepacked weights matrices in matrix "
          "multiplication routines. Currently implies the use of the Ruy "
          "library."),
      CreateFlag<std::string>("benchmark_name", &params_, "benchmark name"),
      CreateFlag<std::string>("output_prefix", &params_,
                              "benchmark output prefix"),
      CreateFlag<int32_t>(
          "warmup_runs", &params_,
          "minimum number of runs performed on initialization, to "
          "allow performance characteristics to settle, see also "
          "warmup_min_secs"),
      CreateFlag<float>(
          "warmup_min_secs", &params_,
          "minimum number of seconds to rerun for, potentially making the "
          "actual number of warm-up runs to be greater than warmup_runs"),
      CreateFlag<bool>("verbose", &params_,
                       "Whether to log parameters whose values are not set. "
                       "By default, only log those parameters that are set by "
                       "parsing their values from the commandline flags."),
      CreateFlag<bool>("dry_run", &params_,
                       "Whether to run the tool just with simply loading the "
                       "model, allocating tensors etc. but without actually "
                       "invoking any op kernels."),
      CreateFlag<bool>(
          "report_peak_memory_footprint", &params_,
          "Report the peak memory footprint by periodically checking the "
          "memory footprint. Internally, a separate thread will be spawned for "
          "this periodic check. Therefore, the performance benchmark result "
          "could be affected."),
      CreateFlag<int32_t>("memory_footprint_check_interval_ms", &params_,
                          "The interval in millisecond between two consecutive "
                          "memory footprint checks. This is only used when "
                          "--report_peak_memory_footprint is set to true.")};
}
144
// Logs the effective value of every benchmark parameter. When "verbose" is
// false the LOG_BENCHMARK_PARAM macro only logs parameters that were
// explicitly set via command-line flags.
void BenchmarkModel::LogParams() {
  const bool verbose = params_.Get<bool>("verbose");
  TFLITE_LOG(INFO) << "Log parameter values verbosely: [" << verbose << "]";

  LOG_BENCHMARK_PARAM(int32_t, "num_runs", "Min num runs", verbose);
  LOG_BENCHMARK_PARAM(float, "min_secs", "Min runs duration (seconds)",
                      verbose);
  LOG_BENCHMARK_PARAM(float, "max_secs", "Max runs duration (seconds)",
                      verbose);
  LOG_BENCHMARK_PARAM(float, "run_delay", "Inter-run delay (seconds)", verbose);
  LOG_BENCHMARK_PARAM(float, "run_frequency",
                      "Number of prorated runs per second", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "num_threads", "Num threads", verbose);
  LOG_BENCHMARK_PARAM(bool, "use_caching", "Use caching", verbose);
  LOG_BENCHMARK_PARAM(std::string, "benchmark_name", "Benchmark name", verbose);
  LOG_BENCHMARK_PARAM(std::string, "output_prefix", "Output prefix", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "warmup_runs", "Min warmup runs", verbose);
  LOG_BENCHMARK_PARAM(float, "warmup_min_secs",
                      "Min warmup runs duration (seconds)", verbose);
  LOG_BENCHMARK_PARAM(bool, "dry_run", "Run w/o invoking kernels", verbose);
  LOG_BENCHMARK_PARAM(bool, "report_peak_memory_footprint",
                      "Report the peak memory footprint", verbose);
  LOG_BENCHMARK_PARAM(int32_t, "memory_footprint_check_interval_ms",
                      "Memory footprint check interval (ms)", verbose);
}
170
PrepareInputData()171 TfLiteStatus BenchmarkModel::PrepareInputData() { return kTfLiteOk; }
172
ResetInputsAndOutputs()173 TfLiteStatus BenchmarkModel::ResetInputsAndOutputs() { return kTfLiteOk; }
174
Run(int min_num_times,float min_secs,float max_secs,RunType run_type,TfLiteStatus * invoke_status)175 Stat<int64_t> BenchmarkModel::Run(int min_num_times, float min_secs,
176 float max_secs, RunType run_type,
177 TfLiteStatus* invoke_status) {
178 Stat<int64_t> run_stats;
179 TFLITE_LOG(INFO) << "Running benchmark for at least " << min_num_times
180 << " iterations and at least " << min_secs << " seconds but"
181 << " terminate if exceeding " << max_secs << " seconds.";
182 int64_t now_us = profiling::time::NowMicros();
183 int64_t min_finish_us = now_us + static_cast<int64_t>(min_secs * 1.e6f);
184 int64_t max_finish_us = now_us + static_cast<int64_t>(max_secs * 1.e6f);
185
186 *invoke_status = kTfLiteOk;
187 float inter_run_sleep_time = params_.Get<float>("run_delay");
188 auto run_frequency = params_.Get<float>("run_frequency");
189 double manual_inter_run_gap = 1.0 / run_frequency;
190 // float doesn't have sufficient precision for storing this number
191 double next_run_finish_time = now_us * 1e-6 + manual_inter_run_gap;
192 for (int run = 0; (run < min_num_times || now_us < min_finish_us) &&
193 now_us <= max_finish_us;
194 run++) {
195 ResetInputsAndOutputs();
196 listeners_.OnSingleRunStart(run_type);
197 int64_t start_us = profiling::time::NowMicros();
198 TfLiteStatus status = RunImpl();
199 int64_t end_us = profiling::time::NowMicros();
200 listeners_.OnSingleRunEnd();
201
202 run_stats.UpdateStat(end_us - start_us);
203 if (run_frequency > 0) {
204 inter_run_sleep_time =
205 next_run_finish_time - profiling::time::NowMicros() * 1e-6;
206 next_run_finish_time += manual_inter_run_gap;
207 }
208 // Note when "inter_run_sleep_time" is negative or 0.0,
209 // the function will return immediately.
210 util::SleepForSeconds(inter_run_sleep_time);
211 now_us = profiling::time::NowMicros();
212
213 if (status != kTfLiteOk) {
214 *invoke_status = status;
215 }
216 }
217
218 std::stringstream stream;
219 run_stats.OutputToStream(&stream);
220 TFLITE_LOG(INFO) << stream.str() << std::endl;
221
222 return run_stats;
223 }
224
ValidateParams()225 TfLiteStatus BenchmarkModel::ValidateParams() {
226 if (params_.Get<bool>("report_peak_memory_footprint")) {
227 const int32_t interval =
228 params_.Get<int32_t>("memory_footprint_check_interval_ms");
229 if (interval <= 0) {
230 TFLITE_LOG(WARN) << "--memory_footprint_check_interval_ms is set to "
231 << interval
232 << " (ms), This value is invalid, and it will be set to "
233 "the default value "
234 << kMemoryCheckIntervalMs << " (ms).";
235 params_.Set<int32_t>("memory_footprint_check_interval_ms",
236 kMemoryCheckIntervalMs);
237 }
238 }
239 return kTfLiteOk;
240 }
241
Run(int argc,char ** argv)242 TfLiteStatus BenchmarkModel::Run(int argc, char** argv) {
243 TF_LITE_ENSURE_STATUS(ParseFlags(argc, argv));
244 return Run();
245 }
246
// Full benchmark pipeline: validates parameters, initializes the model,
// performs warmup and timed inference runs, and reports timing/memory
// statistics to all registered listeners. Returns the first error from
// validation/init/input-prep, or the invoke status of the runs.
TfLiteStatus BenchmarkModel::Run() {
  TF_LITE_ENSURE_STATUS(ValidateParams());

  LogParams();

  // Start peak-memory monitoring (if enabled) before any model work so the
  // whole initialization + inference span is covered.
  auto peak_memory_reporter = MayCreateMemoryUsageMonitor();
  if (peak_memory_reporter != nullptr) peak_memory_reporter->Start();
  const double model_size_mb = MayGetModelFileSize() / 1e6;
  // Snapshot memory/time around Init() to attribute startup cost separately
  // from inference cost.
  const auto start_mem_usage = profiling::memory::GetMemoryUsage();
  int64_t initialization_start_us = profiling::time::NowMicros();
  TF_LITE_ENSURE_STATUS(Init());
  const auto init_end_mem_usage = profiling::memory::GetMemoryUsage();
  int64_t initialization_end_us = profiling::time::NowMicros();
  int64_t startup_latency_us = initialization_end_us - initialization_start_us;
  const auto init_mem_usage = init_end_mem_usage - start_mem_usage;

  if (model_size_mb > 0) {
    TFLITE_LOG(INFO) << "The input model file size (MB): " << model_size_mb;
  }
  TFLITE_LOG(INFO) << "Initialized session in " << startup_latency_us / 1e3
                   << "ms.";

  TF_LITE_ENSURE_STATUS(PrepareInputData());

  TfLiteStatus status = kTfLiteOk;
  uint64_t input_bytes = ComputeInputBytes();

  // Overwrite certain parameters when --dry_run=true is set.
  // Zeroed run counts plus negative min durations make both Run() calls below
  // exit their loops immediately, so no kernels are invoked.
  if (params_.Get<bool>("dry_run")) {
    params_.Set("warmup_runs", 0);
    params_.Set("warmup_min_secs", -1.0f);
    params_.Set("num_runs", 0);
    params_.Set("min_secs", -1.0f);
  }

  listeners_.OnBenchmarkStart(params_);
  Stat<int64_t> warmup_time_us =
      Run(params_.Get<int32_t>("warmup_runs"),
          params_.Get<float>("warmup_min_secs"), params_.Get<float>("max_secs"),
          WARMUP, &status);
  // A warmup failure aborts the benchmark; a failure during the timed runs
  // (below) still reports results before returning the error.
  if (status != kTfLiteOk) {
    return status;
  }

  Stat<int64_t> inference_time_us =
      Run(params_.Get<int32_t>("num_runs"), params_.Get<float>("min_secs"),
          params_.Get<float>("max_secs"), REGULAR, &status);
  const auto overall_mem_usage =
      profiling::memory::GetMemoryUsage() - start_mem_usage;

  float peak_mem_mb = profiling::memory::MemoryUsageMonitor::kInvalidMemUsageMB;
  if (peak_memory_reporter != nullptr) {
    peak_memory_reporter->Stop();
    peak_mem_mb = peak_memory_reporter->GetPeakMemUsageInMB();
  }

  listeners_.OnBenchmarkEnd({model_size_mb, startup_latency_us, input_bytes,
                             warmup_time_us, inference_time_us, init_mem_usage,
                             overall_mem_usage, peak_mem_mb});
  return status;
}
308
ParseFlags(int * argc,char ** argv)309 TfLiteStatus BenchmarkModel::ParseFlags(int* argc, char** argv) {
310 auto flag_list = GetFlags();
311 const bool parse_result =
312 Flags::Parse(argc, const_cast<const char**>(argv), flag_list);
313 // "--help" flag is added in tools/delegates/default_execution_provider.cc. As
314 // this is an optional dependency, we need to check whether "--help" exists or
315 // not first.
316 if (!parse_result ||
317 (params_.HasParam("help") && params_.Get<bool>("help"))) {
318 std::string usage = Flags::Usage(argv[0], flag_list);
319 TFLITE_LOG(ERROR) << usage;
320 // Returning kTfLiteError intentionally when "--help=true" is specified so
321 // that the caller could check the return value to decide stopping the
322 // execution.
323 return kTfLiteError;
324 }
325
326 std::string unconsumed_args =
327 Flags::ArgsToString(*argc, const_cast<const char**>(argv));
328 if (!unconsumed_args.empty()) {
329 TFLITE_LOG(WARN) << "Unconsumed cmdline flags: " << unconsumed_args;
330 }
331
332 return kTfLiteOk;
333 }
334
335 std::unique_ptr<profiling::memory::MemoryUsageMonitor>
MayCreateMemoryUsageMonitor() const336 BenchmarkModel::MayCreateMemoryUsageMonitor() const {
337 if (!params_.Get<bool>("report_peak_memory_footprint")) return nullptr;
338
339 return std::unique_ptr<profiling::memory::MemoryUsageMonitor>(
340 new profiling::memory::MemoryUsageMonitor(
341 params_.Get<int32_t>("memory_footprint_check_interval_ms")));
342 }
343
344 } // namespace benchmark
345 } // namespace tflite
346