/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <unordered_set>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"
#include "ruy/profiler/profiler.h"  // from @ruy
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/profiling/profile_summary_formatter.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/benchmark/profiling_listener.h"
#include "tensorflow/lite/tools/delegates/delegate_provider.h"
#include "tensorflow/lite/tools/logging.h"
void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);

// Version with a weak linker attribute that does nothing: if this library is
// linked with another definition of this function (presumably one that
// actually registers custom ops), that version will be used instead.
void ABSL_ATTRIBUTE_WEAK
RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {}

namespace tflite {
namespace benchmark {
namespace {

// Backward compat with the previous approach to enabling op profiling.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif

// Dumps ruy profiling events if the ruy profiler is enabled.
class RuyProfileListener : public BenchmarkListener {
 public:
  void OnBenchmarkStart(const BenchmarkParams& params) override;

  void OnBenchmarkEnd(const BenchmarkResults& results) override;

 private:
  std::unique_ptr<ruy::profiler::ScopeProfile> ruy_profile_;
};

void RuyProfileListener::OnBenchmarkStart(const BenchmarkParams& params) {
  ruy_profile_.reset(new ruy::profiler::ScopeProfile);
}

void RuyProfileListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  ruy_profile_ = nullptr;
}

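// Prints the interpreter's internal state (via tflite::PrintInterpreterState)
// right before and/or right after the benchmark run, depending on the
// "print_preinvoke_state" and "print_postinvoke_state" params.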
class InterpreterStatePrinter : public BenchmarkListener {
 public:
  explicit InterpreterStatePrinter(Interpreter* interpreter)
      : interpreter_(interpreter) {}

  void OnBenchmarkStart(const BenchmarkParams& params) override {
    params_ = &params;
    if (params_->Get<bool>("print_preinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter pre-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter pre-invoke "
                          "state ends====\n";
    }
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    if (params_->Get<bool>("print_postinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter post-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter post-invoke "
                          "state ends====\n";
    }
  }

 private:
  Interpreter* const interpreter_ = nullptr;  // Doesn't own the memory.
  const BenchmarkParams* params_ = nullptr;   // Doesn't own the memory.
};

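// Splits 'str' by 'delim' and returns the pieces; an empty input yields an
// empty vector rather than a single empty piece.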
std::vector<std::string> Split(const std::string& str, const char delim) {
  if (str.empty()) {
    return {};
  }
  return absl::StrSplit(str, delim);
}

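// Returns the total number of elements described by 'dim_array', i.e. the
// product of all of its dimensions.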
int GetNumElements(const TfLiteIntArray* dim_array) {
  int num_elements = 1;
  for (size_t i = 0; i < dim_array->size; i++) {
    num_elements *= dim_array->data[i];
  }
  return num_elements;
}

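// Fills a string tensor buffer with one string produced by 'random_func' per
// element of the tensor described by 'dim_array'.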
void FillRandomString(tflite::DynamicBuffer* buffer,
                      const TfLiteIntArray* dim_array,
                      const std::function<std::string()>& random_func) {
  int num_elements = GetNumElements(dim_array);
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}

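// Returns the index of the entry in 'info' whose name matches 'input_name',
// or terminates with a FATAL log if no such entry exists.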
int FindLayerInfoIndex(std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info,
                       const std::string& input_name,
                       const string& names_string) {
  for (int i = 0; i < info->size(); ++i) {
    if (info->at(i).name == input_name) {
      return i;
    }
  }
  TFLITE_LOG(FATAL) << "Cannot find the corresponding input_layer name("
                    << input_name << ") in --input_layer as " << names_string;
  return -1;
}

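// Parses --input_layer_value_range (e.g. "input1,1,2:input2,0,254") and
// records the low/high bounds on the matching entries in 'info'.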
TfLiteStatus PopulateInputValueRanges(
    const std::string& names_string, const std::string& value_ranges_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_ranges = Split(value_ranges_string, ':');
  for (const auto& val : value_ranges) {
    std::vector<std::string> name_range = Split(val, ',');
    if (name_range.size() != 3) {
      TFLITE_LOG(ERROR) << "Wrong input value range item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_range[0], names_string);

    // Parse the range value.
    int low, high;
    bool has_low = absl::SimpleAtoi(name_range[1], &low);
    bool has_high = absl::SimpleAtoi(name_range[2], &high);
    if (!has_low || !has_high || low > high) {
      TFLITE_LOG(ERROR)
          << "Wrong low and high value of the input value range specified: "
          << val;
      return kTfLiteError;
    }
    info->at(layer_info_idx).has_value_range = true;
    info->at(layer_info_idx).low = low;
    info->at(layer_info_idx).high = high;
  }
  return kTfLiteOk;
}

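// Parses --input_layer_value_files (e.g. "input1:file_path1,input2:file_path2")
// and records the file path on the matching entries in 'info'. A value file
// takes precedence over a value range specified for the same input.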
TfLiteStatus PopulateInputValueFiles(
    const std::string& names_string, const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_files = Split(value_files_string, ',');
  for (const auto& val : value_files) {
    std::vector<std::string> name_file = Split(val, ':');
    if (name_file.size() != 2) {
      TFLITE_LOG(ERROR) << "Wrong input value file item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_file[0], names_string);
    if (info->at(layer_info_idx).has_value_range) {
      TFLITE_LOG(WARN)
          << "The input_name:" << info->at(layer_info_idx).name
          << " appears both in input_layer_value_files and "
             "input_layer_value_range. The input_layer_value_range of the "
             "input_name will be ignored.";
    }
    info->at(layer_info_idx).input_file_path = name_file[1];
  }
  return kTfLiteOk;
}

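// Builds the list of InputLayerInfo from the --input_layer,
// --input_layer_shape, --input_layer_value_range and --input_layer_value_files
// params, validating that the specified names and shapes are consistent.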
TfLiteStatus PopulateInputLayerInfo(
    const std::string& names_string, const std::string& shapes_string,
    const std::string& value_ranges_string,
    const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  info->clear();
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return kTfLiteError;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_TOOLS_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return kTfLiteError;
      }
    }
  }

  // Populate input value ranges if specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueRanges(names_string, value_ranges_string, info));

  // Populate input value files if specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueFiles(names_string, value_files_string, info));

  return kTfLiteOk;
}

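// Returns a CSV formatter when 'format_as_csv' is true, otherwise the default
// human-readable formatter.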
std::shared_ptr<profiling::ProfileSummaryFormatter>
CreateProfileSummaryFormatter(bool format_as_csv) {
  return format_as_csv
             ? std::make_shared<profiling::ProfileSummaryCSVFormatter>()
             : std::make_shared<profiling::ProfileSummaryDefaultFormatter>();
}

}  // namespace

BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_range",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_files",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("require_full_delegation",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam(
      "enable_op_profiling",
      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
  default_params.AddParam("max_profiling_buffer_entries",
                          BenchmarkParam::Create<int32_t>(1024));
  default_params.AddParam("profiling_output_csv_file",
                          BenchmarkParam::Create<std::string>(""));

  default_params.AddParam("print_preinvoke_state",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("print_postinvoke_state",
                          BenchmarkParam::Create<bool>(false));

  tools::ProvidedDelegateList delegate_providers(&default_params);
  delegate_providers.AddAllDelegateParams();

  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)),
      random_engine_(std::random_device()()) {
  AddListener(&log_output_);
}

void BenchmarkTfLiteModel::CleanUp() {
  // Free up any tensor data that was pre-allocated during PrepareInputData.
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() {
  CleanUp();

  // Destroy the owned interpreter earlier than other objects (especially
  // 'owned_delegates_').
  interpreter_.reset();
}

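// Illustrative command line for the benchmark tool built around this class
// (the binary name and the paths/layer names below are examples, not defined
// in this file):
//   benchmark_model --graph=/path/to/model.tflite \
//     --input_layer=input --input_layer_shape=1,224,224,3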
std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<std::string>(
          "input_layer_value_range", &params_,
          "A map-like string representing value range for *integer* input "
          "layers. Each item is separated by ':', and the item value consists "
          "of input layer name and integer-only range values (both low and "
          "high are inclusive) separated by ',', e.g. input1,1,2:input2,0,254"),
      CreateFlag<std::string>(
          "input_layer_value_files", &params_,
          "A map-like string representing value file. Each item is separated "
          "by ',', and the item value consists "
          "of input layer name and value file path separated by ':', e.g. "
          "input1:file_path1,input2:file_path2. If the input_name appears both "
          "in input_layer_value_range and input_layer_value_files, "
          "input_layer_value_range of the input_name will be ignored. The file "
          "format is binary and it should be array format or null separated "
          "strings format."),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
      CreateFlag<bool>("require_full_delegation", &params_,
                       "require delegate to run the entire graph"),
      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling"),
      CreateFlag<int32_t>("max_profiling_buffer_entries", &params_,
                          "max profiling buffer entries"),
      CreateFlag<std::string>(
          "profiling_output_csv_file", &params_,
          "File path to export profile data as CSV, if not set "
          "prints to stdout."),
      CreateFlag<bool>(
          "print_preinvoke_state", &params_,
          "print out the interpreter internals just before calling Invoke. The "
          "internals will include allocated memory size of each tensor etc."),
      CreateFlag<bool>(
          "print_postinvoke_state", &params_,
          "print out the interpreter internals just before benchmark completes "
          "(i.e. after all repeated Invoke calls complete). The internals will "
          "include allocated memory size of each tensor etc.")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());

  tools::ProvidedDelegateList delegate_providers(&params_);
  delegate_providers.AppendCmdlineFlags(&flags);

  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  const bool verbose = params_.Get<bool>("verbose");
  // Always log the value of --graph.
  LOG_BENCHMARK_PARAM(std::string, "graph", "Graph", /*verbose*/ true);
  LOG_BENCHMARK_PARAM(std::string, "input_layer", "Input layers", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_shape", "Input shapes",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_range",
                      "Input value ranges", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files",
                      "Input value files", verbose);

  LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose);
  LOG_BENCHMARK_PARAM(bool, "require_full_delegation",
                      "Require full delegation", verbose);
  LOG_BENCHMARK_PARAM(bool, "enable_op_profiling", "Enable op profiling",
                      verbose);
  LOG_BENCHMARK_PARAM(int32_t, "max_profiling_buffer_entries",
                      "Max profiling buffer entries", verbose);
  LOG_BENCHMARK_PARAM(std::string, "profiling_output_csv_file",
                      "CSV File to export profiling data to", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_preinvoke_state",
                      "Print pre-invoke interpreter state", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_postinvoke_state",
                      "Print post-invoke interpreter state", verbose);

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    delegate_provider->LogParams(params_, verbose);
  }
}

TfLiteStatus BenchmarkTfLiteModel::ValidateParams() {
  TF_LITE_ENSURE_STATUS(BenchmarkModel::ValidateParams());

  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return kTfLiteError;
  }

  return PopulateInputLayerInfo(
      params_.Get<std::string>("input_layer"),
      params_.Get<std::string>("input_layer_shape"),
      params_.Get<std::string>("input_layer_value_range"),
      params_.Get<std::string>("input_layer_value_files"), &inputs_);
}

uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_TOOLS_CHECK(interpreter_);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter_->inputs()) {
    auto* t = interpreter_->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {
  std::ifstream in_file(params_.Get<std::string>("graph"),
                        std::ios::binary | std::ios::ate);
  return in_file.tellg();
}

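// Reads the contents of 'input_file_path' into an InputTensorData buffer for
// tensor 't'. String tensors are read as '\0'-separated strings into a
// DynamicBuffer; all other types are read as raw bytes and must match the
// tensor's byte size exactly.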
BenchmarkTfLiteModel::InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
    const TfLiteTensor& t, const std::string& input_file_path) {
  std::ifstream value_file(input_file_path, std::ios::binary);
  if (!value_file.good()) {
    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
                      << input_file_path;
  }
  InputTensorData t_data;
  if (t.type == kTfLiteString) {
    t_data.data = VoidUniquePtr(
        static_cast<void*>(new tflite::DynamicBuffer()),
        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
    std::string line;
    size_t num_line = 0;
    // Read lines separated by the delimiter '\0'.
    while (std::getline(value_file, line, '\0')) {
      num_line++;
      static_cast<DynamicBuffer*>(t_data.data.get())
          ->AddString(line.data(), line.length());
    }
    int num_elements = GetNumElements(t.dims);
    if (num_line != num_elements) {
      TFLITE_LOG(FATAL) << "The number of strings in the "
                           "input_layer_value_file("
                        << input_file_path << ") is " << num_line
                        << ". It should be " << num_elements << ".";
    }
  } else {
    value_file.seekg(0, std::ios_base::end);
    if (value_file.tellg() != t.bytes) {
      TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                        << value_file.tellg() << " bytes. It should be "
                        << t.bytes << " bytes.";
    }
    t_data.bytes = t.bytes;
    t_data.data =
        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
    value_file.clear();
    value_file.seekg(0, std::ios_base::beg);
    value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
  }
  return t_data;
}

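// Creates randomly-generated data for tensor 't'. For integer types, values
// are drawn uniformly from the range given in 'layer_info' when one was
// specified via --input_layer_value_range, otherwise from a type-specific
// default range.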
BenchmarkTfLiteModel::InputTensorData
BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t,
                                             const InputLayerInfo* layer_info) {
  bool has_value_range = false;
  int low_range = 0;
  int high_range = 0;
  if (layer_info) {
    has_value_range = layer_info->has_value_range;
    low_range = layer_info->low;
    high_range = layer_info->high;
  }
  int num_elements = GetNumElements(t.dims);
  switch (t.type) {
    case kTfLiteFloat32: {
      return CreateInputTensorData<float>(
          num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f));
    }
    case kTfLiteFloat16: {
      // TODO(b/138843274): Remove this preprocessor guard when bug is fixed.
#if TFLITE_ENABLE_FP16_CPU_BENCHMARKS
#if __GNUC__ && \
    (__clang__ || __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE)
      // __fp16 is available on Clang or when __ARM_FP16_FORMAT_* is defined.
      return CreateInputTensorData<__fp16>(
          num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f));
#else
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name
                        << " of type FLOAT16 on this platform.";
#endif
#else
      // You need to build with -DTFLITE_ENABLE_FP16_CPU_BENCHMARKS=1 using a
      // compiler that supports __fp16 type. Note: when using Clang and *not*
      // linking with compiler-rt, a definition of __gnu_h2f_ieee and
      // __gnu_f2h_ieee must be supplied.
      TFLITE_LOG(FATAL) << "Populating the tensor " << t.name
                        << " of type FLOAT16 is disabled.";
#endif  // TFLITE_ENABLE_FP16_CPU_BENCHMARKS
      break;
    }
    case kTfLiteFloat64: {
      return CreateInputTensorData<double>(
          num_elements, std::uniform_real_distribution<double>(-0.5, 0.5));
    }
    case kTfLiteInt64: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int64_t>(
          num_elements, std::uniform_int_distribution<int64_t>(low, high));
    }
    case kTfLiteInt32: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int32_t>(
          num_elements, std::uniform_int_distribution<int32_t>(low, high));
    }
    case kTfLiteUInt32: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<uint32_t>(
          num_elements, std::uniform_int_distribution<uint32_t>(low, high));
    }
    case kTfLiteInt16: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int16_t>(
          num_elements, std::uniform_int_distribution<int16_t>(low, high));
    }
    case kTfLiteUInt8: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 254;
      // std::uniform_int_distribution is specified not to support char types.
      return CreateInputTensorData<uint8_t>(
          num_elements, std::uniform_int_distribution<uint32_t>(low, high));
    }
    case kTfLiteInt8: {
      int low = has_value_range ? low_range : -127;
      int high = has_value_range ? high_range : 127;
      // std::uniform_int_distribution is specified not to support char types.
      return CreateInputTensorData<int8_t>(
          num_elements, std::uniform_int_distribution<int32_t>(low, high));
    }
    case kTfLiteString: {
      // Don't populate input for string. Instead, return a default-initialized
      // `InputTensorData` object directly.
      break;
    }
    case kTfLiteBool: {
      // std::uniform_int_distribution is specified not to support non-int
      // types such as bool, so uint32_t is used here instead.
      return CreateInputTensorData<bool>(
          num_elements, std::uniform_int_distribution<uint32_t>(0, 1));
    }
    default: {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name
                        << " of type " << t.type;
    }
  }
  return InputTensorData();
}

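// Prepares data for every model input, either loaded from the per-input value
// file or generated via CreateRandomTensorData.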
TfLiteStatus BenchmarkTfLiteModel::PrepareInputData() {
  CleanUp();

  // Note the corresponding relation between 'interpreter_inputs' and 'inputs_'
  // (i.e. the specified input layer info) has been checked in
  // BenchmarkTfLiteModel::Init() before calling this function. So, we simply
  // use the corresponding input layer info to initialize the input data value
  // properly.
  auto interpreter_inputs = interpreter_->inputs();
  for (int i = 0; i < interpreter_inputs.size(); ++i) {
    int tensor_index = interpreter_inputs[i];
    const TfLiteTensor& t = *(interpreter_->tensor(tensor_index));
    const InputLayerInfo* input_layer_info = nullptr;
    // Note that when input layer parameters (i.e. --input_layer,
    // --input_layer_shape) are not specified, inputs_ is empty.
    if (!inputs_.empty()) input_layer_info = &inputs_[i];

    InputTensorData t_data;
    if (input_layer_info && !input_layer_info->input_file_path.empty()) {
      t_data = LoadInputTensorData(t, input_layer_info->input_file_path);
    } else {
      t_data = CreateRandomTensorData(t, input_layer_info);
    }
    inputs_data_.push_back(std::move(t_data));
  }
  return kTfLiteOk;
}

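// Copies the prepared input data into the interpreter's input tensors before
// each benchmark run. String tensors without prepared data are filled with a
// fixed placeholder string.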
TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter_->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type == kTfLiteString) {
      if (inputs_data_[j].data) {
        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
            ->WriteToTensor(t, /*new_shape=*/nullptr);
      } else {
        tflite::DynamicBuffer buffer;
        FillRandomString(&buffer, t->dims, []() {
          return "we're have some friends over saturday to hang out in the "
                 "yard";
        });
        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
      }
    } else {
      std::memcpy(t->data.raw, inputs_data_[j].data.get(),
                  inputs_data_[j].bytes);
    }
  }

  return kTfLiteOk;
}

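// Builds the tflite::Interpreter from the loaded model and, when requested via
// --use_caching, attaches an external CPU backend context with caching
// enabled.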
TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() {
  auto resolver = GetOpResolver();
  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  const bool use_caching = params_.Get<bool>("use_caching");
  tflite::InterpreterBuilder(*model_, *resolver)(&interpreter_, num_threads);
  if (!interpreter_) {
    TFLITE_LOG(ERROR) << "Failed to initialize the interpreter";
    return kTfLiteError;
  }
  // Manually enable caching behavior in TF Lite interpreter.
  if (use_caching) {
    external_context_.reset(new tflite::ExternalCpuBackendContext());
    std::unique_ptr<tflite::CpuBackendContext> cpu_backend_context(
        new tflite::CpuBackendContext());
    cpu_backend_context->SetUseCaching(true);
    cpu_backend_context->SetMaxNumThreads(num_threads);
    external_context_->set_internal_backend_context(
        std::move(cpu_backend_context));
    interpreter_->SetExternalContext(kTfLiteCpuBackendContext,
                                     external_context_.get());
  }

  return kTfLiteOk;
}

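// Loads the model, creates the interpreter, installs profiling/state-printing
// listeners, applies all requested delegates (logging how much of the graph
// each one takes over), validates the user-specified inputs against the model
// inputs, resizes them, and finally allocates tensors.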
TfLiteStatus BenchmarkTfLiteModel::Init() {
  TF_LITE_ENSURE_STATUS(LoadModel());
  TF_LITE_ENSURE_STATUS(InitInterpreter());

  // Install profilers if necessary right after the interpreter is created so
  // that any memory allocations inside the TFLite runtime can be recorded if
  // the installed profiler profiles memory usage.

  // Adjust "max_profiling_buffer_entries" according to the loaded model.
  int total_nodes = 0;
  for (int i = 0; i < interpreter_->subgraphs_size(); ++i) {
    // subgraph(...) is a non-const member method.
    total_nodes += static_cast<int>(interpreter_->subgraph(i)->nodes_size());
  }
  if (total_nodes > params_.Get<int32_t>("max_profiling_buffer_entries")) {
    constexpr int kProfilingBufferHeadrooms = 512;
    params_.Set<int32_t>("max_profiling_buffer_entries",
                         total_nodes + kProfilingBufferHeadrooms);
  }

  AddOwnedListener(MayCreateProfilingListener());
  AddOwnedListener(std::unique_ptr<BenchmarkListener>(
      new InterpreterStatePrinter(interpreter_.get())));

  interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16"));

  owned_delegates_.clear();

  // Contains the ids of all TfLiteNodes that have already been checked to see
  // whether they are delegated.
  std::unordered_set<int> checked_node_ids;
  tools::ProvidedDelegateList delegate_providers(&params_);
  auto created_delegates = delegate_providers.CreateAllRankedDelegates();
  TFLITE_MAY_LOG(INFO, (created_delegates.size() >= 2))
      << "Going to apply " << created_delegates.size()
      << " delegates one after another.";
  for (auto& created_delegate : created_delegates) {
    const auto* delegate_provider = created_delegate.provider;
    tools::TfLiteDelegatePtr delegate = std::move(created_delegate.delegate);
    TFLITE_TOOLS_CHECK(delegate != nullptr)
        << "The delegate created by the delegate provider should not be "
           "nullptr!";
    if (interpreter_->ModifyGraphWithDelegate(delegate.get()) != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Failed to apply " << delegate_provider->GetName()
                        << " delegate.";
      return kTfLiteError;
    } else {
      // Ideally, such delegate info should already be computed when the
      // delegate is being applied to the model graph.
      int num_delegated_kernels = 0;
      for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
        int node_id = interpreter_->execution_plan()[i];
        if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
          continue;
        }
        const TfLiteNode& node =
            interpreter_->node_and_registration(node_id)->first;

        // Note that the 'delegate' here could be an ExternalDelegateWrapper
        // object that wraps an actual external delegate, in which case
        // 'node.delegate' will be different from 'delegate' because
        // 'node.delegate' refers to the actual external delegate.
        if (node.delegate != nullptr) {
          num_delegated_kernels++;
          checked_node_ids.insert(node_id);
        }
      }
      bool fully_delegated = (num_delegated_kernels == 1 &&
                              interpreter_->execution_plan().size() == 1);

      if (params_.Get<bool>("require_full_delegation") && !fully_delegated) {
        TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected.";
        return kTfLiteError;
      }
      if (fully_delegated) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be completely"
                         << " executed by the delegate.";
      } else if (num_delegated_kernels > 0) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be partially"
                         << " executed by the delegate w/ "
                         << num_delegated_kernels << " delegate kernels.";
      } else {
        TFLITE_LOG(INFO)
            << "Though " << delegate_provider->GetName()
            << " delegate is explicitly applied, the model graph will not be"
            << " executed by the delegate.";
      }
    }
    owned_delegates_.emplace_back(std::move(delegate));
  }

  auto interpreter_inputs = interpreter_->inputs();

  if (!inputs_.empty()) {
    TFLITE_TOOLS_CHECK_EQ(inputs_.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << inputs_.size()
        << " expected: " << interpreter_inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }

    if (t->type != kTfLiteString && input.shape.size() != t->dims->size) {
      TFLITE_LOG(ERROR) << "Input tensor #" << i << " should have "
                        << t->dims->size << " dimensions!";
      return kTfLiteError;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter_->ResizeInputTensor(i, input.shape);
    }
  }

  if (interpreter_->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to allocate tensors!";
    return kTfLiteError;
  }

  AddOwnedListener(
      std::unique_ptr<BenchmarkListener>(new RuyProfileListener()));

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::LoadModel() {
  std::string graph = params_.Get<std::string>("graph");
  model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model_) {
    TFLITE_LOG(ERROR) << "Failed to mmap model " << graph;
    return kTfLiteError;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  return kTfLiteOk;
}

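// Returns the op resolver used to build the interpreter: the builtin resolver
// without default delegates when --use_xnnpack is explicitly set to false,
// otherwise the regular builtin resolver, with any selected custom ops
// registered on top.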
std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver()
    const {
  tflite::ops::builtin::BuiltinOpResolver* resolver = nullptr;
  // When --use_xnnpack is explicitly set to false, skip applying the default
  // XNNPACK delegate in the TfLite runtime so that the original execution path
  // based on the unmodified model graph is still exercised.
  if (params_.HasParam("use_xnnpack") &&
      params_.HasValueSet<bool>("use_xnnpack") &&
      !params_.Get<bool>("use_xnnpack")) {
    resolver =
        new tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates();
  } else {
    resolver = new tflite::ops::builtin::BuiltinOpResolver();
  }
  RegisterSelectedOps(resolver);
  return std::unique_ptr<tflite::OpResolver>(resolver);
}

std::unique_ptr<BenchmarkListener>
BenchmarkTfLiteModel::MayCreateProfilingListener() const {
  if (!params_.Get<bool>("enable_op_profiling")) return nullptr;

  return std::unique_ptr<BenchmarkListener>(new ProfilingListener(
      interpreter_.get(), params_.Get<int32_t>("max_profiling_buffer_entries"),
      params_.Get<std::string>("profiling_output_csv_file"),
      CreateProfileSummaryFormatter(
          !params_.Get<std::string>("profiling_output_csv_file").empty())));
}

TfLiteStatus BenchmarkTfLiteModel::RunImpl() { return interpreter_->Invoke(); }

}  // namespace benchmark
}  // namespace tflite