/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <unordered_set>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_split.h"
#include "ruy/profiler/profiler.h"  // from @ruy
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/optional_debug_tools.h"
#include "tensorflow/lite/profiling/profile_summary_formatter.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/benchmark/profiling_listener.h"
#include "tensorflow/lite/tools/delegates/delegate_provider.h"
#include "tensorflow/lite/tools/logging.h"

void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);

// Version with Weak linker attribute doing nothing: if someone links this
// library with another definition of this function (presumably to actually
// register custom ops), that version will be used instead.
void ABSL_ATTRIBUTE_WEAK
RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {}

namespace tflite {
namespace benchmark {
namespace {

// Backward compat with previous approach to enabling op profiling.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif

// Dumps ruy profiling events if the ruy profiler is enabled.
class RuyProfileListener : public BenchmarkListener {
 public:
  void OnBenchmarkStart(const BenchmarkParams& params) override;

  void OnBenchmarkEnd(const BenchmarkResults& results) override;

 private:
  std::unique_ptr<ruy::profiler::ScopeProfile> ruy_profile_;
};

void RuyProfileListener::OnBenchmarkStart(const BenchmarkParams& params) {
  ruy_profile_.reset(new ruy::profiler::ScopeProfile);
}

void RuyProfileListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  ruy_profile_ = nullptr;
}

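// Dumps the TfLite interpreter's internal state immediately before and/or
// after invocation, as controlled by the 'print_preinvoke_state' and
// 'print_postinvoke_state' params.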
class InterpreterStatePrinter : public BenchmarkListener {
 public:
  explicit InterpreterStatePrinter(Interpreter* interpreter)
      : interpreter_(interpreter) {}

  void OnBenchmarkStart(const BenchmarkParams& params) override {
    params_ = &params;
    if (params_->Get<bool>("print_preinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter pre-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter pre-invoke "
                          "state ends====\n";
    }
  }

  void OnBenchmarkEnd(const BenchmarkResults& results) override {
    if (params_->Get<bool>("print_postinvoke_state")) {
      TFLITE_LOG(INFO) << "\n====Printing out TfLite interpreter post-invoke "
                          "state begins====";
      tflite::PrintInterpreterState(interpreter_);
      TFLITE_LOG(INFO) << "====Printing out TfLite interpreter post-invoke "
                          "state ends====\n";
    }
  }

 private:
  Interpreter* const interpreter_ = nullptr;  // doesn't own the memory.
  const BenchmarkParams* params_ = nullptr;   // doesn't own the memory.
};

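// Splits 'str' on 'delim'. Unlike a bare absl::StrSplit, an empty input
// yields an empty vector rather than a single empty element.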
std::vector<std::string> Split(const std::string& str, const char delim) {
  if (str.empty()) {
    return {};
  }
  return absl::StrSplit(str, delim);
}

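// Returns the total number of elements (the product of all dimensions)
// described by 'dim_array'.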
int GetNumElements(const TfLiteIntArray* dim_array) {
  int num_elements = 1;
  for (size_t i = 0; i < dim_array->size; i++) {
    num_elements *= dim_array->data[i];
  }
  return num_elements;
}

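// Fills 'buffer' with one string produced by 'random_func' per element of the
// tensor shape described by 'dim_array'.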
void FillRandomString(tflite::DynamicBuffer* buffer,
                      const TfLiteIntArray* dim_array,
                      const std::function<std::string()>& random_func) {
  int num_elements = GetNumElements(dim_array);
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}

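// Returns the index of the entry in 'info' whose name matches 'input_name',
// or aborts the benchmark if no such entry exists.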
int FindLayerInfoIndex(std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info,
                       const std::string& input_name,
                       const string& names_string) {
  for (int i = 0; i < info->size(); ++i) {
    if (info->at(i).name == input_name) {
      return i;
    }
  }
  TFLITE_LOG(FATAL) << "Cannot find the corresponding input_layer name("
                    << input_name << ") in --input_layer as " << names_string;
  return -1;
}

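// Parses the --input_layer_value_range string (e.g. "input1,1,2:input2,0,254")
// and records the low/high bounds on the matching entries of 'info'.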
TfLiteStatus PopulateInputValueRanges(
    const std::string& names_string, const std::string& value_ranges_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_ranges = Split(value_ranges_string, ':');
  for (const auto& val : value_ranges) {
    std::vector<std::string> name_range = Split(val, ',');
    if (name_range.size() != 3) {
      TFLITE_LOG(ERROR) << "Wrong input value range item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_range[0], names_string);

    // Parse the range value.
    int low, high;
    bool has_low = absl::SimpleAtoi(name_range[1], &low);
    bool has_high = absl::SimpleAtoi(name_range[2], &high);
    if (!has_low || !has_high || low > high) {
      TFLITE_LOG(ERROR)
          << "Wrong low and high value of the input value range specified: "
          << val;
      return kTfLiteError;
    }
    info->at(layer_info_idx).has_value_range = true;
    info->at(layer_info_idx).low = low;
    info->at(layer_info_idx).high = high;
  }
  return kTfLiteOk;
}

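// Parses the --input_layer_value_files string (e.g. "input1:file1,input2:file2")
// and records the value file path on the matching entries of 'info'.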
TfLiteStatus PopulateInputValueFiles(
    const std::string& names_string, const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_files = Split(value_files_string, ',');
  for (const auto& val : value_files) {
    std::vector<std::string> name_file = Split(val, ':');
    if (name_file.size() != 2) {
      TFLITE_LOG(ERROR) << "Wrong input value file item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specific input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_file[0], names_string);
    if (info->at(layer_info_idx).has_value_range) {
      TFLITE_LOG(WARN)
          << "The input_name:" << info->at(layer_info_idx).name
          << " appears both in input_layer_value_files and "
             "input_layer_value_range. The input_layer_value_range of the "
             "input_name will be ignored.";
    }
    info->at(layer_info_idx).input_file_path = name_file[1];
  }
  return kTfLiteOk;
}

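// Builds the per-input-layer info from the --input_layer, --input_layer_shape,
// --input_layer_value_range and --input_layer_value_files flag values.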
TfLiteStatus PopulateInputLayerInfo(
    const std::string& names_string, const std::string& shapes_string,
    const std::string& value_ranges_string,
    const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  info->clear();
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return kTfLiteError;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_TOOLS_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return kTfLiteError;
      }
    }
  }

  // Populate input value ranges if specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueRanges(names_string, value_ranges_string, info));

  // Populate input value files if specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueFiles(names_string, value_files_string, info));

  return kTfLiteOk;
}

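// Returns a CSV formatter when 'format_as_csv' is true, otherwise the default
// (human-readable) formatter.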
std::shared_ptr<profiling::ProfileSummaryFormatter>
CreateProfileSummaryFormatter(bool format_as_csv) {
  return format_as_csv
             ? std::make_shared<profiling::ProfileSummaryCSVFormatter>()
             : std::make_shared<profiling::ProfileSummaryDefaultFormatter>();
}

}  // namespace

BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_range",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_files",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("require_full_delegation",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam(
      "enable_op_profiling",
      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
  default_params.AddParam("max_profiling_buffer_entries",
                          BenchmarkParam::Create<int32_t>(1024));
  default_params.AddParam("profiling_output_csv_file",
                          BenchmarkParam::Create<std::string>(""));

  default_params.AddParam("print_preinvoke_state",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam("print_postinvoke_state",
                          BenchmarkParam::Create<bool>(false));

  tools::ProvidedDelegateList delegate_providers(&default_params);
  delegate_providers.AddAllDelegateParams();

  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)),
      random_engine_(std::random_device()()) {
  AddListener(&log_output_);
}

void BenchmarkTfLiteModel::CleanUp() {
  // Free up any tensor data pre-allocated in PrepareInputData.
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() {
  CleanUp();

  // Destroy the owned interpreter earlier than other objects (especially
  // 'owned_delegates_').
  interpreter_.reset();
}

std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<std::string>(
          "input_layer_value_range", &params_,
          "A map-like string representing value range for *integer* input "
          "layers. Each item is separated by ':', and the item value consists "
          "of input layer name and integer-only range values (both low and "
          "high are inclusive) separated by ',', e.g. input1,1,2:input2,0,254"),
      CreateFlag<std::string>(
          "input_layer_value_files", &params_,
          "A map-like string representing value file. Each item is separated "
          "by ',', and the item value consists "
          "of input layer name and value file path separated by ':', e.g. "
          "input1:file_path1,input2:file_path2. If the input_name appears both "
          "in input_layer_value_range and input_layer_value_files, "
          "input_layer_value_range of the input_name will be ignored. The file "
          "format is binary and it should be array format or null separated "
          "strings format."),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
      CreateFlag<bool>("require_full_delegation", &params_,
                       "require delegate to run the entire graph"),
      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling"),
      CreateFlag<int32_t>("max_profiling_buffer_entries", &params_,
                          "max profiling buffer entries"),
      CreateFlag<std::string>(
          "profiling_output_csv_file", &params_,
          "File path to export profile data as CSV, if not set "
          "prints to stdout."),
      CreateFlag<bool>(
          "print_preinvoke_state", &params_,
          "print out the interpreter internals just before calling Invoke. The "
          "internals will include allocated memory size of each tensor etc."),
      CreateFlag<bool>(
          "print_postinvoke_state", &params_,
          "print out the interpreter internals just before benchmark completes "
          "(i.e. after all repeated Invoke calls complete). The internals will "
          "include allocated memory size of each tensor etc.")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());

  tools::ProvidedDelegateList delegate_providers(&params_);
  delegate_providers.AppendCmdlineFlags(&flags);

  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  const bool verbose = params_.Get<bool>("verbose");
  // Always log the value of --graph.
  LOG_BENCHMARK_PARAM(std::string, "graph", "Graph", /*verbose*/ true);
  LOG_BENCHMARK_PARAM(std::string, "input_layer", "Input layers", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_shape", "Input shapes",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_range",
                      "Input value ranges", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files",
                      "Input value files", verbose);

  LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose);
  LOG_BENCHMARK_PARAM(bool, "require_full_delegation",
                      "Require full delegation", verbose);
  LOG_BENCHMARK_PARAM(bool, "enable_op_profiling", "Enable op profiling",
                      verbose);
  LOG_BENCHMARK_PARAM(int32_t, "max_profiling_buffer_entries",
                      "Max profiling buffer entries", verbose);
  LOG_BENCHMARK_PARAM(std::string, "profiling_output_csv_file",
                      "CSV File to export profiling data to", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_preinvoke_state",
                      "Print pre-invoke interpreter state", verbose);
  LOG_BENCHMARK_PARAM(bool, "print_postinvoke_state",
                      "Print post-invoke interpreter state", verbose);

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    delegate_provider->LogParams(params_, verbose);
  }
}

TfLiteStatus BenchmarkTfLiteModel::ValidateParams() {
  TF_LITE_ENSURE_STATUS(BenchmarkModel::ValidateParams());

  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return kTfLiteError;
  }

  return PopulateInputLayerInfo(
      params_.Get<std::string>("input_layer"),
      params_.Get<std::string>("input_layer_shape"),
      params_.Get<std::string>("input_layer_value_range"),
      params_.Get<std::string>("input_layer_value_files"), &inputs_);
}

uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_TOOLS_CHECK(interpreter_);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter_->inputs()) {
    auto* t = interpreter_->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

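// Returns the size of the model file in bytes, or a negative value if the
// file cannot be opened.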
int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {
  std::ifstream in_file(params_.Get<std::string>("graph"),
                        std::ios::binary | std::ios::ate);
  return in_file.tellg();
}

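// Reads the contents of 'input_file_path' into an InputTensorData for tensor
// 't': '\0'-separated strings for string tensors, raw bytes otherwise. Aborts
// if the file's contents don't match the tensor's size.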
BenchmarkTfLiteModel::InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
    const TfLiteTensor& t, const std::string& input_file_path) {
  std::ifstream value_file(input_file_path, std::ios::binary);
  if (!value_file.good()) {
    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
                      << input_file_path;
  }
  InputTensorData t_data;
  if (t.type == kTfLiteString) {
    t_data.data = VoidUniquePtr(
        static_cast<void*>(new tflite::DynamicBuffer()),
        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
    std::string line;
    size_t num_line = 0;
    // Read lines delimited by '\0'.
    while (std::getline(value_file, line, '\0')) {
      num_line++;
      static_cast<DynamicBuffer*>(t_data.data.get())
          ->AddString(line.data(), line.length());
    }
    int num_elements = GetNumElements(t.dims);
    if (num_line != num_elements) {
      TFLITE_LOG(FATAL) << "The number of strings in the input_layer_value_file("
                        << input_file_path << ") is " << num_line
                        << ". It should be " << num_elements << ".";
    }
  } else {
    value_file.seekg(0, std::ios_base::end);
    if (value_file.tellg() != t.bytes) {
      TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                        << value_file.tellg() << " bytes. It should be "
                        << t.bytes << " bytes.";
    }
    t_data.bytes = t.bytes;
    t_data.data =
        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
    value_file.clear();
    value_file.seekg(0, std::ios_base::beg);
    value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
  }
  return t_data;
}

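// Fills an InputTensorData with uniformly distributed random values for
// tensor 't', honoring the value range from 'layer_info' when one was
// specified.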
BenchmarkTfLiteModel::InputTensorData
BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t,
                                             const InputLayerInfo* layer_info) {
  bool has_value_range = false;
  int low_range = 0;
  int high_range = 0;
  if (layer_info) {
    has_value_range = layer_info->has_value_range;
    low_range = layer_info->low;
    high_range = layer_info->high;
  }
  int num_elements = GetNumElements(t.dims);
  switch (t.type) {
    case kTfLiteFloat32: {
      return CreateInputTensorData<float>(
          num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f));
    }
    case kTfLiteFloat16: {
      // TODO(b/138843274): Remove this preprocessor guard when bug is fixed.
#if TFLITE_ENABLE_FP16_CPU_BENCHMARKS
#if __GNUC__ && \
    (__clang__ || __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE)
      // __fp16 is available on Clang or when __ARM_FP16_FORMAT_* is defined.
      return CreateInputTensorData<__fp16>(
          num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f));
#else
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name
                        << " of type FLOAT16 on this platform.";
#endif
#else
      // You need to build with -DTFLITE_ENABLE_FP16_CPU_BENCHMARKS=1 using a
      // compiler that supports __fp16 type. Note: when using Clang and *not*
      // linking with compiler-rt, a definition of __gnu_h2f_ieee and
      // __gnu_f2h_ieee must be supplied.
      TFLITE_LOG(FATAL) << "Populating the tensor " << t.name
                        << " of type FLOAT16 is disabled.";
#endif  // TFLITE_ENABLE_FP16_CPU_BENCHMARKS
      break;
    }
    case kTfLiteFloat64: {
      return CreateInputTensorData<double>(
          num_elements, std::uniform_real_distribution<double>(-0.5, 0.5));
    }
    case kTfLiteInt64: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int64_t>(
          num_elements, std::uniform_int_distribution<int64_t>(low, high));
    }
    case kTfLiteInt32: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int32_t>(
          num_elements, std::uniform_int_distribution<int32_t>(low, high));
    }
    case kTfLiteUInt32: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<uint32_t>(
          num_elements, std::uniform_int_distribution<uint32_t>(low, high));
    }
    case kTfLiteInt16: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int16_t>(
          num_elements, std::uniform_int_distribution<int16_t>(low, high));
    }
    case kTfLiteUInt8: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 254;
      // std::uniform_int_distribution is specified not to support char types.
      return CreateInputTensorData<uint8_t>(
          num_elements, std::uniform_int_distribution<uint32_t>(low, high));
    }
    case kTfLiteInt8: {
      int low = has_value_range ? low_range : -127;
      int high = has_value_range ? high_range : 127;
      // std::uniform_int_distribution is specified not to support char types.
      return CreateInputTensorData<int8_t>(
          num_elements, std::uniform_int_distribution<int32_t>(low, high));
    }
    case kTfLiteString: {
      // Don't populate input for string. Instead, return a default-initialized
      // `InputTensorData` object directly.
      break;
    }
    case kTfLiteBool: {
      // According to the std::uniform_int_distribution specification, non-int
      // types are not supported.
      return CreateInputTensorData<bool>(
          num_elements, std::uniform_int_distribution<uint32_t>(0, 1));
    }
    default: {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name
                        << " of type " << t.type;
    }
  }
  return InputTensorData();
}

TfLiteStatus BenchmarkTfLiteModel::PrepareInputData() {
  CleanUp();

  // Note that the correspondence between 'interpreter_inputs' and 'inputs_'
  // (i.e. the specified input layer info) has already been checked in
  // BenchmarkTfLiteModel::Init() before this function is called, so we simply
  // use the corresponding input layer info to initialize the input data
  // values properly.
  auto interpreter_inputs = interpreter_->inputs();
  for (int i = 0; i < interpreter_inputs.size(); ++i) {
    int tensor_index = interpreter_inputs[i];
    const TfLiteTensor& t = *(interpreter_->tensor(tensor_index));
    const InputLayerInfo* input_layer_info = nullptr;
    // Note that when input layer parameters (i.e. --input_layer,
    // --input_layer_shape) are not specified, inputs_ is empty.
    if (!inputs_.empty()) input_layer_info = &inputs_[i];

    InputTensorData t_data;
    if (input_layer_info && !input_layer_info->input_file_path.empty()) {
      t_data = LoadInputTensorData(t, input_layer_info->input_file_path);
    } else {
      t_data = CreateRandomTensorData(t, input_layer_info);
    }
    inputs_data_.push_back(std::move(t_data));
  }
  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter_->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type == kTfLiteString) {
      if (inputs_data_[j].data) {
        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
            ->WriteToTensor(t, /*new_shape=*/nullptr);
      } else {
        tflite::DynamicBuffer buffer;
        FillRandomString(&buffer, t->dims, []() {
          return "we're have some friends over saturday to hang out in the "
                 "yard";
        });
        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
      }
    } else {
      std::memcpy(t->data.raw, inputs_data_[j].data.get(),
                  inputs_data_[j].bytes);
    }
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() {
  auto resolver = GetOpResolver();
  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  const bool use_caching = params_.Get<bool>("use_caching");
  tflite::InterpreterBuilder(*model_, *resolver)(&interpreter_, num_threads);
  if (!interpreter_) {
    TFLITE_LOG(ERROR) << "Failed to initialize the interpreter";
    return kTfLiteError;
  }
  // Manually enable caching behavior in TF Lite interpreter.
  if (use_caching) {
    external_context_.reset(new tflite::ExternalCpuBackendContext());
    std::unique_ptr<tflite::CpuBackendContext> cpu_backend_context(
        new tflite::CpuBackendContext());
    cpu_backend_context->SetUseCaching(true);
    cpu_backend_context->SetMaxNumThreads(num_threads);
    external_context_->set_internal_backend_context(
        std::move(cpu_backend_context));
    interpreter_->SetExternalContext(kTfLiteCpuBackendContext,
                                     external_context_.get());
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::Init() {
  TF_LITE_ENSURE_STATUS(LoadModel());
  TF_LITE_ENSURE_STATUS(InitInterpreter());

  // Install profilers (if requested) right after the interpreter is created
  // so that any memory allocations inside the TFLite runtime can be recorded
  // if the installed profiler profiles memory usage.

  // Adjust "max_profiling_buffer_entries" according to the loaded model.
  int total_nodes = 0;
  for (int i = 0; i < interpreter_->subgraphs_size(); ++i) {
    // subgraph(...) is a non-const member method.
    total_nodes += static_cast<int>(interpreter_->subgraph(i)->nodes_size());
  }
  if (total_nodes > params_.Get<int32_t>("max_profiling_buffer_entries")) {
    constexpr int kProfilingBufferHeadrooms = 512;
    params_.Set<int32_t>("max_profiling_buffer_entries",
                         total_nodes + kProfilingBufferHeadrooms);
  }

  AddOwnedListener(MayCreateProfilingListener());
  AddOwnedListener(std::unique_ptr<BenchmarkListener>(
      new InterpreterStatePrinter(interpreter_.get())));

  interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16"));

  owned_delegates_.clear();

  // Contains the ids of all TfLiteNodes that have been checked to see whether
  // they are delegated or not.
  std::unordered_set<int> checked_node_ids;
  tools::ProvidedDelegateList delegate_providers(&params_);
  auto created_delegates = delegate_providers.CreateAllRankedDelegates();
  TFLITE_MAY_LOG(INFO, (created_delegates.size() >= 2))
      << "Going to apply " << created_delegates.size()
      << " delegates one after another.";
  for (auto& created_delegate : created_delegates) {
    const auto* delegate_provider = created_delegate.provider;
    tools::TfLiteDelegatePtr delegate = std::move(created_delegate.delegate);
    TFLITE_TOOLS_CHECK(delegate != nullptr)
        << "The delegate created by the delegate provider should not be "
           "nullptr!";
    if (interpreter_->ModifyGraphWithDelegate(delegate.get()) != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Failed to apply " << delegate_provider->GetName()
                        << " delegate.";
      return kTfLiteError;
    } else {
      // Ideally, such delegate info should already be computed when the
      // delegate is being applied to the model graph.
      int num_delegated_kernels = 0;
      for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
        int node_id = interpreter_->execution_plan()[i];
        if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
          continue;
        }
        const TfLiteNode& node =
            interpreter_->node_and_registration(node_id)->first;

        // Note that the 'delegate' here could be an ExternalDelegateWrapper
        // object that wraps an actual external delegate, in which case
        // 'node.delegate' will be different from 'delegate' because
        // 'node.delegate' refers to the actual external delegate.
        if (node.delegate != nullptr) {
          num_delegated_kernels++;
          checked_node_ids.insert(node_id);
        }
      }
      bool fully_delegated = (num_delegated_kernels == 1 &&
                              interpreter_->execution_plan().size() == 1);

      if (params_.Get<bool>("require_full_delegation") && !fully_delegated) {
        TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected.";
        return kTfLiteError;
      }
      if (fully_delegated) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be completely"
                         << " executed by the delegate.";
      } else if (num_delegated_kernels > 0) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be partially"
                         << " executed by the delegate w/ "
                         << num_delegated_kernels << " delegate kernels.";
      } else {
        TFLITE_LOG(INFO)
            << "Though " << delegate_provider->GetName()
            << " delegate is explicitly applied, the model graph will not be"
            << " executed by the delegate.";
      }
    }
    owned_delegates_.emplace_back(std::move(delegate));
  }

  auto interpreter_inputs = interpreter_->inputs();

  if (!inputs_.empty()) {
    TFLITE_TOOLS_CHECK_EQ(inputs_.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #:" << inputs_.size()
        << " expected: " << interpreter_inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }

    if (t->type != kTfLiteString && input.shape.size() != t->dims->size) {
      TFLITE_LOG(ERROR) << "Input tensor #" << i << " should have "
                        << t->dims->size << " dimensions!";
      return kTfLiteError;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter_->ResizeInputTensor(i, input.shape);
    }
  }

  if (interpreter_->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to allocate tensors!";
    return kTfLiteError;
  }

  AddOwnedListener(
      std::unique_ptr<BenchmarkListener>(new RuyProfileListener()));

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::LoadModel() {
  std::string graph = params_.Get<std::string>("graph");
  model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model_) {
    TFLITE_LOG(ERROR) << "Failed to mmap model " << graph;
    return kTfLiteError;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  return kTfLiteOk;
}

std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver()
    const {
  tflite::ops::builtin::BuiltinOpResolver* resolver = nullptr;
  // When --use_xnnpack is explicitly set to false, skip applying the default
  // XNNPACK delegate in the TfLite runtime so that the original execution path
  // based on the unmodified model graph is still exercised.
  if (params_.HasParam("use_xnnpack") &&
      params_.HasValueSet<bool>("use_xnnpack") &&
      !params_.Get<bool>("use_xnnpack")) {
    resolver =
        new tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates();
  } else {
    resolver = new tflite::ops::builtin::BuiltinOpResolver();
  }
  RegisterSelectedOps(resolver);
  return std::unique_ptr<tflite::OpResolver>(resolver);
}

std::unique_ptr<BenchmarkListener>
BenchmarkTfLiteModel::MayCreateProfilingListener() const {
  if (!params_.Get<bool>("enable_op_profiling")) return nullptr;

  return std::unique_ptr<BenchmarkListener>(new ProfilingListener(
      interpreter_.get(), params_.Get<int32_t>("max_profiling_buffer_entries"),
      params_.Get<std::string>("profiling_output_csv_file"),
      CreateProfileSummaryFormatter(
          !params_.Get<std::string>("profiling_output_csv_file").empty())));
}

TfLiteStatus BenchmarkTfLiteModel::RunImpl() { return interpreter_->Invoke(); }

}  // namespace benchmark
}  // namespace tflite