/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"

#include <cstdarg>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <unordered_set>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/strings/numbers.h"
#include "ruy/profiler/profiler.h"  // from @ruy
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/cpu_backend_context.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/op_resolver.h"
#include "tensorflow/lite/profiling/profile_summary_formatter.h"
#include "tensorflow/lite/string_util.h"
#include "tensorflow/lite/tools/benchmark/benchmark_utils.h"
#include "tensorflow/lite/tools/benchmark/profiling_listener.h"
#include "tensorflow/lite/tools/delegates/delegate_provider.h"
#include "tensorflow/lite/tools/logging.h"

void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);

// Version with a weak linker attribute that does nothing: if this library is
// linked with another definition of this function (presumably one that
// actually registers custom ops), that definition will be used instead.
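// As an illustrative sketch only (the op name and registration function are
// hypothetical), a strong definition elsewhere might look like:
//
//   void RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {
//     resolver->AddCustom("MyCustomOp", Register_MY_CUSTOM_OP());
//   }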
void ABSL_ATTRIBUTE_WEAK
RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {}

namespace tflite {
namespace benchmark {
namespace {

// Backward compat with previous approach to enabling op profiling.
#if defined(TFLITE_PROFILING_ENABLED)
constexpr bool kOpProfilingEnabledDefault = true;
#else
constexpr bool kOpProfilingEnabledDefault = false;
#endif
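// (TFLITE_PROFILING_ENABLED is a compile-time define; presumably it is
// supplied at build time, e.g. via a compiler flag like
// -DTFLITE_PROFILING_ENABLED.)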

// Dumps ruy profiling events if the ruy profiler is enabled.
class RuyProfileListener : public BenchmarkListener {
 public:
  void OnBenchmarkStart(const BenchmarkParams& params) override;

  void OnBenchmarkEnd(const BenchmarkResults& results) override;

 private:
  std::unique_ptr<ruy::profiler::ScopeProfile> ruy_profile_;
};

void RuyProfileListener::OnBenchmarkStart(const BenchmarkParams& params) {
  ruy_profile_.reset(new ruy::profiler::ScopeProfile);
}

void RuyProfileListener::OnBenchmarkEnd(const BenchmarkResults& results) {
  ruy_profile_ = nullptr;
}

std::vector<std::string> Split(const std::string& str, const char delim) {
  std::vector<std::string> results;
  if (!util::SplitAndParse(str, delim, &results)) {
    results.clear();
  }
  return results;
}
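// For example, Split("1,224,224,3", ',') yields {"1", "224", "224", "3"};
// an empty vector is returned if parsing fails.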

int GetNumElements(const TfLiteIntArray* dim_array) {
  int num_elements = 1;
  // Note: 'size' is an int, so use a signed index to avoid a signed/unsigned
  // comparison.
  for (int i = 0; i < dim_array->size; ++i) {
    num_elements *= dim_array->data[i];
  }
  return num_elements;
}
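// For example, for dims {1, 224, 224, 3} this returns
// 1 * 224 * 224 * 3 = 150528.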

void FillRandomString(tflite::DynamicBuffer* buffer,
                      const TfLiteIntArray* dim_array,
                      const std::function<std::string()>& random_func) {
  int num_elements = GetNumElements(dim_array);
  for (int i = 0; i < num_elements; ++i) {
    auto str = random_func();
    buffer->AddString(str.data(), str.length());
  }
}
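// For example (mirroring the string-tensor handling in
// ResetInputsAndOutputs() below, with a hypothetical 'tensor' pointer):
//
//   tflite::DynamicBuffer buffer;
//   FillRandomString(&buffer, tensor->dims,
//                    []() { return std::string("x"); });
//   buffer.WriteToTensor(tensor, /*new_shape=*/nullptr);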

int FindLayerInfoIndex(std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info,
                       const std::string& input_name,
                       const std::string& names_string) {
  for (int i = 0; i < info->size(); ++i) {
    if (info->at(i).name == input_name) {
      return i;
    }
  }
  TFLITE_LOG(FATAL) << "Cannot find the corresponding input_layer name ("
                    << input_name << ") in --input_layer as " << names_string;
  return -1;
}

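// Parses --input_layer_value_range, a ':'-separated list of "name,low,high"
// triples, e.g. "input1,1,2:input2,0,254", and records the inclusive
// [low, high] range on the matching entry in 'info'.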
TfLiteStatus PopulateInputValueRanges(
    const std::string& names_string, const std::string& value_ranges_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_ranges = Split(value_ranges_string, ':');
  for (const auto& val : value_ranges) {
    std::vector<std::string> name_range = Split(val, ',');
    if (name_range.size() != 3) {
      TFLITE_LOG(ERROR) << "Wrong input value range item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specified input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_range[0], names_string);

    // Parse the range values.
    int low, high;
    bool has_low = absl::SimpleAtoi(name_range[1], &low);
    bool has_high = absl::SimpleAtoi(name_range[2], &high);
    if (!has_low || !has_high || low > high) {
      TFLITE_LOG(ERROR)
          << "Wrong low and high values of the input value range specified: "
          << val;
      return kTfLiteError;
    }
    info->at(layer_info_idx).has_value_range = true;
    info->at(layer_info_idx).low = low;
    info->at(layer_info_idx).high = high;
  }
  return kTfLiteOk;
}

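// Parses --input_layer_value_files, a ','-separated list of "name:file_path"
// pairs, e.g. "input1:file_path1,input2:file_path2", and records the file
// path on the matching entry in 'info'. A file path set here takes precedence
// over any value range given for the same input.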
TfLiteStatus PopulateInputValueFiles(
    const std::string& names_string, const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  std::vector<std::string> value_files = Split(value_files_string, ',');
  for (const auto& val : value_files) {
    std::vector<std::string> name_file = Split(val, ':');
    if (name_file.size() != 2) {
      TFLITE_LOG(ERROR) << "Wrong input value file item specified: " << val;
      return kTfLiteError;
    }

    // Ensure the specified input layer name exists.
    int layer_info_idx = FindLayerInfoIndex(info, name_file[0], names_string);
    if (info->at(layer_info_idx).has_value_range) {
      TFLITE_LOG(WARN)
          << "The input_name: " << info->at(layer_info_idx).name
          << " appears in both input_layer_value_files and "
             "input_layer_value_range. The input_layer_value_range of the "
             "input_name will be ignored.";
    }
    info->at(layer_info_idx).input_file_path = name_file[1];
  }
  return kTfLiteOk;
}

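// Builds the InputLayerInfo list from the input-related flags, e.g.
//   --input_layer=input1,input2 --input_layer_shape=1,224,224,4:1,20
// declares two inputs and their shapes; value ranges and value files are
// then layered on top by the two helpers above.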
TfLiteStatus PopulateInputLayerInfo(
    const std::string& names_string, const std::string& shapes_string,
    const std::string& value_ranges_string,
    const std::string& value_files_string,
    std::vector<BenchmarkTfLiteModel::InputLayerInfo>* info) {
  info->clear();
  std::vector<std::string> names = Split(names_string, ',');
  std::vector<std::string> shapes = Split(shapes_string, ':');

  if (names.size() != shapes.size()) {
    TFLITE_LOG(ERROR) << "The number of items in"
                      << " --input_layer_shape (" << shapes_string << ", with "
                      << shapes.size() << " items)"
                      << " must match the number of items in"
                      << " --input_layer (" << names_string << ", with "
                      << names.size() << " items)."
                      << " For example --input_layer=input1,input2"
                      << " --input_layer_shape=1,224,224,4:1,20";
    return kTfLiteError;
  }

  for (int i = 0; i < names.size(); ++i) {
    info->push_back(BenchmarkTfLiteModel::InputLayerInfo());
    BenchmarkTfLiteModel::InputLayerInfo& input = info->back();

    input.name = names[i];

    TFLITE_TOOLS_CHECK(util::SplitAndParse(shapes[i], ',', &input.shape))
        << "Incorrect size string specified: " << shapes[i];
    for (int dim : input.shape) {
      if (dim == -1) {
        TFLITE_LOG(ERROR)
            << "Any unknown sizes in the shapes (-1's) must be replaced"
            << " with the size you want to benchmark with.";
        return kTfLiteError;
      }
    }
  }

  // Populate input value ranges if specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueRanges(names_string, value_ranges_string, info));

  // Populate input value files if specified.
  TF_LITE_ENSURE_STATUS(
      PopulateInputValueFiles(names_string, value_files_string, info));

  return kTfLiteOk;
}

std::shared_ptr<profiling::ProfileSummaryFormatter>
CreateProfileSummaryFormatter(bool format_as_csv) {
  return format_as_csv
             ? std::make_shared<profiling::ProfileSummaryCSVFormatter>()
             : std::make_shared<profiling::ProfileSummaryDefaultFormatter>();
}

}  // namespace

BenchmarkParams BenchmarkTfLiteModel::DefaultParams() {
  BenchmarkParams default_params = BenchmarkModel::DefaultParams();
  default_params.AddParam("graph", BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_shape",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_range",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("input_layer_value_files",
                          BenchmarkParam::Create<std::string>(""));
  default_params.AddParam("allow_fp16", BenchmarkParam::Create<bool>(false));
  default_params.AddParam("require_full_delegation",
                          BenchmarkParam::Create<bool>(false));
  default_params.AddParam(
      "enable_op_profiling",
      BenchmarkParam::Create<bool>(kOpProfilingEnabledDefault));
  default_params.AddParam("max_profiling_buffer_entries",
                          BenchmarkParam::Create<int32_t>(1024));
  default_params.AddParam("profiling_output_csv_file",
                          BenchmarkParam::Create<std::string>(""));

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    default_params.Merge(delegate_provider->DefaultParams());
  }

  return default_params;
}

BenchmarkTfLiteModel::BenchmarkTfLiteModel(BenchmarkParams params)
    : BenchmarkModel(std::move(params)),
      random_engine_(std::random_device()()) {
  AddListener(&log_output_);
}

void BenchmarkTfLiteModel::CleanUp() {
  // Free up any tensor data pre-allocated in PrepareInputData().
  inputs_data_.clear();
}

BenchmarkTfLiteModel::~BenchmarkTfLiteModel() {
  CleanUp();

  // Destroy the owned interpreter earlier than other objects (especially
  // 'owned_delegates_').
  interpreter_.reset();
}

std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
  std::vector<Flag> flags = BenchmarkModel::GetFlags();
  std::vector<Flag> specific_flags = {
      CreateFlag<std::string>("graph", &params_, "graph file name"),
      CreateFlag<std::string>("input_layer", &params_, "input layer names"),
      CreateFlag<std::string>("input_layer_shape", &params_,
                              "input layer shape"),
      CreateFlag<std::string>(
          "input_layer_value_range", &params_,
          "A map-like string representing value range for *integer* input "
          "layers. Each item is separated by ':', and the item value consists "
          "of input layer name and integer-only range values (both low and "
          "high are inclusive) separated by ',', e.g. input1,1,2:input2,0,254"),
      CreateFlag<std::string>(
          "input_layer_value_files", &params_,
          "A map-like string representing value file. Each item is separated "
          "by ',', and the item value consists "
          "of input layer name and value file path separated by ':', e.g. "
          "input1:file_path1,input2:file_path2. If the input_name appears both "
          "in input_layer_value_range and input_layer_value_files, "
          "input_layer_value_range of the input_name will be ignored. The file "
          "format is binary and it should be array format or null separated "
          "strings format."),
      CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
      CreateFlag<bool>("require_full_delegation", &params_,
                       "require delegate to run the entire graph"),
      CreateFlag<bool>("enable_op_profiling", &params_, "enable op profiling"),
      CreateFlag<int32_t>("max_profiling_buffer_entries", &params_,
                          "max profiling buffer entries"),
      CreateFlag<std::string>(
          "profiling_output_csv_file", &params_,
          "File path to export profile data as CSV; if not set, "
          "prints to stdout.")};

  flags.insert(flags.end(), specific_flags.begin(), specific_flags.end());

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    auto delegate_flags = delegate_provider->CreateFlags(&params_);
    flags.insert(flags.end(), delegate_flags.begin(), delegate_flags.end());
  }

  return flags;
}

void BenchmarkTfLiteModel::LogParams() {
  BenchmarkModel::LogParams();
  const bool verbose = params_.Get<bool>("verbose");
  // Always log the value of --graph.
  LOG_BENCHMARK_PARAM(std::string, "graph", "Graph", /*verbose*/ true);
  LOG_BENCHMARK_PARAM(std::string, "input_layer", "Input layers", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_shape", "Input shapes",
                      verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_range",
                      "Input value ranges", verbose);
  LOG_BENCHMARK_PARAM(std::string, "input_layer_value_files",
                      "Input value files", verbose);

  LOG_BENCHMARK_PARAM(bool, "allow_fp16", "Allow fp16", verbose);
  LOG_BENCHMARK_PARAM(bool, "require_full_delegation",
                      "Require full delegation", verbose);
  LOG_BENCHMARK_PARAM(bool, "enable_op_profiling", "Enable op profiling",
                      verbose);
  LOG_BENCHMARK_PARAM(int32_t, "max_profiling_buffer_entries",
                      "Max profiling buffer entries", verbose);
  LOG_BENCHMARK_PARAM(std::string, "profiling_output_csv_file",
                      "CSV file to export profiling data to", verbose);

  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    delegate_provider->LogParams(params_, verbose);
  }
}

TfLiteStatus BenchmarkTfLiteModel::ValidateParams() {
  if (params_.Get<std::string>("graph").empty()) {
    TFLITE_LOG(ERROR)
        << "Please specify the name of your TF Lite input file with --graph";
    return kTfLiteError;
  }

  return PopulateInputLayerInfo(
      params_.Get<std::string>("input_layer"),
      params_.Get<std::string>("input_layer_shape"),
      params_.Get<std::string>("input_layer_value_range"),
      params_.Get<std::string>("input_layer_value_files"), &inputs_);
}

uint64_t BenchmarkTfLiteModel::ComputeInputBytes() {
  TFLITE_TOOLS_CHECK(interpreter_);
  uint64_t total_input_bytes = 0;
  for (int input : interpreter_->inputs()) {
    auto* t = interpreter_->tensor(input);
    total_input_bytes += t->bytes;
  }
  return total_input_bytes;
}

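// Opening with std::ios::ate positions the stream at the end of the file, so
// tellg() directly reports the model file size in bytes (or -1 if the file
// could not be opened).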
int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {
  std::ifstream in_file(params_.Get<std::string>("graph"),
                        std::ios::binary | std::ios::ate);
  return in_file.tellg();
}

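// Reads input data for tensor 't' from 'input_file_path'. For string tensors
// the file is parsed as '\0'-separated strings; for all other types it is
// read as raw bytes whose size must exactly match the tensor size. As an
// illustration, a string-tensor file with two elements could be produced
// with something like:
//
//   printf 'str1\0str2\0' > input_strings.bin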
BenchmarkTfLiteModel::InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
    const TfLiteTensor& t, const std::string& input_file_path) {
  std::ifstream value_file(input_file_path, std::ios::binary);
  if (!value_file.good()) {
    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file: "
                      << input_file_path;
  }
  InputTensorData t_data;
  if (t.type == kTfLiteString) {
    t_data.data = VoidUniquePtr(
        static_cast<void*>(new tflite::DynamicBuffer()),
        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
    std::string line;
    size_t num_line = 0;
    // Read lines delimited by '\0'.
    while (std::getline(value_file, line, '\0')) {
      num_line++;
      static_cast<DynamicBuffer*>(t_data.data.get())
          ->AddString(line.data(), line.length());
    }
    int num_elements = GetNumElements(t.dims);
    if (num_line != static_cast<size_t>(num_elements)) {
      TFLITE_LOG(FATAL) << "The number of strings in the "
                           "input_layer_value_file ("
                        << input_file_path << ") is " << num_line
                        << ". It should be " << num_elements << ".";
    }
  } else {
    value_file.seekg(0, std::ios_base::end);
    if (value_file.tellg() != t.bytes) {
      TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                        << value_file.tellg() << " bytes. It should be "
                        << t.bytes << " bytes.";
    }
    t_data.bytes = t.bytes;
    t_data.data =
        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
    value_file.clear();
    value_file.seekg(0, std::ios_base::beg);
    value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
  }
  return t_data;
}

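// Generates random input data for tensor 't', honoring the value range from
// 'layer_info' when one was specified; otherwise a per-type default range is
// used. The buffers themselves are built by the templated
// CreateInputTensorData<T> helper declared in the header.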
BenchmarkTfLiteModel::InputTensorData
BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t,
                                             const InputLayerInfo* layer_info) {
  bool has_value_range = false;
  int low_range = 0;
  int high_range = 0;
  if (layer_info) {
    has_value_range = layer_info->has_value_range;
    low_range = layer_info->low;
    high_range = layer_info->high;
  }
  int num_elements = GetNumElements(t.dims);
  switch (t.type) {
    case kTfLiteFloat32: {
      return CreateInputTensorData<float>(
          num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f));
    }
    case kTfLiteFloat16: {
      // TODO(b/138843274): Remove this preprocessor guard when bug is fixed.
#if TFLITE_ENABLE_FP16_CPU_BENCHMARKS
#if __GNUC__ && \
    (__clang__ || __ARM_FP16_FORMAT_IEEE || __ARM_FP16_FORMAT_ALTERNATIVE)
      // __fp16 is available on Clang or when __ARM_FP16_FORMAT_* is defined.
      return CreateInputTensorData<__fp16>(
          num_elements, std::uniform_real_distribution<float>(-0.5f, 0.5f));
#else
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name
                        << " of type FLOAT16 on this platform.";
#endif
#else
      // You need to build with -DTFLITE_ENABLE_FP16_CPU_BENCHMARKS=1 using a
      // compiler that supports the __fp16 type. Note: when using Clang and
      // *not* linking with compiler-rt, definitions of __gnu_h2f_ieee and
      // __gnu_f2h_ieee must be supplied.
      TFLITE_LOG(FATAL) << "Populating the tensor " << t.name
                        << " of type FLOAT16 is disabled.";
#endif  // TFLITE_ENABLE_FP16_CPU_BENCHMARKS
      break;
    }
    case kTfLiteFloat64: {
      return CreateInputTensorData<double>(
          num_elements, std::uniform_real_distribution<double>(-0.5, 0.5));
    }
    case kTfLiteInt64: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int64_t>(
          num_elements, std::uniform_int_distribution<int64_t>(low, high));
    }
    case kTfLiteInt32: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int32_t>(
          num_elements, std::uniform_int_distribution<int32_t>(low, high));
    }
    case kTfLiteUInt32: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<uint32_t>(
          num_elements, std::uniform_int_distribution<uint32_t>(low, high));
    }
    case kTfLiteInt16: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 99;
      return CreateInputTensorData<int16_t>(
          num_elements, std::uniform_int_distribution<int16_t>(low, high));
    }
    case kTfLiteUInt8: {
      int low = has_value_range ? low_range : 0;
      int high = has_value_range ? high_range : 254;
      // std::uniform_int_distribution is specified not to support char types.
      return CreateInputTensorData<uint8_t>(
          num_elements, std::uniform_int_distribution<uint32_t>(low, high));
    }
    case kTfLiteInt8: {
      int low = has_value_range ? low_range : -127;
      int high = has_value_range ? high_range : 127;
      // std::uniform_int_distribution is specified not to support char types.
      return CreateInputTensorData<int8_t>(
          num_elements, std::uniform_int_distribution<int32_t>(low, high));
    }
    case kTfLiteString: {
      // TODO(haoliang): No need to cache string tensors right now.
      break;
    }
    case kTfLiteBool: {
      // According to the std::uniform_int_distribution specification, non-int
      // types are not supported.
      return CreateInputTensorData<bool>(
          num_elements, std::uniform_int_distribution<uint32_t>(0, 1));
    }
    default: {
      TFLITE_LOG(FATAL) << "Don't know how to populate tensor " << t.name
                        << " of type " << t.type;
    }
  }
  return InputTensorData();
}

TfLiteStatus BenchmarkTfLiteModel::PrepareInputData() {
  CleanUp();

  // Note the correspondence between 'interpreter_inputs' and 'inputs_' (i.e.
  // the specified input layer info) has been checked in
  // BenchmarkTfLiteModel::Init() before this function is called. So, we simply
  // use the corresponding input layer info to initialize the input data values
  // properly.
  auto interpreter_inputs = interpreter_->inputs();
  for (int i = 0; i < interpreter_inputs.size(); ++i) {
    int tensor_index = interpreter_inputs[i];
    const TfLiteTensor& t = *(interpreter_->tensor(tensor_index));
    const InputLayerInfo* input_layer_info = nullptr;
    // Note that when input layer parameters (i.e. --input_layer,
    // --input_layer_shape) are not specified, inputs_ is empty.
    if (!inputs_.empty()) input_layer_info = &inputs_[i];

    InputTensorData t_data;
    if (input_layer_info && !input_layer_info->input_file_path.empty()) {
      t_data = LoadInputTensorData(t, input_layer_info->input_file_path);
    } else {
      t_data = CreateRandomTensorData(t, input_layer_info);
    }
    inputs_data_.push_back(std::move(t_data));
  }
  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
  auto interpreter_inputs = interpreter_->inputs();
  // Set the values of the input tensors from inputs_data_.
  for (int j = 0; j < interpreter_inputs.size(); ++j) {
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type == kTfLiteString) {
      if (inputs_data_[j].data) {
        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
            ->WriteToTensor(t, /*new_shape=*/nullptr);
      } else {
        tflite::DynamicBuffer buffer;
        FillRandomString(&buffer, t->dims, []() {
          return "we're having some friends over saturday to hang out in the "
                 "yard";
        });
        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
      }
    } else {
      std::memcpy(t->data.raw, inputs_data_[j].data.get(),
                  inputs_data_[j].bytes);
    }
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::InitInterpreter() {
  auto resolver = GetOpResolver();
  const int32_t num_threads = params_.Get<int32_t>("num_threads");
  const bool use_caching = params_.Get<bool>("use_caching");
  tflite::InterpreterBuilder(*model_, *resolver)(&interpreter_, num_threads);
  if (!interpreter_) {
    TFLITE_LOG(ERROR) << "Failed to initialize the interpreter";
    return kTfLiteError;
  }
  // Manually enable caching behavior in the TF Lite interpreter.
  if (use_caching) {
    external_context_.reset(new tflite::ExternalCpuBackendContext());
    std::unique_ptr<tflite::CpuBackendContext> cpu_backend_context(
        new tflite::CpuBackendContext());
    cpu_backend_context->SetUseCaching(true);
    cpu_backend_context->SetMaxNumThreads(num_threads);
    external_context_->set_internal_backend_context(
        std::move(cpu_backend_context));
    interpreter_->SetExternalContext(kTfLiteCpuBackendContext,
                                     external_context_.get());
  }

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::Init() {
  TF_LITE_ENSURE_STATUS(LoadModel());
  TF_LITE_ENSURE_STATUS(InitInterpreter());

  // Install profilers if necessary right after the interpreter is created so
  // that any memory allocations inside the TFLite runtime can be recorded if
  // the installed profiler profiles memory usage.
  profiling_listener_ = MayCreateProfilingListener();
  if (profiling_listener_) AddListener(profiling_listener_.get());

  interpreter_->SetAllowFp16PrecisionForFp32(params_.Get<bool>("allow_fp16"));

  owned_delegates_.clear();

  // Contains the ids of all TfLiteNodes that have already been checked for
  // whether they are delegated or not.
  std::unordered_set<int> checked_node_ids;
  for (const auto& delegate_provider :
       tools::GetRegisteredDelegateProviders()) {
    auto delegate = delegate_provider->CreateTfLiteDelegate(params_);
    // It's possible that a delegate of a certain type won't be created because
    // the user-specified benchmark params tell it not to be.
    if (delegate == nullptr) continue;
    if (interpreter_->ModifyGraphWithDelegate(delegate.get()) != kTfLiteOk) {
      TFLITE_LOG(ERROR) << "Failed to apply " << delegate_provider->GetName()
                        << " delegate.";
      return kTfLiteError;
    } else {
      // Ideally, such delegate info should already be computed when the
      // delegate is being applied to the model graph.
      int num_delegated_kernels = 0;
      for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
        int node_id = interpreter_->execution_plan()[i];
        if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
          continue;
        }
        const TfLiteNode& node =
            interpreter_->node_and_registration(node_id)->first;

        // Note that the 'delegate' here could be an ExternalDelegateWrapper
        // object that wraps an actual external delegate, in which case
        // 'node.delegate' will be different from 'delegate' because
        // 'node.delegate' refers to the actual external delegate.
        if (node.delegate != nullptr) {
          num_delegated_kernels++;
          checked_node_ids.insert(node_id);
        }
      }
      // After delegation, a fully delegated graph collapses to a single
      // delegate kernel in the execution plan.
      bool fully_delegated = (num_delegated_kernels == 1 &&
                              interpreter_->execution_plan().size() == 1);

      if (params_.Get<bool>("require_full_delegation") && !fully_delegated) {
        TFLITE_LOG(ERROR) << "Disallowed CPU fallback detected.";
        return kTfLiteError;
      }
      if (fully_delegated) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be completely"
                         << " executed by the delegate.";
      } else if (num_delegated_kernels > 0) {
        TFLITE_LOG(INFO) << "Explicitly applied "
                         << delegate_provider->GetName()
                         << " delegate, and the model graph will be partially"
                         << " executed by the delegate w/ "
                         << num_delegated_kernels << " delegate kernels.";
      } else {
        TFLITE_LOG(INFO)
            << "Though " << delegate_provider->GetName()
            << " delegate is explicitly applied, the model graph will not be"
            << " executed by the delegate.";
      }
    }
    owned_delegates_.emplace_back(std::move(delegate));
  }

  auto interpreter_inputs = interpreter_->inputs();

  if (!inputs_.empty()) {
    TFLITE_TOOLS_CHECK_EQ(inputs_.size(), interpreter_inputs.size())
        << "Inputs mismatch: Model inputs #: " << inputs_.size()
        << " expected: " << interpreter_inputs.size();
  }

  // Check if the tensor names match, and log a warning if they don't.
  // TODO(ycling): Consider making this an error again when the new converter
  // creates tensors with consistent naming.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (input.name != t->name) {
      TFLITE_LOG(WARN) << "Tensor # " << i << " is named " << t->name
                       << " but flags call it " << input.name;
    }

    if (input.shape.size() != t->dims->size) {
      TFLITE_LOG(ERROR) << "Input tensor #" << i << " should have "
                        << t->dims->size << " dimensions!";
      return kTfLiteError;
    }
  }

  // Resize all non-string tensors.
  for (int j = 0; j < inputs_.size(); ++j) {
    const InputLayerInfo& input = inputs_[j];
    int i = interpreter_inputs[j];
    TfLiteTensor* t = interpreter_->tensor(i);
    if (t->type != kTfLiteString) {
      interpreter_->ResizeInputTensor(i, input.shape);
    }
  }

  if (interpreter_->AllocateTensors() != kTfLiteOk) {
    TFLITE_LOG(ERROR) << "Failed to allocate tensors!";
    return kTfLiteError;
  }

  ruy_profiling_listener_.reset(new RuyProfileListener());
  AddListener(ruy_profiling_listener_.get());

  return kTfLiteOk;
}

TfLiteStatus BenchmarkTfLiteModel::LoadModel() {
  std::string graph = params_.Get<std::string>("graph");
  model_ = tflite::FlatBufferModel::BuildFromFile(graph.c_str());
  if (!model_) {
    TFLITE_LOG(ERROR) << "Failed to mmap model " << graph;
    return kTfLiteError;
  }
  TFLITE_LOG(INFO) << "Loaded model " << graph;
  return kTfLiteOk;
}

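// Note: this is where the (possibly weakly-linked) RegisterSelectedOps hook
// defined at the top of this file is invoked, so a strong definition of that
// function can add custom ops to the resolver returned here.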
std::unique_ptr<tflite::OpResolver> BenchmarkTfLiteModel::GetOpResolver()
    const {
  auto resolver = new tflite::ops::builtin::BuiltinOpResolver();
  RegisterSelectedOps(resolver);
  return std::unique_ptr<tflite::OpResolver>(resolver);
}

std::unique_ptr<BenchmarkListener>
BenchmarkTfLiteModel::MayCreateProfilingListener() const {
  if (!params_.Get<bool>("enable_op_profiling")) return nullptr;

  return std::unique_ptr<BenchmarkListener>(new ProfilingListener(
      interpreter_.get(), params_.Get<int32_t>("max_profiling_buffer_entries"),
      params_.Get<std::string>("profiling_output_csv_file"),
      CreateProfileSummaryFormatter(
          !params_.Get<std::string>("profiling_output_csv_file").empty())));
}

TfLiteStatus BenchmarkTfLiteModel::RunImpl() { return interpreter_->Invoke(); }

}  // namespace benchmark
}  // namespace tflite
