/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/experimental/acceleration/mini_benchmark/validator.h"

#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "tensorflow/lite/c/c_api.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
#include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
#include "tensorflow/lite/experimental/acceleration/mini_benchmark/call_register.h"
#include "tensorflow/lite/experimental/acceleration/mini_benchmark/decode_jpeg_register.h"
#include "tensorflow/lite/experimental/acceleration/mini_benchmark/model_loader.h"
#include "tensorflow/lite/experimental/acceleration/mini_benchmark/status_codes.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/interpreter_builder.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/logger.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/mutable_op_resolver.h"

#ifndef TEMP_FAILURE_RETRY
#ifdef __ANDROID__
#error "TEMP_FAILURE_RETRY not set although on Android"
#else  // !defined(__ANDROID__)
#define TEMP_FAILURE_RETRY(exp) exp
#endif  // defined(__ANDROID__)
#endif  // defined(TEMP_FAILURE_RETRY)
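
// TEMP_FAILURE_RETRY retries a system call while it fails with EINTR. Bionic
// (Android's libc) is expected to provide it; the fallback above is a plain
// pass-through. A minimal usage sketch (`fd` and `buffer` are hypothetical):
//
//   ssize_t n = TEMP_FAILURE_RETRY(read(fd, buffer, sizeof(buffer)));
//   // With the pass-through fallback this expands to a single read() call.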

namespace tflite {
namespace acceleration {
namespace {
std::unique_ptr<tflite::delegates::DelegatePluginInterface> LoadDelegatePlugin(
    const std::string& name, const tflite::TFLiteSettings& tflite_settings) {
  return tflite::delegates::DelegatePluginRegistry::CreateByName(
      name + "Plugin", tflite_settings);
}
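
// The registry key is the delegate name plus a "Plugin" suffix, so
// LoadDelegatePlugin("Gpu", settings) resolves the plugin registered as
// "GpuPlugin". A minimal lookup sketch (assuming such a plugin is linked in):
//
//   auto plugin = tflite::delegates::DelegatePluginRegistry::CreateByName(
//       "GpuPlugin", tflite_settings);
//   if (plugin) { auto delegate = plugin->Create(); }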

// Copies the tensor's raw bytes into |output_map|, keyed by tensor name.
void AddTensorDataToMap(TfLiteTensor* tensor,
                        std::map<std::string, std::vector<char>>& output_map) {
  std::vector<char> char_output(TfLiteTensorByteSize(tensor));
  memcpy(char_output.data(), TfLiteTensorData(tensor),
         TfLiteTensorByteSize(tensor));
  output_map.emplace(TfLiteTensorName(tensor), std::move(char_output));
}

constexpr int64_t kMicrosInSecond = 1000 * 1000;
constexpr int64_t kNanosInMicro = 1000;

// CLOCK_BOOTTIME is what Android uses for elapsed time. Wallclock on mobile
// devices can jump due to user actions or network time sync.
int64_t ElapsedTimeMicros() {
  struct timespec ts;
#if defined(__ANDROID__)
  int err = clock_gettime(CLOCK_BOOTTIME, &ts);
#elif defined(_WIN32)
  // clock_gettime is not available on Windows; report failure.
  int err = 1;
#else
  int err = clock_gettime(CLOCK_MONOTONIC, &ts);
#endif
  if (err) {
    return -1;
  }
  return ts.tv_sec * kMicrosInSecond + ts.tv_nsec / kNanosInMicro;
}

class ValidatorProfiler : public ::tflite::Profiler {
 public:
  struct EventData {
    std::string tag;
    int64_t start_time_us = -1;
    int64_t end_time_us = -1;
  };
  const std::vector<EventData>& events() { return events_; }
  // Returns a 1-based handle into events_; 0 means the event is ignored.
  uint32_t BeginEvent(const char* tag, EventType event_type,
                      int64_t event_metadata1,
                      int64_t event_metadata2) override {
    if (event_type != EventType::DEFAULT) {
      return 0;
    }
    events_.push_back({tag, ElapsedTimeMicros(), -1});
    return events_.size();
  }
  void EndEvent(uint32_t event_handle) override {
    if (event_handle == 0) {
      return;
    }
    events_[event_handle - 1].end_time_us = ElapsedTimeMicros();
  }

 private:
  std::vector<EventData> events_;
};

}  // namespace

MinibenchmarkStatus Validator::CheckGoldenOutput(Results* results_out) {
  if (!interpreter_ || !model_loader_->GetModel()) {
    return kMinibenchmarkPreconditionNotMet;
  }
  if (validation_entrypoint_) {
    // Already done.
    return kMinibenchmarkSuccess;
  }
  main_model_ = interpreter_->subgraph(0);
  int validation_entrypoint_index = 0;
  for (int i = 0; i < interpreter_->subgraphs_size(); i++) {
    Subgraph* subgraph = interpreter_->subgraph(i);
    if (subgraph->GetName() == "VALIDATION:main") {
      validation_entrypoint_ = subgraph;
      validation_entrypoint_index = i;
      break;
    }
  }
  if (!validation_entrypoint_) {
    return kMinibenchmarkValidationSubgraphNotFound;
  }
  if (validation_entrypoint_->inputs().size() <= 1) {
    return kMinibenchmarkValidationSubgraphHasTooFewInputs;
  }
  if (validation_entrypoint_->inputs().size() >
      validation_entrypoint_->outputs().size()) {
    return kMinibenchmarkValidationSubgraphHasTooFewOutputs;
  }

  if (validation_entrypoint_->AllocateTensors() != kTfLiteOk) {
    return kMinibenchmarkAllocateTensorsFailed;
  }

  // Check if we have validation data embedded or need to run CPU for it. If
  // the data is embedded, there is already an allocation for it from the
  // model. We can skip running it on CPU and copy the embedded golden output
  // to results_out.
  TfLiteTensor* first_input_tensor =
      validation_entrypoint_->tensor(validation_entrypoint_->inputs()[0]);
  if (first_input_tensor->allocation) {
    // The last input is the jpeg image data; skip it.
    for (int i = 0; i < validation_entrypoint_->inputs().size() - 1; i++) {
      AddTensorDataToMap(
          validation_entrypoint_->tensor(validation_entrypoint_->inputs()[i]),
          results_out->golden_inference_output);
    }
    return kMinibenchmarkSuccess;
  }

  // Create the interpreter to run on CPU.
  tflite::InterpreterBuilder(*model_loader_->GetModel(),
                             *resolver_)(&golden_interpreter_);
  if (!golden_interpreter_) {
    return kMinibenchmarkInterpreterBuilderFailed;
  }
  Subgraph* golden_validation_entrypoint =
      golden_interpreter_->subgraph(validation_entrypoint_index);

  // Run on CPU.
  if (golden_validation_entrypoint->AllocateTensors() != kTfLiteOk) {
    return kMinibenchmarkAllocateTensorsFailed;
  }
  // Set initial golden outputs to 0 to avoid accessing uninitialized memory.
  // Last input is jpeg, skip.
  for (int i = 0; i < golden_validation_entrypoint->inputs().size() - 1; i++) {
    TfLiteTensor* input_tensor = golden_validation_entrypoint->tensor(
        golden_validation_entrypoint->inputs()[i]);
    memset(input_tensor->data.data, 0, input_tensor->bytes);
  }

  if (golden_validation_entrypoint->Invoke() != kTfLiteOk) {
    return kMinibenchmarkInvokeFailed;
  }
  // Copy CPU outputs as golden. Last input is jpeg image data, skip.
  for (int i = 0; i < validation_entrypoint_->inputs().size() - 1; i++) {
    TfLiteTensor* input_tensor =
        validation_entrypoint_->tensor(validation_entrypoint_->inputs()[i]);
    TfLiteTensor* golden_output_tensor = golden_validation_entrypoint->tensor(
        golden_validation_entrypoint->outputs()[i]);
    if (input_tensor->bytes != golden_output_tensor->bytes) {
      return kMinibenchmarkValidationSubgraphInputsDontMatchOutputs;
    }

    memcpy(input_tensor->data.data, golden_output_tensor->data.data,
           golden_output_tensor->bytes);

    AddTensorDataToMap(golden_output_tensor,
                       results_out->golden_inference_output);
  }

  return kMinibenchmarkSuccess;
}
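
// Layout of the VALIDATION:main subgraph, as implied by the checks above
// (a sketch reconstructed from this file, not from a spec):
//
//   inputs:  [golden_output_0, ..., golden_output_N, jpeg_input]
//   outputs: [actual_output_0, ..., actual_output_N, metrics/...]
//
// CheckGoldenOutput() fills the golden-output inputs either from data
// embedded in the model or by running the same subgraph on CPU.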

MinibenchmarkStatus Validator::LoadDelegate() {
  if (!compute_settings_) {
    return kMinibenchmarkPreconditionNotMet;
  }

  // Create delegate plugin and delegate.
  Delegate which_delegate = Delegate_NONE;
  if (compute_settings_->tflite_settings()) {
    which_delegate = compute_settings_->tflite_settings()->delegate();
  }
  std::string delegate_name;
  switch (which_delegate) {
    case Delegate_NONE:
      // Skip creating delegate if running on CPU.
      return kMinibenchmarkSuccess;
    case Delegate_NNAPI:
      delegate_name = "Nnapi";
      break;
    case Delegate_GPU:
      delegate_name = "Gpu";
      break;
    case Delegate_XNNPACK:
      delegate_name = "XNNPack";
      break;
    default:
      return kMinibenchmarkDelegateNotSupported;
  }

  TFLITE_LOG_PROD(TFLITE_LOG_INFO, "Running mini-benchmark on %s",
                  delegate_name.c_str());
  if (!(delegate_plugin_ = LoadDelegatePlugin(
            delegate_name, *compute_settings_->tflite_settings()))) {
    return kMinibenchmarkDelegatePluginNotFound;
  }
  if (!(delegate_ = delegate_plugin_->Create())) {
    return kMinibenchmarkDelegateCreateFailed;
  }
  return kMinibenchmarkSuccess;
}

MinibenchmarkStatus Validator::CreateInterpreter(int* delegate_error_out,
                                                 int* delegated_kernels_out) {
  if (!delegate_error_out || !delegated_kernels_out ||
      !model_loader_->GetModel()) {
    return kMinibenchmarkPreconditionNotMet;
  }
  *delegate_error_out = 0;
  // Create the interpreter with the delegate.
  if (compute_settings_->tflite_settings() &&
      compute_settings_->tflite_settings()->disable_default_delegates()) {
    resolver_ = std::make_unique<
        ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates>();
  } else {
    resolver_ = std::make_unique<::tflite::ops::builtin::BuiltinOpResolver>();
  }
  resolver_->AddCustom("validation/call",
                       ::tflite::acceleration::ops::Register_CALL(), 1);
  resolver_->AddCustom(
      "validation/decode_jpeg",
      ::tflite::acceleration::decode_jpeg_kernel::Register_DECODE_JPEG(), 1);

  tflite::InterpreterBuilder builder(*model_loader_->GetModel(), *resolver_);
  // Add the delegate if not running on CPU.
  if (delegate_ != nullptr) {
    builder.AddDelegate(delegate_.get());
  }
  TfLiteStatus status = builder(&interpreter_);
  if (!interpreter_) {
    // Return the delegate error number if not null.
    *delegate_error_out =
        delegate_plugin_ ? delegate_plugin_->GetDelegateErrno(delegate_.get())
                         : 0;

    TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
                    "Creating Interpreter failed with error code %d.", status);
    return kMinibenchmarkInterpreterBuilderFailed;
  }

  // Check if the model is actually going to execute on the delegate.
  // For now just give a warning, with the exception of the NNAPI SL
  // mini-benchmark. Can consider changing to an error in other contexts.
  // The logic is copied from benchmark_tflite_model.cc.
  // TODO(b/232085640): Replace this logic with Subgraph::IsFullyDelegated()
  // after making that function public.
  absl::flat_hash_set<int> checked_node_ids;
  int num_delegated_kernels = 0;
  for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
    int node_id = interpreter_->execution_plan()[i];
    if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
      continue;
    }
    const TfLiteNode& node =
        interpreter_->node_and_registration(node_id)->first;
    if (node.delegate != nullptr) {
      num_delegated_kernels++;
      checked_node_ids.insert(node_id);
    }
  }
  *delegated_kernels_out = num_delegated_kernels;
  // When the delegate claims the whole graph, the execution plan collapses to
  // a single delegated kernel, so "fully delegated" means exactly one node
  // and that node belongs to the delegate.
  bool fully_delegated = (num_delegated_kernels == 1 &&
                          interpreter_->execution_plan().size() == 1);
  if (!fully_delegated) {
    TFLITE_LOG_PROD(TFLITE_LOG_WARNING,
                    "The model will be %s executed by the delegate.",
                    num_delegated_kernels > 0 ? "partially" : "not");
  }

  return kMinibenchmarkSuccess;
}

MinibenchmarkStatus Validator::RunValidation(Results* results_out) {
  if (!results_out) {
    return kMinibenchmarkPreconditionNotMet;
  }
  if (!model_loader_) {
    return kMinibenchmarkModelReadFailed;
  }

#define MB_RETURN_IF_ERROR(s)                 \
  {                                           \
    MinibenchmarkStatus c = (s);              \
    if (c != kMinibenchmarkSuccess) return c; \
  }
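
  // MB_RETURN_IF_ERROR is local to this function and #undef'd at the end.
  // For example, MB_RETURN_IF_ERROR(LoadDelegate()) expands to:
  //
  //   { MinibenchmarkStatus c = (LoadDelegate());
  //     if (c != kMinibenchmarkSuccess) return c; }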

  MB_RETURN_IF_ERROR(model_loader_->Init());
  // The lifetime of the delegate must be at least as long as the lifetime of
  // any Interpreter.
  int64_t delegate_load_start_time_us = ElapsedTimeMicros();
  MB_RETURN_IF_ERROR(LoadDelegate());
  MB_RETURN_IF_ERROR(CreateInterpreter(&results_out->delegate_error,
                                       &results_out->delegated_kernels));
  int64_t delegate_load_end_time_us = ElapsedTimeMicros();
  MB_RETURN_IF_ERROR(CheckGoldenOutput(results_out));
  ValidatorProfiler profiler;
  main_model_->SetProfiler(&profiler, 0);
  TfLiteStatus status = validation_entrypoint_->Invoke();
  main_model_->SetProfiler(nullptr, 0);
  if (status != kTfLiteOk) {
    return kMinibenchmarkInvokeFailed;
  }

  // Fill in results_out.
  int model_output_size = main_model_->outputs().size();
  // Model output.
  for (int i = 0; i < model_output_size; i++) {
    AddTensorDataToMap(
        validation_entrypoint_->tensor(validation_entrypoint_->outputs()[i]),
        results_out->actual_inference_output);
  }
  // Accuracy metrics.
  const std::string kMetricPrefix = "metrics/";
  const std::string kOk("ok");
  for (int i = model_output_size; i < validation_entrypoint_->outputs().size();
       i++) {
    TfLiteTensor* tensor =
        validation_entrypoint_->tensor(validation_entrypoint_->outputs()[i]);
    std::string name = tensor->name;
    if (name.find(kMetricPrefix) != 0) {  // NOLINT
      continue;
    }
    name = name.substr(kMetricPrefix.size());
    if (kOk == name) {
      results_out->ok = *(tensor->data.b);
    } else {
      std::vector<float> values;
      int count = 1;
      for (int j = 0; j < tensor->dims->size; j++) {
        count *= tensor->dims->data[j];
      }
      values.reserve(count);
      for (int j = 0; j < count; j++) {
        values.push_back(tensor->data.f[j]);
        TFLITE_LOG_PROD(TFLITE_LOG_INFO, " %s %.4f", name.c_str(),
                        tensor->data.f[j]);
      }
      results_out->metrics[name] = values;
    }
  }
  TFLITE_LOG_PROD(TFLITE_LOG_INFO, " accuracy: %s",
                  results_out->ok ? "ok" : "not ok");
  // Performance metrics.
  results_out->delegate_prep_time_us =
      (delegate_load_end_time_us == -1 || delegate_load_start_time_us == -1)
          ? -1
          : delegate_load_end_time_us - delegate_load_start_time_us;
  TFLITE_LOG_PROD(TFLITE_LOG_INFO, " Delegate preparation took %d us",
                  static_cast<int>(results_out->delegate_prep_time_us));
  for (const auto& e : profiler.events()) {
    if (e.tag == "Invoke" && e.start_time_us != -1 && e.end_time_us != -1) {
      results_out->execution_time_us.push_back(e.end_time_us - e.start_time_us);
      TFLITE_LOG_PROD(TFLITE_LOG_INFO, " Inference took %d us",
                      static_cast<int>(e.end_time_us - e.start_time_us));
    }
  }
#undef MB_RETURN_IF_ERROR
  return kMinibenchmarkSuccess;
}
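
// A usage sketch (hypothetical caller; `validator` stands for an instance
// whose constructor is declared in validator.h and not shown here):
//
//   Validator::Results results;
//   MinibenchmarkStatus status = validator.RunValidation(&results);
//   if (status == kMinibenchmarkSuccess && results.ok) {
//     // results.metrics holds the "metrics/" outputs, and
//     // results.execution_time_us holds per-Invoke latencies.
//   }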

int64_t Validator::BootTimeMicros() { return ElapsedTimeMicros(); }
int64_t Validator::WallTimeMicros() {
  struct timespec ts;
#ifndef _WIN32
  int err = clock_gettime(CLOCK_REALTIME, &ts);
#else   // _WIN32
  int err = 1;
#endif  // !_WIN32
  if (err) {
    return -1;
  }
  return ts.tv_sec * kMicrosInSecond + ts.tv_nsec / kNanosInMicro;
}

}  // namespace acceleration
}  // namespace tflite