• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/lite/experimental/acceleration/mini_benchmark/validator.h"
16 
17 #include <stdint.h>
18 #include <string.h>
19 #include <time.h>
20 #include <unistd.h>
21 
22 #include <functional>
23 #include <map>
24 #include <memory>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 
29 #include "absl/container/flat_hash_set.h"
30 #include "tensorflow/lite/c/c_api.h"
31 #include "tensorflow/lite/c/common.h"
32 #include "tensorflow/lite/core/api/profiler.h"
33 #include "tensorflow/lite/core/subgraph.h"
34 #include "tensorflow/lite/experimental/acceleration/configuration/configuration_generated.h"
35 #include "tensorflow/lite/experimental/acceleration/configuration/delegate_registry.h"
36 #include "tensorflow/lite/experimental/acceleration/mini_benchmark/call_register.h"
37 #include "tensorflow/lite/experimental/acceleration/mini_benchmark/decode_jpeg_register.h"
38 #include "tensorflow/lite/experimental/acceleration/mini_benchmark/model_loader.h"
39 #include "tensorflow/lite/experimental/acceleration/mini_benchmark/status_codes.h"
40 #include "tensorflow/lite/interpreter.h"
41 #include "tensorflow/lite/interpreter_builder.h"
42 #include "tensorflow/lite/kernels/register.h"
43 #include "tensorflow/lite/logger.h"
44 #include "tensorflow/lite/minimal_logging.h"
45 #include "tensorflow/lite/mutable_op_resolver.h"
46 
47 #ifndef TEMP_FAILURE_RETRY
48 #ifdef __ANDROID__
49 #error "TEMP_FAILURE_RETRY not set although on Android"
50 #else  // ! defined(__ANDROID__)
51 #define TEMP_FAILURE_RETRY(exp) exp
52 #endif  // defined(__ANDROID__)
53 #endif  // defined(TEMP_FAILURE_RETRY)
54 
55 namespace tflite {
56 namespace acceleration {
57 namespace {
LoadDelegatePlugin(const std::string & name,const tflite::TFLiteSettings & tflite_settings)58 std::unique_ptr<tflite::delegates::DelegatePluginInterface> LoadDelegatePlugin(
59     const std::string& name, const tflite::TFLiteSettings& tflite_settings) {
60   return tflite::delegates::DelegatePluginRegistry::CreateByName(
61       name + "Plugin", tflite_settings);
62 }
63 
AddTensorDataToMap(TfLiteTensor * tensor,std::map<std::string,std::vector<char>> & output_map)64 void AddTensorDataToMap(TfLiteTensor* tensor,
65                         std::map<std::string, std::vector<char>>& output_map) {
66   std::vector<char> char_output(TfLiteTensorByteSize(tensor));
67   memcpy(char_output.data(), TfLiteTensorData(tensor),
68          TfLiteTensorByteSize(tensor));
69   output_map.emplace(TfLiteTensorName(tensor), std::move(char_output));
70 }
71 
constexpr int64_t kMicrosInSecond = 1000 * 1000;
constexpr int64_t kNanosInMicro = 1000;

// Returns elapsed (non-wallclock) time in microseconds, or -1 on failure.
// CLOCK_BOOTTIME is what Android uses for elapsed time. Wallclock on mobile
// devices can jump due to user actions or network time sync.
int64_t ElapsedTimeMicros() {
  struct timespec ts;
#if defined(__ANDROID__)
  const int err = clock_gettime(CLOCK_BOOTTIME, &ts);
#elif defined(_WIN32)
  const int err = 1;  // clock_gettime is unavailable on Windows.
#else
  const int err = clock_gettime(CLOCK_MONOTONIC, &ts);
#endif
  if (err != 0) {
    return -1;
  }
  return static_cast<int64_t>(ts.tv_sec) * kMicrosInSecond +
         ts.tv_nsec / kNanosInMicro;
}
91 
92 class ValidatorProfiler : public ::tflite::Profiler {
93  public:
94   struct EventData {
95     std::string tag;
96     int64_t start_time_us = -1;
97     int64_t end_time_us = -1;
98   };
events()99   const std::vector<EventData>& events() { return events_; }
BeginEvent(const char * tag,EventType event_type,int64_t event_metadata1,int64_t event_metadata2)100   uint32_t BeginEvent(const char* tag, EventType event_type,
101                       int64_t event_metadata1,
102                       int64_t event_metadata2) override {
103     if (event_type != EventType::DEFAULT) {
104       return 0;
105     }
106     events_.push_back({tag, ElapsedTimeMicros(), -1});
107     return events_.size();
108   }
EndEvent(uint32_t event_handle)109   void EndEvent(uint32_t event_handle) override {
110     if (event_handle == 0) {
111       return;
112     }
113     events_[event_handle - 1].end_time_us = ElapsedTimeMicros();
114   }
115 
116  private:
117   std::vector<EventData> events_;
118 };
119 
120 }  // namespace
121 
// Ensures golden (reference) outputs for the validation subgraph are present
// in results_out->golden_inference_output, and that the validation
// entrypoint's input tensors hold those golden values. If the model embeds
// golden data (first input tensor has a model-backed allocation), it is
// copied out directly; otherwise the model is run once on CPU and its outputs
// become the goldens. Idempotent: returns success immediately when
// validation_entrypoint_ is already set. Also caches main_model_ and
// validation_entrypoint_ as side effects.
MinibenchmarkStatus Validator::CheckGoldenOutput(Results* results_out) {
  if (!interpreter_ || !model_loader_->GetModel()) {
    return kMinibenchmarkPreconditionNotMet;
  }
  if (validation_entrypoint_) {
    // Already done.
    return kMinibenchmarkSuccess;
  }
  // Subgraph 0 is the main model; the validation entrypoint is located by its
  // well-known name.
  main_model_ = interpreter_->subgraph(0);
  int validation_entrypoint_index = 0;
  for (int i = 0; i < interpreter_->subgraphs_size(); i++) {
    Subgraph* subgraph = interpreter_->subgraph(i);
    if (subgraph->GetName() == "VALIDATION:main") {
      validation_entrypoint_ = subgraph;
      validation_entrypoint_index = i;
      break;
    }
  }
  if (!validation_entrypoint_) {
    return kMinibenchmarkValidationSubgraphNotFound;
  }
  // Expected layout: N golden inputs followed by one jpeg input, with at
  // least as many outputs as inputs.
  if (validation_entrypoint_->inputs().size() <= 1) {
    return kMinibenchmarkValidationSubgraphHasTooFewInputs;
  }
  if (validation_entrypoint_->inputs().size() >
      validation_entrypoint_->outputs().size()) {
    return kMinibenchmarkValidationSubgraphHasTooFewOutputs;
  }

  if (validation_entrypoint_->AllocateTensors() != kTfLiteOk) {
    return kMinibenchmarkAllocateTensorsFailed;
  }

  // Check if we have validation data embedded or need to run CPU for it. If
  // the data is embedded, there is already an allocation for it from the model.
  // We can skip running it on CPU, and copy the embedded golden output to
  // results_out.
  TfLiteTensor* first_input_tensor =
      validation_entrypoint_->tensor(validation_entrypoint_->inputs()[0]);
  if (first_input_tensor->allocation) {
    // Embedded goldens: every input except the last (jpeg) is a golden value.
    for (int i = 0; i < validation_entrypoint_->inputs().size() - 1; i++) {
      AddTensorDataToMap(
          validation_entrypoint_->tensor(validation_entrypoint_->inputs()[i]),
          results_out->golden_inference_output);
    }
    return kMinibenchmarkSuccess;
  }

  // Create the interpreter to run on CPU.
  tflite::InterpreterBuilder(*model_loader_->GetModel(),
                             *resolver_)(&golden_interpreter_);
  if (!golden_interpreter_) {
    return kMinibenchmarkInterpreterBuilderFailed;
  }
  Subgraph* golden_validation_entrypoint =
      golden_interpreter_->subgraph(validation_entrypoint_index);

  // Run on CPU.
  if (golden_validation_entrypoint->AllocateTensors() != kTfLiteOk) {
    return kMinibenchmarkAllocateTensorsFailed;
  }
  // Set initial golden outputs to 0 to avoid accessing uninitialized memory.
  // Last input is jpeg, skip.
  for (int i = 0; i < golden_validation_entrypoint->inputs().size() - 1; i++) {
    TfLiteTensor* input_tensor = golden_validation_entrypoint->tensor(
        golden_validation_entrypoint->inputs()[i]);
    memset(input_tensor->data.data, 0, input_tensor->bytes);
  }

  if (golden_validation_entrypoint->Invoke() != kTfLiteOk) {
    return kMinibenchmarkInvokeFailed;
  }
  // Copy CPU outputs as golden. Last input is jpeg image data, skip.
  for (int i = 0; i < validation_entrypoint_->inputs().size() - 1; i++) {
    TfLiteTensor* input_tensor =
        validation_entrypoint_->tensor(validation_entrypoint_->inputs()[i]);
    TfLiteTensor* golden_output_tensor = golden_validation_entrypoint->tensor(
        golden_validation_entrypoint->outputs()[i]);
    if (input_tensor->bytes != golden_output_tensor->bytes) {
      return kMinibenchmarkValidationSubgraphInputsDontMatchOutputs;
    }

    // Feed the golden values into the accelerated interpreter's inputs so the
    // validation subgraph can compare its own outputs against them.
    memcpy(input_tensor->data.data, golden_output_tensor->data.data,
           golden_output_tensor->bytes);

    AddTensorDataToMap(golden_output_tensor,
                       results_out->golden_inference_output);
  }

  return kMinibenchmarkSuccess;
}
213 
LoadDelegate()214 MinibenchmarkStatus Validator::LoadDelegate() {
215   if (!compute_settings_) {
216     return kMinibenchmarkPreconditionNotMet;
217   }
218 
219   // Create delegate plugin and delegate.
220   Delegate which_delegate = Delegate_NONE;
221   if (compute_settings_->tflite_settings()) {
222     which_delegate = compute_settings_->tflite_settings()->delegate();
223   }
224   std::string delegate_name;
225   switch (which_delegate) {
226     case Delegate_NONE:
227       // Skip creating delegate if running on CPU.
228       return kMinibenchmarkSuccess;
229     case Delegate_NNAPI:
230       delegate_name = "Nnapi";
231       break;
232     case Delegate_GPU:
233       delegate_name = "Gpu";
234       break;
235     case Delegate_XNNPACK:
236       delegate_name = "XNNPack";
237       break;
238     default:
239       return kMinibenchmarkDelegateNotSupported;
240   }
241 
242   TFLITE_LOG_PROD(TFLITE_LOG_INFO, "Running mini-benchmark on %s",
243                   delegate_name.c_str());
244   if (!(delegate_plugin_ = LoadDelegatePlugin(
245             delegate_name, *compute_settings_->tflite_settings()))) {
246     return kMinibenchmarkDelegatePluginNotFound;
247   }
248   if (!(delegate_ = delegate_plugin_->Create())) {
249     return kMinibenchmarkDelegateCreateFailed;
250   }
251   return kMinibenchmarkSuccess;
252 }
253 
// Builds interpreter_ from the loaded model, registering the custom
// validation ops and attaching delegate_ when one was created by
// LoadDelegate(). On builder failure, *delegate_error_out receives the
// delegate plugin's errno (0 if there is no plugin). On success,
// *delegated_kernels_out receives the number of execution-plan nodes that
// were handed to the delegate; partial/no delegation only logs a warning.
MinibenchmarkStatus Validator::CreateInterpreter(int* delegate_error_out,
                                                 int* delegated_kernels_out) {
  if (!delegate_error_out || !delegated_kernels_out ||
      !model_loader_->GetModel()) {
    return kMinibenchmarkPreconditionNotMet;
  }
  *delegate_error_out = 0;
  // Create interpreter with the delegate.
  if (compute_settings_->tflite_settings() &&
      compute_settings_->tflite_settings()->disable_default_delegates()) {
    resolver_ = std::make_unique<
        ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates>();
  } else {
    resolver_ = std::make_unique<::tflite::ops::builtin::BuiltinOpResolver>();
  }
  // Custom ops used by the validation graph: the subgraph-call op and the
  // jpeg decoder for the embedded validation images.
  resolver_->AddCustom("validation/call",
                       ::tflite::acceleration::ops::Register_CALL(), 1);
  resolver_->AddCustom(
      "validation/decode_jpeg",
      ::tflite::acceleration::decode_jpeg_kernel::Register_DECODE_JPEG(), 1);

  tflite::InterpreterBuilder builder(*model_loader_->GetModel(), *resolver_);
  // Add delegate if not running on CPU.
  if (delegate_ != nullptr) {
    builder.AddDelegate(delegate_.get());
  }
  TfLiteStatus status = builder(&interpreter_);
  if (!interpreter_) {
    // Return delegate error number if not null.
    *delegate_error_out =
        delegate_plugin_ ? delegate_plugin_->GetDelegateErrno(delegate_.get())
                         : 0;

    TFLITE_LOG_PROD(TFLITE_LOG_ERROR,
                    "Creating Interpreter failed with error code %d.", status);
    return kMinibenchmarkInterpreterBuilderFailed;
  }

  // Check if the model is actually going to execute on the delegate.
  // For now just give a warning, with the exception of NNAPI SL mini benchmark.
  // Can consider changing to error in other contexts.
  // The logic is copy/pasted from benchmark_tflite_model.cc
  // TODO(b/232085640): Replace this logic with Subgraph::IsFullyDelegated()
  // after making that function public.
  absl::flat_hash_set<int> checked_node_ids;
  int num_delegated_kernels = 0;
  for (int i = 0; i < interpreter_->execution_plan().size(); ++i) {
    int node_id = interpreter_->execution_plan()[i];
    if (checked_node_ids.find(node_id) != checked_node_ids.end()) {
      continue;
    }
    const TfLiteNode& node =
        interpreter_->node_and_registration(node_id)->first;
    if (node.delegate != nullptr) {
      num_delegated_kernels++;
      checked_node_ids.insert(node_id);
    }
  }
  *delegated_kernels_out = num_delegated_kernels;
  // Fully delegated means the whole plan collapsed into one delegate kernel.
  bool fully_delegated = (num_delegated_kernels == 1 &&
                          interpreter_->execution_plan().size() == 1);
  if (!fully_delegated) {
    TFLITE_LOG_PROD(TFLITE_LOG_WARNING,
                    "The model will be %s executed by the delegate.",
                    num_delegated_kernels > 0 ? "partially" : "not");
  }

  return kMinibenchmarkSuccess;
}
323 
// Runs the full mini-benchmark pipeline: loads the model, creates the
// delegate and interpreter, prepares golden outputs, invokes the validation
// subgraph under a profiler, and fills results_out with model outputs,
// accuracy metrics (tensors named "metrics/*"), delegate preparation time,
// and per-invoke execution times. Returns the first failing step's status,
// or kMinibenchmarkSuccess.
MinibenchmarkStatus Validator::RunValidation(Results* results_out) {
  if (!results_out) {
    return kMinibenchmarkPreconditionNotMet;
  }
  if (!model_loader_) {
    return kMinibenchmarkModelReadFailed;
  }

// Early-returns the wrapped call's status unless it is success.
#define MB_RETURN_IF_ERROR(s)                 \
  {                                           \
    MinibenchmarkStatus c = (s);              \
    if (c != kMinibenchmarkSuccess) return c; \
  }

  MB_RETURN_IF_ERROR(model_loader_->Init());
  // The lifetime of the delegate must be at least as long as the lifetime of
  // any Interpreter.
  int64_t delegate_load_start_time_us = ElapsedTimeMicros();
  MB_RETURN_IF_ERROR(LoadDelegate());
  MB_RETURN_IF_ERROR(CreateInterpreter(&results_out->delegate_error,
                                       &results_out->delegated_kernels));
  int64_t delegate_load_end_time_us = ElapsedTimeMicros();
  MB_RETURN_IF_ERROR(CheckGoldenOutput(results_out));
  // Profile only the validation invoke; the profiler is detached right after
  // so it cannot dangle once this function returns.
  ValidatorProfiler profiler;
  main_model_->SetProfiler(&profiler, 0);
  TfLiteStatus status = validation_entrypoint_->Invoke();
  main_model_->SetProfiler(nullptr, 0);
  if (status != kTfLiteOk) {
    return kMinibenchmarkInvokeFailed;
  }

  // Create results_out.
  int model_output_size = main_model_->outputs().size();
  // Model output: the first model_output_size outputs of the validation
  // subgraph mirror the main model's outputs.
  for (int i = 0; i < model_output_size; i++) {
    AddTensorDataToMap(
        validation_entrypoint_->tensor(validation_entrypoint_->outputs()[i]),
        results_out->actual_inference_output);
  }
  // Accuracy metrics: remaining outputs named "metrics/<name>". The special
  // "metrics/ok" boolean sets results_out->ok; others are float vectors.
  const std::string kMetricPrefix = "metrics/";
  const std::string kOk("ok");
  for (int i = model_output_size; i < validation_entrypoint_->outputs().size();
       i++) {
    TfLiteTensor* tensor =
        validation_entrypoint_->tensor(validation_entrypoint_->outputs()[i]);
    std::string name = tensor->name;
    if (name.find(kMetricPrefix) != 0) {  // NOLINT
      continue;
    }
    name = name.substr(kMetricPrefix.size());
    if (kOk == name) {
      results_out->ok = *(tensor->data.b);
    } else {
      std::vector<float> values;
      // Flatten the tensor: total element count is the product of all dims.
      int count = 1;
      for (int j = 0; j < tensor->dims->size; j++) {
        count *= tensor->dims->data[j];
      }
      values.reserve(count);
      for (int j = 0; j < count; j++) {
        values.push_back(tensor->data.f[j]);
        TFLITE_LOG_PROD(TFLITE_LOG_INFO, "  %s %.4f", name.c_str(),
                        tensor->data.f[j]);
      }
      results_out->metrics[name] = values;
    }
  }
  TFLITE_LOG_PROD(TFLITE_LOG_INFO, "  accuracy: %s",
                  results_out->ok ? "ok" : "not ok");
  // Performance metrics. -1 marks "clock unavailable" from ElapsedTimeMicros.
  results_out->delegate_prep_time_us =
      (delegate_load_end_time_us == -1 || delegate_load_start_time_us == -1)
          ? -1
          : delegate_load_end_time_us - delegate_load_start_time_us;
  TFLITE_LOG_PROD(TFLITE_LOG_INFO, "  Delegate preparation took %d us",
                  static_cast<int>(results_out->delegate_prep_time_us));
  for (const auto& e : profiler.events()) {
    if (e.tag == "Invoke" && e.start_time_us != -1 && e.end_time_us != -1) {
      results_out->execution_time_us.push_back(e.end_time_us - e.start_time_us);
      TFLITE_LOG_PROD(TFLITE_LOG_INFO, "  Inference took %d us",
                      static_cast<int>(e.end_time_us - e.start_time_us));
    }
  }
#undef MB_RETURN_IF_ERROR
  return kMinibenchmarkSuccess;
}
411 
// Elapsed-time clock in microseconds (CLOCK_BOOTTIME on Android,
// CLOCK_MONOTONIC elsewhere); -1 when the clock is unavailable.
int64_t Validator::BootTimeMicros() { return ElapsedTimeMicros(); }
WallTimeMicros()413 int64_t Validator::WallTimeMicros() {
414   struct timespec ts;
415 #ifndef _WIN32
416   int err = clock_gettime(CLOCK_REALTIME, &ts);
417 #else   // _WIN32
418   int err = 1;
419 #endif  // !_WIN32
420   if (err) {
421     return -1;
422   }
423   return ts.tv_sec * kMicrosInSecond + ts.tv_nsec / kNanosInMicro;
424 }
425 
426 }  // namespace acceleration
427 }  // namespace tflite
428