/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/interpreter.h"

#include <stddef.h>
#include <stdlib.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/stderr_reporter.h"
#include "tensorflow/lite/util.h"

// TODO(b/139446230): Move to portable platform header.
#if defined(__ANDROID__)
#define TFLITE_IS_MOBILE_PLATFORM
#endif  // defined(__ANDROID__)

#if defined(__APPLE__)
#include "TargetConditionals.h"
#if TARGET_IPHONE_SIMULATOR
#define TFLITE_IS_MOBILE_PLATFORM
#elif TARGET_OS_IPHONE
#define TFLITE_IS_MOBILE_PLATFORM
#endif
#endif  // defined(__APPLE__)

// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");

namespace tflite {

namespace {
// Gets the current TfLiteQuantization from the legacy TfLiteQuantizationParams.
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
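  // Note: the affine quantization struct built here is heap-allocated; its
  // ownership transfers with the returned TfLiteQuantization to whichever
  // tensor the quantization is later assigned to.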
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}

// TODO(b/153131797): 'delegate_status' is temporarily set to 0 in the
// following macro because delegate-specific error codes are not retrievable
// at the moment; they will be added later.
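// The macro evaluates the expression 'a' once, records the resulting
// TfLiteStatus on the given scoped runtime instrumentation event, and then
// propagates any failure via TF_LITE_ENSURE_STATUS.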
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace

Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least 1 subgraph, which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Clear all external context slots; specific contexts are registered below
  // or provided later by the client.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources
  // (i.e. threads) lazily.
  own_external_cpu_backend_context_.reset(new ExternalCpuBackendContext());
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}

Interpreter::~Interpreter() {
  // The owned external CPU backend context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}

void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it is overwritten here, we release the resources of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // is lost here; that value is now determined by the new context and thus
  // affects how much parallelism a TFLite op can use.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}

TfLiteStatus Interpreter::SetCustomAllocationForTensor(
    int tensor_index, const TfLiteCustomAllocation& allocation) {
  return primary_subgraph().SetCustomAllocationForTensor(tensor_index,
                                                         allocation);
}

TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}

TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}

TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}

TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegates that TFLite enables at this point; deferring
  // until now allows user-level delegates to be applied first.
  if (!lazy_delegate_providers_.empty()) {
    // We only apply lazy delegate providers once.
    std::vector<TfLiteDelegatePtr> delegate_providers;
    delegate_providers.swap(lazy_delegate_providers_);

    TFLITE_LOG(TFLITE_LOG_INFO,
               "Applying %zu TensorFlow Lite delegate(s) lazily.",
               delegate_providers.size());
    // At the moment, the XNNPACK delegate is the only one that might be
    // applied by default; if it fails to apply, execution falls back to the
    // default implementation. Therefore, we ignore the return status here and
    // let it fall through the rest of the code.
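    // Only kTfLiteError (or an unrecognized status) aborts AllocateTensors();
    // kTfLiteDelegateError and kTfLiteApplicationError are logged and treated
    // as non-fatal.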
    for (size_t i = 0; i < delegate_providers.size(); ++i) {
      auto status = ModifyGraphWithDelegate(std::move(delegate_providers[i]));
      switch (status) {
        case kTfLiteOk:
          TFLITE_LOG(TFLITE_LOG_INFO,
                     "Successfully applied the default TensorFlow Lite "
                     "delegate indexed at %zu.",
                     i);
          break;
        case kTfLiteError:
          TF_LITE_REPORT_ERROR(error_reporter_,
                               "Failed to apply the default TensorFlow Lite "
                               "delegate indexed at %zu.",
                               i);
          return kTfLiteError;
        case kTfLiteDelegateError:
          TF_LITE_REPORT_ERROR(
              error_reporter_,
              "Error in applying the default TensorFlow Lite delegate indexed "
              "at %zu, and all previously applied delegates are reverted.",
              i);
          break;
        case kTfLiteApplicationError:
          TF_LITE_REPORT_ERROR(error_reporter_,
                               "Ignoring failed application of the default "
                               "TensorFlow Lite delegate indexed at %zu.",
                               i);
          break;
        default:
          TF_LITE_REPORT_ERROR(error_reporter_,
                               "Unknown status (%d) after applying the default "
                               "TensorFlow Lite delegate indexed at %zu.",
                               status, i);
          return kTfLiteError;
      }
    }
  }

  return primary_subgraph().AllocateTensors();
}

void Interpreter::ReserveNodes(int count) {
  primary_subgraph().ReserveNodes(count);
}

void Interpreter::AddSubgraphs(int subgraphs_to_add,
                               int* first_new_subgraph_index) {
  const size_t base_index = subgraphs_.size();
  if (first_new_subgraph_index) *first_new_subgraph_index = base_index;

  subgraphs_.reserve(base_index + subgraphs_to_add);
  for (int i = 0; i < subgraphs_to_add; ++i) {
    Subgraph* subgraph = new Subgraph(error_reporter_, external_contexts_,
                                      &subgraphs_, &resources_);
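    // The raw pointer is adopted by the owning smart pointer stored in
    // subgraphs_, so the interpreter owns every subgraph it creates.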
    subgraphs_.emplace_back(subgraph);
  }
}

TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}

TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}

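// Note: unlike ResizeInputTensor(), the 'Strict' variant below is intended to
// only permit resizing along dimensions the model left unspecified; fully
// specified dimensions cannot be changed through it.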
TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}

TfLiteStatus Interpreter::ReleaseNonPersistentMemory() {
  // TODO(b/138790287): We could do this for all subgraphs whose tensors have
  // been allocated. However, AllocateTensors() relies on Control Flow ops to
  // allocate tensors on 'children' subgraphs. Revisit this if required.
  return primary_subgraph().ReleaseNonPersistentMemory();
}

TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(installed_profiler_,
                                                           "invoke");
  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

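  // Unless the client has opted in to reading outputs via delegate buffer
  // handles, make sure every output tensor's data is readable from CPU memory
  // before returning.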
  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}

TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Interpreter::ResetVariableTensors() {
  return primary_subgraph().ResetVariableTensors();
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}

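// The two overloads below take legacy TfLiteQuantizationParams; they convert
// those into the newer TfLiteQuantization representation (via
// GetQuantizationFromLegacy) before forwarding to the primary subgraph.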
TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}

TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}

TfLiteStatus Interpreter::SetNumThreads(int num_threads) {
  if (num_threads < -1) {
    context_->ReportError(context_,
                          "num_threads should be >= 0, or -1 to let the "
                          "TFLite runtime set the value.");
    return kTfLiteError;
  }

  for (auto& subgraph : subgraphs_) {
    subgraph->context()->recommended_num_threads = num_threads;
  }

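  // Notify any registered external contexts of the change so they can refresh
  // their internal state (for example, updating their thread count).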
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    auto* c = external_contexts_[i];
    if (c && c->Refresh) {
      c->Refresh(context_);
    }
  }
  return kTfLiteOk;
}

void Interpreter::SetAllowFp16PrecisionForFp32(bool allow) {
  for (auto& subgraph : subgraphs_) {
    subgraph->context()->allow_fp32_relax_to_fp16 = allow;
  }
}

// TODO(b/121264966): Subgraphs added after cancellation is set will not get
// the cancellation function added to their context.
void Interpreter::SetCancellationFunction(void* data,
                                          bool (*check_cancelled_func)(void*)) {
  for (auto& subgraph : subgraphs_) {
    subgraph->SetCancellationFunction(data, check_cancelled_func);
  }
}

bool Interpreter::IsCancelled() { return primary_subgraph().IsCancelled(); }

TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
  TfLiteStatus status = kTfLiteOk;
  for (auto& subgraph : subgraphs_) {
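    // Subgraphs whose names mark them as validation subgraphs are skipped;
    // delegates are only applied to regular subgraphs.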
    if (IsValidationSubgraph(subgraph->GetName().c_str())) {
      continue;
    }
    status = subgraph->ModifyGraphWithDelegate(delegate);
    if (status != kTfLiteOk) {
      break;
    }
  }
  // Delegate-specific errors can be recovered from by restoring Interpreter to
  // its original state.
  if (status == kTfLiteDelegateError) {
    TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
  }
  return status;
}

TfLiteStatus Interpreter::RemoveAllDelegates() {
  for (auto& subgraph : subgraphs_) {
    TF_LITE_ENSURE_STATUS(subgraph->RemoveAllDelegates());
  }
  return kTfLiteOk;
}

bool Interpreter::HasDelegates() { return primary_subgraph().HasDelegates(); }

TfLiteStatus Interpreter::SetBufferHandle(int tensor_index,
                                          TfLiteBufferHandle buffer_handle,
                                          TfLiteDelegate* delegate) {
  TF_LITE_ENSURE(context_, tensor_index < tensors_size());
  TfLiteTensor* tensor = primary_subgraph().tensor(tensor_index);

  TF_LITE_ENSURE(context_,
                 tensor->delegate == nullptr || tensor->delegate == delegate);
  tensor->delegate = delegate;
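  // If the tensor already holds a buffer handle, release it through the
  // delegate before installing the new one.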
  if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
    TF_LITE_ENSURE(context_, tensor->delegate->FreeBufferHandle != nullptr);
    tensor->delegate->FreeBufferHandle(context_, tensor->delegate,
                                       &tensor->buffer_handle);
  }
  tensor->buffer_handle = buffer_handle;

  return kTfLiteOk;
}

TfLiteStatus Interpreter::GetBufferHandle(int tensor_index,
                                          TfLiteBufferHandle* buffer_handle,
                                          TfLiteDelegate** delegate) {
  TF_LITE_ENSURE(context_, tensor_index < tensors_size());
  TfLiteTensor* tensor = primary_subgraph().tensor(tensor_index);

  *delegate = tensor->delegate;
  *buffer_handle = tensor->buffer_handle;

  return kTfLiteOk;
}

void Interpreter::SetProfiler(Profiler* profiler) {
  // Release resources occupied by owned_profiler_, which is being replaced by
  // the caller-owned profiler.
  owned_profiler_.reset(nullptr);
  installed_profiler_ = profiler;
  SetSubgraphProfiler();
}

void Interpreter::SetProfiler(std::unique_ptr<Profiler> profiler) {
  owned_profiler_ = std::move(profiler);
  installed_profiler_ = owned_profiler_.get();
  SetSubgraphProfiler();
}

void Interpreter::SetSubgraphProfiler() {
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    subgraphs_[subgraph_index]->SetProfiler(installed_profiler_,
                                            subgraph_index);
  }
}

Profiler* Interpreter::GetProfiler() {
  return primary_subgraph().GetProfiler();
}

}  // namespace tflite