/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/interpreter.h"

#include <stddef.h>
#include <stdlib.h>

#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "ruy/denormal.h"  // from @ruy
#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/stderr_reporter.h"
#include "tensorflow/lite/util.h"

// TODO(b/139446230): Move to portable platform header.
#if defined(__ANDROID__)
#define TFLITE_IS_MOBILE_PLATFORM
#endif  // defined(__ANDROID__)

#if defined(__APPLE__)
#include "TargetConditionals.h"
#if TARGET_IPHONE_SIMULATOR
#define TFLITE_IS_MOBILE_PLATFORM
#elif TARGET_OS_IPHONE
#define TFLITE_IS_MOBILE_PLATFORM
#endif
#endif  // defined(__APPLE__)

// TODO(b/132087118): move static_assert to c_api_internal when compiled with
// C++.
static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
              "Float 16 type must be 16 bits.");

namespace tflite {

namespace {

// Gets the current TfLiteQuantization from the legacy TfLiteQuantizationParams.
TfLiteQuantization GetQuantizationFromLegacy(
    const TfLiteQuantizationParams& legacy_quantization) {
  TfLiteQuantization quantization;
  quantization.type = kTfLiteAffineQuantization;
  auto* affine_quantization = reinterpret_cast<TfLiteAffineQuantization*>(
      malloc(sizeof(TfLiteAffineQuantization)));
  affine_quantization->scale = TfLiteFloatArrayCreate(1);
  affine_quantization->zero_point = TfLiteIntArrayCreate(1);
  affine_quantization->scale->data[0] = legacy_quantization.scale;
  affine_quantization->zero_point->data[0] = legacy_quantization.zero_point;
  quantization.params = affine_quantization;

  return quantization;
}
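
// Illustrative sketch of a hypothetical caller (not part of this file's
// logic): a legacy parameter struct converts into a single-channel affine
// quantization whose arrays are heap-allocated and owned by the returned
// value.
//
//   TfLiteQuantizationParams legacy;
//   legacy.scale = 0.5f;
//   legacy.zero_point = 127;
//   TfLiteQuantization q = GetQuantizationFromLegacy(legacy);
//   // q.type == kTfLiteAffineQuantization; q.params points to a
//   // TfLiteAffineQuantization with scale {0.5f} and zero_point {127}.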

// TODO(b/153131797): We have set 'delegate_status' to 0 in the following
// macro temporarily because delegate-specific error codes are not retrievable
// at the moment; we will add them later.
#define TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(runtime_event, a) \
  do {                                                                      \
    TfLiteStatus status = (a);                                              \
    runtime_event.set_runtime_status(/*delegate_status=*/0,                 \
                                     static_cast<int64_t>(status));         \
    TF_LITE_ENSURE_STATUS(status);                                          \
  } while (0)

}  // namespace

Interpreter::Interpreter(ErrorReporter* error_reporter)
    : error_reporter_(error_reporter ? error_reporter
                                     : DefaultErrorReporter()) {
  // TODO(b/128420794): Include the TFLite runtime version in the log.
  // Prod logging is useful for mobile platforms where scraping console logs is
  // critical for debugging.
#if defined(TFLITE_IS_MOBILE_PLATFORM)
  TFLITE_LOG_PROD_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#else
  TFLITE_LOG_ONCE(TFLITE_LOG_INFO, "Initialized TensorFlow Lite runtime.");
#endif

  // There's always at least 1 subgraph, which is the primary subgraph.
  AddSubgraphs(1);
  context_ = primary_subgraph().context();

  // Initialize all external context pointers to nullptr.
  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    external_contexts_[i] = nullptr;
  }

  // This operation is cheap because we allocate the CPU context resources
  // (i.e., threads) lazily.
  own_external_cpu_backend_context_.reset(new ExternalCpuBackendContext());
  external_contexts_[kTfLiteCpuBackendContext] =
      own_external_cpu_backend_context_.get();
}

Interpreter::~Interpreter() {
  // The owned external CPU backend context will go out of scope with this
  // interpreter. If we have an external backend context that is not
  // owned, we need to clear the cache for other interpreters that may
  // use the context.
  if (external_contexts_[kTfLiteCpuBackendContext] &&
      (external_contexts_[kTfLiteCpuBackendContext] !=
       own_external_cpu_backend_context_.get())) {
    ExternalCpuBackendContext* external_context =
        static_cast<ExternalCpuBackendContext*>(
            external_contexts_[kTfLiteCpuBackendContext]);
    TfLiteInternalBackendContext* internal_context =
        external_context->internal_backend_context();
    if (internal_context) {
      // This call may have negative performance impacts on the next inference
      // for any interpreter using this context. The cache will be refreshed
      // by the next inference.
      internal_context->ClearCaches();
    }
  }
}

void Interpreter::SetExternalContext(TfLiteExternalContextType type,
                                     TfLiteExternalContext* ctx) {
  if (ctx == own_external_cpu_backend_context_.get()) {
    error_reporter_->Report(
        "WARNING: The passed external context is identical to the internally "
        "owned one.");
    return;
  }

  // We have an internally owned external context of kTfLiteCpuBackendContext.
  // If it is overwritten here, we release the resources of the internally
  // owned external context.
  // Note: the 'max thread count' info associated with the overwritten context
  // is lost here; that info is now determined by the new context, which
  // affects how much parallelism a TFLite op can use.
  if (kTfLiteCpuBackendContext == type &&
      external_contexts_[kTfLiteCpuBackendContext] ==
          own_external_cpu_backend_context_.get()) {
    own_external_cpu_backend_context_.reset();
  }

  // This essentially changes the "external_contexts_[type]".
  primary_subgraph().SetExternalContext(type, ctx);
}

TfLiteStatus Interpreter::SetInputs(std::vector<int> inputs) {
  return primary_subgraph().SetInputs(std::move(inputs));
}

TfLiteStatus Interpreter::SetOutputs(std::vector<int> outputs) {
  return primary_subgraph().SetOutputs(std::move(outputs));
}

TfLiteStatus Interpreter::SetVariables(std::vector<int> variables) {
  return primary_subgraph().SetVariables(std::move(variables));
}

TfLiteStatus Interpreter::AllocateTensors() {
  // Apply the default delegates that TFLite enables only at this point, so
  // that any user-level delegates applied earlier take precedence. Only
  // return an error when the status is kTfLiteError; for other statuses,
  // execution falls back to the default implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) return kTfLiteError;

  return primary_subgraph().AllocateTensors();
}
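
// Illustrative usage sketch for a hypothetical caller (not part of this
// file's logic): the interpreter is normally built from a flatbuffer model,
// and AllocateTensors() must succeed before the first Invoke(). The model
// path below is a placeholder.
//
//   auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
//   tflite::ops::builtin::BuiltinOpResolver resolver;
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
//   if (interpreter->AllocateTensors() != kTfLiteOk) {
//     // Handle the allocation failure.
//   }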

void Interpreter::AddSubgraphs(int subgraphs_to_add,
                               int* first_new_subgraph_index) {
  const size_t base_index = subgraphs_.size();
  if (first_new_subgraph_index) *first_new_subgraph_index = base_index;

  subgraphs_.reserve(base_index + subgraphs_to_add);
  for (int i = 0; i < subgraphs_to_add; ++i) {
    Subgraph* subgraph =
        new Subgraph(error_reporter_, external_contexts_, &subgraphs_,
                     &resources_, &resource_ids_, &initialization_status_map_);
    subgraphs_.emplace_back(subgraph);
  }
}

TfLiteStatus Interpreter::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  return primary_subgraph().AddNodeWithParameters(
      inputs, outputs, {}, init_data, init_data_size, builtin_data,
      registration, node_index);
}

TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
                                            const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Interpreter::ResizeInputTensorStrict(
    int tensor_index, const std::vector<int>& dims) {
  return primary_subgraph().ResizeInputTensorStrict(tensor_index, dims);
}
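
// Illustrative sketch for a hypothetical caller (not part of this file's
// logic): resizing an input invalidates the current memory plan, so
// AllocateTensors() must be called again before the next Invoke(). The input
// index and shape below are placeholders.
//
//   int input_index = interpreter->inputs()[0];
//   interpreter->ResizeInputTensor(input_index, {1, 224, 224, 3});
//   interpreter->AllocateTensors();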

TfLiteStatus Interpreter::Invoke() {
  ScopedRuntimeInstrumentationProfile scoped_runtime_event(installed_profiler_,
                                                           "invoke");

  // Denormal floating point numbers can cause significant slowdowns on
  // platforms like x86, so we suppress them here to prevent that from
  // happening.
  ruy::ScopedSuppressDenormals suppress_denormals;

  TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
      scoped_runtime_event, primary_subgraph().Invoke());

  if (!allow_buffer_handle_output_) {
    for (int tensor_index : outputs()) {
      TF_LITE_ENSURE_STATUS_WITH_SCOPED_INSTRUMENTATION(
          scoped_runtime_event,
          primary_subgraph().EnsureTensorDataIsReadable(tensor_index));
    }
  }

  return kTfLiteOk;
}
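
// Illustrative sketch for a hypothetical caller (not part of this file's
// logic): fill the first input, run inference, then read the first output.
// The float element type is an assumption that depends on the model.
//
//   float* input = interpreter->typed_input_tensor<float>(0);
//   input[0] = 1.0f;
//   if (interpreter->Invoke() == kTfLiteOk) {
//     const float* output = interpreter->typed_output_tensor<float>(0);
//     // ... consume the output ...
//   }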

TfLiteStatus Interpreter::AddTensors(int tensors_to_add,
                                     int* first_new_tensor_index) {
  return primary_subgraph().AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    const char* buffer, size_t bytes, const Allocation* allocation) {
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, dims.size(), dims.data(), quantization, buffer,
      bytes, allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name,
    const std::vector<int>& dims, TfLiteQuantization quantization,
    bool is_variable) {
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, dims.size(), dims.data(), quantization,
      is_variable);
}

TfLiteStatus Interpreter::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadOnly(
      tensor_index, type, name, rank, dims, new_quantization, buffer, bytes,
      allocation);
}

TfLiteStatus Interpreter::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantizationParams quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  TfLiteQuantization new_quantization = GetQuantizationFromLegacy(quantization);
  return primary_subgraph().SetTensorParametersReadWrite(
      tensor_index, type, name, rank, dims, new_quantization, is_variable,
      rank_dims_signature, dims_signature);
}

TfLiteStatus Interpreter::SetExecutionPlan(const std::vector<int>& new_plan) {
  return primary_subgraph().SetExecutionPlan(new_plan);
}

TfLiteStatus Interpreter::SetNumThreads(int num_threads) {
  if (num_threads < -1) {
    context_->ReportError(context_,
                          "num_threads should be >=0 or just -1 to let TFLite "
                          "runtime set the value.");
    return kTfLiteError;
  }

  // num_threads == 0 has the same effect as num_threads == 1.
  num_threads = num_threads == 0 ? 1 : num_threads;
  for (auto& subgraph : subgraphs_) {
    subgraph->context()->recommended_num_threads = num_threads;
  }

  for (int i = 0; i < kTfLiteMaxExternalContexts; ++i) {
    auto* c = external_contexts_[i];
    if (c && c->Refresh) {
      c->Refresh(context_);
    }
  }
  return kTfLiteOk;
}
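
// Illustrative sketch for a hypothetical caller (not part of this file's
// logic): the thread count is usually set right after construction and before
// AllocateTensors() so that delegates and kernels pick it up. The value 4 is
// a placeholder; -1 lets the runtime choose.
//
//   interpreter->SetNumThreads(4);
//   interpreter->AllocateTensors();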

TfLiteStatus Interpreter::ApplyLazyDelegateProviders() {
  if (lazy_delegate_providers_.empty()) return kTfLiteOk;

  // We only apply lazy delegate providers once.
  std::vector<TfLiteDelegatePtr> delegate_providers;
  delegate_providers.swap(lazy_delegate_providers_);

  TFLITE_LOG(TFLITE_LOG_INFO,
             "Applying %zu TensorFlow Lite delegate(s) lazily.",
             delegate_providers.size());
  // At the moment, the XNNPACK delegate is the only one that might be applied
  // by default; if it fails to be applied, execution falls back to the
  // default implementation.
  for (size_t i = 0; i < delegate_providers.size(); ++i) {
    auto status = ModifyGraphWithDelegate(std::move(delegate_providers[i]));
    switch (status) {
      case kTfLiteOk:
        TFLITE_LOG(TFLITE_LOG_INFO,
                   "Successfully applied the default TensorFlow Lite "
                   "delegate indexed at %zu.",
                   i);
        break;
      case kTfLiteError:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Failed to apply the default TensorFlow Lite "
                             "delegate indexed at %zu.",
                             i);
        return kTfLiteError;
      case kTfLiteDelegateError:
        TF_LITE_REPORT_ERROR(
            error_reporter_,
            "Error in applying the default TensorFlow Lite delegate indexed "
            "at %zu, and all previously applied delegates are reverted.",
            i);
        return kTfLiteDelegateError;
      case kTfLiteApplicationError:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Ignoring failed application of the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             i);
        return kTfLiteApplicationError;
      default:
        TF_LITE_REPORT_ERROR(error_reporter_,
                             "Unknown status (%d) after applying the default "
                             "TensorFlow Lite delegate indexed at %zu.",
                             status, i);
        return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

SignatureRunner* Interpreter::GetSignatureRunner(const char* signature_key) {
  auto iter = signature_runner_map_.find(signature_key);
  if (iter != signature_runner_map_.end()) {
    return &(iter->second);
  }

  // Default delegates are applied once for all subgraphs. Only return an
  // error (nullptr here) when the status is kTfLiteError; for other statuses,
  // execution falls back to the default implementation.
  if (ApplyLazyDelegateProviders() == kTfLiteError) {
    return nullptr;
  }

  for (const auto& signature : signature_defs_) {
    if (signature.signature_key == signature_key) {
      auto status = signature_runner_map_.insert(
          {signature_key,
           SignatureRunner(&signature, subgraph(signature.subgraph_index))});
      return &(status.first->second);
    }
  }
  return nullptr;
}
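
// Illustrative sketch for a hypothetical caller (not part of this file's
// logic): running a model through a named signature. The signature key and
// tensor names are placeholders that depend on the model.
//
//   tflite::SignatureRunner* runner =
//       interpreter->GetSignatureRunner("serving_default");
//   runner->AllocateTensors();
//   TfLiteTensor* x = runner->input_tensor("x");
//   // ... fill x->data ...
//   runner->Invoke();
//   const TfLiteTensor* y = runner->output_tensor("y");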

TfLiteStatus Interpreter::SetMetadata(
    const std::map<std::string, std::string>& metadata) {
  metadata_ = metadata;
  for (int subgraph_index = 0; subgraph_index < subgraphs_.size();
       ++subgraph_index) {
    TF_LITE_ENSURE_STATUS(subgraphs_[subgraph_index]->SetMetadata(&metadata_));
  }
  return kTfLiteOk;
}

}  // namespace tflite