1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/core/subgraph.h"
17
18 #include <stdarg.h>
19 #include <stddef.h>
20
21 #include <algorithm>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <cstring>
25 #include <iterator>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30
31 #include "tensorflow/lite/allocation.h"
32 #include "tensorflow/lite/builtin_ops.h"
33 #include "tensorflow/lite/c/c_api_types.h"
34 #include "tensorflow/lite/c/common.h"
35 #include "tensorflow/lite/c/common_internal.h"
36 #include "tensorflow/lite/context_util.h"
37 #include "tensorflow/lite/core/api/error_reporter.h"
38 #include "tensorflow/lite/core/api/profiler.h"
39 #include "tensorflow/lite/core/api/tensor_utils.h"
40 #include "tensorflow/lite/core/macros.h"
41 #include "tensorflow/lite/experimental/resource/resource_base.h"
42 #include "tensorflow/lite/graph_info.h"
43 #include "tensorflow/lite/memory_planner.h"
44 #include "tensorflow/lite/minimal_logging.h"
45 #include "tensorflow/lite/schema/schema_generated.h"
46 #include "tensorflow/lite/util.h"
47 #ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
48 #include "tensorflow/lite/simple_planner.h"
49 #else
50 #include "tensorflow/lite/arena_planner.h"
51 #endif
52 #ifdef TF_LITE_TENSORFLOW_PROFILER
53 #include "tensorflow/lite/tensorflow_profiler_logger.h"
54 #endif // TF_LITE_TENSORFLOW_PROFILER
55
56 namespace tflite {
57
58 namespace {
59
60 struct TfLiteQuantizationDeleter {
61 void operator()(TfLiteQuantization* q) {
62 if (q) TfLiteQuantizationFree(q);
63 }
64 };
65
66 using ScopedTfLiteQuantization =
67 std::unique_ptr<TfLiteQuantization, TfLiteQuantizationDeleter>;
68
69 struct TfLiteSparsityDeleter {
70 void operator()(TfLiteSparsity* s) {
71 if (s) TfLiteSparsityFree(s);
72 }
73 };
74
75 using ScopedTfLiteSparsity =
76 std::unique_ptr<TfLiteSparsity, TfLiteSparsityDeleter>;
77
78 TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node,
79 const TfLiteRegistration& registration,
80 int node_index, const char* message) {
81 TF_LITE_KERNEL_LOG(context, "Node number %d (%s) %s.", node_index,
82 registration.custom_name
83 ? registration.custom_name
84 : EnumNameBuiltinOperator(static_cast<BuiltinOperator>(
85 registration.builtin_code)),
86 message);
87 return kTfLiteError;
88 }
89
90 // Stub function that returns kTfLiteError whenever a forbidden context
91 // function is called. We register this one function for several different
92 // context callbacks to save compiled binary size. Please note the restrictions:
93 // * The first parameter must be of type `TfLiteContext*`.
94 // * All parameters must be trivially destructible (e.g. no C++ classes).
95 TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) {
96 TF_LITE_KERNEL_LOG(context,
97 "The function is forbidden if not calling in delegate.");
98 return kTfLiteError;
99 }
100
101 // Set the ForbiddenContextFunction to a compatible function pointer.
102 template <typename FunctionType>
103 void SetForbiddenContextFunction(FunctionType* func) {
104 *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction);
105 }
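
// Illustrative sketch (not wired up here; it mirrors what SwitchToKernelContext()
// does later in this file): delegate-only context hooks can be redirected to the
// shared error stub so that calling them outside of delegation reports
// kTfLiteError instead of crashing.
//
//   void StubOutDelegateOnlyHooks(TfLiteContext* context) {
//     SetForbiddenContextFunction(&context->GetNodeAndRegistration);
//     SetForbiddenContextFunction(&context->ReplaceNodeSubsetsWithDelegateKernels);
//     SetForbiddenContextFunction(&context->GetExecutionPlan);
//     SetForbiddenContextFunction(&context->PreviewDelegatePartitioning);
//   }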
106
107 // Returns true if at least one tensor in the given list is kTfLiteDynamic.
108 template <typename TensorIntArray>
109 bool HasDynamicTensorImpl(const TfLiteContext& context,
110 const TensorIntArray& int_array,
111 int* dynamic_tensor_index) {
112 for (int i : int_array) {
113 if (i == kTfLiteOptionalTensor) continue;
114 const TfLiteTensor& tensor = context.tensors[i];
115 if (tensor.allocation_type == kTfLiteDynamic) {
116 if (dynamic_tensor_index) {
117 *dynamic_tensor_index = i;
118 }
119 return true;
120 }
121 }
122 return false;
123 }
124
125 bool HasDynamicTensor(const TfLiteContext& context,
126 const TfLiteIntArray* int_array,
127 int* dynamic_tensor_index) {
128 return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array},
129 dynamic_tensor_index);
130 }
131
132 // Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization.
133 TfLiteQuantizationParams GetLegacyQuantization(
134 const TfLiteQuantization& quantization) {
135 TfLiteQuantizationParams legacy_quantization;
136 legacy_quantization.scale = 0;
137 legacy_quantization.zero_point = 0;
138
139 // If the quantization type isn't affine, return the empty
140 // legacy_quantization.
141 if (quantization.type != kTfLiteAffineQuantization) {
142 return legacy_quantization;
143 }
144
145 auto* affine_quantization =
146 static_cast<TfLiteAffineQuantization*>(quantization.params);
147 if (!affine_quantization || !affine_quantization->scale ||
148 !affine_quantization->zero_point ||
149 affine_quantization->scale->size != 1 ||
150 affine_quantization->zero_point->size != 1) {
151 return legacy_quantization;
152 }
153
154 // We know it's per-layer quantization now.
155 legacy_quantization.scale = affine_quantization->scale->data[0];
156 legacy_quantization.zero_point = affine_quantization->zero_point->data[0];
157 return legacy_quantization;
158 }
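
// Worked example (hypothetical values, illustration only): a per-tensor affine
// quantization with a single scale/zero-point pair maps onto the legacy struct;
// anything else (per-channel or non-affine) yields {scale=0, zero_point=0}.
//
//   TfLiteQuantization q;
//   q.type = kTfLiteAffineQuantization;
//   auto* affine = static_cast<TfLiteAffineQuantization*>(
//       malloc(sizeof(TfLiteAffineQuantization)));
//   affine->scale = TfLiteFloatArrayCreate(1);
//   affine->scale->data[0] = 0.5f;
//   affine->zero_point = TfLiteIntArrayCreate(1);
//   affine->zero_point->data[0] = 128;
//   affine->quantized_dimension = 0;
//   q.params = affine;
//   TfLiteQuantizationParams legacy = GetLegacyQuantization(q);
//   // legacy.scale == 0.5f, legacy.zero_point == 128
//   TfLiteQuantizationFree(&q);  // Releases the arrays and the params struct.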
159
160 static constexpr const char kUnknownCustomOpName[] = "UnknownCustomOp";
161 const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) {
162 if (op_reg.builtin_code == tflite::BuiltinOperator_CUSTOM) {
163 const char* const custom_name = op_reg.custom_name;
164 return custom_name ? custom_name : kUnknownCustomOpName;
165 }
166 if (op_reg.builtin_code == tflite::BuiltinOperator_DELEGATE &&
167 op_reg.custom_name) {
168 return op_reg.custom_name;
169 }
170 return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code];
171 }
172
173 // Verifies custom allocation for tensor, if applicable.
174 TfLiteStatus VerifyCustomAllocationForTensor(
175 TfLiteContext* context,
176 const std::map<int, TfLiteCustomAllocation>& tensor_idx_to_alloc,
177 const int tensor_idx) {
178 auto& tensor = context->tensors[tensor_idx];
179 if (tensor.allocation_type != kTfLiteCustom) return kTfLiteOk;
180 const auto idx_and_alloc = tensor_idx_to_alloc.find(tensor_idx);
181 TF_LITE_ENSURE(context, idx_and_alloc != tensor_idx_to_alloc.end());
182 if (idx_and_alloc->second.bytes < tensor.bytes) {
183 TF_LITE_KERNEL_LOG(context,
184 "Custom allocation is too small for tensor idx: %d",
185 tensor_idx);
186 return kTfLiteError;
187 }
188 return kTfLiteOk;
189 }
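
// Illustrative sketch of the client side (the buffer name and size are
// assumptions): a custom allocation is registered through the Interpreter API
// and must be at least as large as the tensor it backs, which is what the check
// above enforces.
//
//   TfLiteCustomAllocation alloc{/*data=*/my_aligned_buffer,
//                                /*bytes=*/my_buffer_size};
//   interpreter->SetCustomAllocationForTensor(interpreter->inputs()[0], alloc);
//   interpreter->AllocateTensors();  // Re-verifies the registered allocations.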
190
191 } // namespace
192
193 // A trivial implementation of GraphInfo around the Interpreter.
194 // NOTE: this interpreter info represents the subset of the
195 // graph that is executed according to execution plan. Thus,
196 // the indices are execution plan indices rather than raw node
197 // indices.
198 class InterpreterInfo : public GraphInfo {
199 public:
200 explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}
201
202 size_t num_tensors() const override { return subgraph_->tensors_size(); }
203 TfLiteTensor* tensor(size_t index) override {
204 return subgraph_->tensor(index);
205 }
206 size_t num_execution_nodes() const override {
207 return subgraph_->execution_plan().size();
208 }
209 size_t num_total_nodes() const override { return subgraph_->nodes_size(); }
210 const TfLiteNode& node(size_t index) const override {
211 int node_index = subgraph_->execution_plan()[index];
212 return subgraph_->nodes_and_registration()[node_index].first;
213 }
214 size_t node_index(size_t index) const override {
215 return subgraph_->execution_plan()[index];
216 }
217 const std::vector<int>& inputs() const override {
218 return subgraph_->inputs();
219 }
220 const std::vector<int>& outputs() const override {
221 return subgraph_->outputs();
222 }
223 const std::vector<int>& variables() const override {
224 return subgraph_->variables();
225 }
226
227 public:
228 Subgraph* subgraph_;
229 };
230
231 Subgraph::Subgraph(ErrorReporter* error_reporter,
232 TfLiteExternalContext** external_contexts,
233 std::vector<std::unique_ptr<Subgraph>>* subgraphs,
234 resource::ResourceMap* resources,
235 resource::ResourceIDMap* resource_ids,
236 resource::InitializationStatusMap* initialization_status_map,
237 int subgraph_index)
238 : external_contexts_(external_contexts),
239 error_reporter_(error_reporter),
240 next_execution_plan_index_to_prepare_(0),
241 next_execution_plan_index_to_plan_allocation_(0),
242 subgraphs_(subgraphs),
243 subgraph_index_(subgraph_index),
244 resources_(resources),
245 resource_ids_(resource_ids),
246 initialization_status_map_(initialization_status_map),
247 options_(nullptr) {
248 context_.impl_ = static_cast<void*>(this);
249 context_.ResizeTensor = ResizeTensor;
250 context_.ReportError = ReportErrorC;
251 context_.AddTensors = AddTensors;
252 context_.tensors = nullptr;
253 context_.tensors_size = 0;
254 context_.allow_fp32_relax_to_fp16 = false;
255 context_.recommended_num_threads = -1;
256 context_.GetExternalContext = GetExternalContext;
257 context_.SetExternalContext = SetExternalContext;
258 context_.profiler = nullptr;
259 context_.GetTensor = nullptr;
260 context_.GetEvalTensor = nullptr;
261 context_.GetModelMetadata = GetModelMetadata;
262
263 // Reserve some space for the tensors to avoid excessive resizing.
264 tensors_.reserve(kTensorsReservedCapacity);
265 nodes_and_registration_.reserve(kTensorsReservedCapacity);
266 // Invalid to call these except from TfLiteDelegate
267 SwitchToKernelContext();
268 }
269
270 Subgraph::~Subgraph() {
271 for (int node_index = 0; node_index < nodes_and_registration_.size();
272 ++node_index) {
273 CleanupNode(node_index);
274 }
275
276 for (size_t i = 0; i < context_.tensors_size; i++) {
277 TfLiteTensor* tensor = &context_.tensors[i];
278 if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
279 tensor->delegate->FreeBufferHandle != nullptr) {
280 tensor->delegate->FreeBufferHandle(&context_, tensor->delegate,
281 &tensor->buffer_handle);
282 }
283 TfLiteTensorFree(tensor);
284 }
285 }
286
287 void Subgraph::CleanupNode(int node_index) {
288 TfLiteNode& node = nodes_and_registration_[node_index].first;
289 const TfLiteRegistration& registration =
290 nodes_and_registration_[node_index].second;
291 TfLiteIntArrayFree(node.inputs);
292 TfLiteIntArrayFree(node.outputs);
293 TfLiteIntArrayFree(node.temporaries);
294 TfLiteIntArrayFree(node.intermediates);
295 if (node.builtin_data) free(node.builtin_data);
296 OpFree(registration, node.user_data);
297 node.builtin_data = nullptr;
298 }
299
300 TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
301 TfLiteContext* context, TfLiteRegistration registration,
302 const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
303 return static_cast<Subgraph*>(context->impl_)
304 ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace,
305 delegate);
306 }
307
308 namespace {
309
310 // Copy a std::vector<int> to an existing TfLiteIntArray.
311 // This is a low-level data manipulation function, and it is the caller's
312 // responsibility to ensure the TfLiteIntArray is large enough.
313 void CopyVectorToTfLiteIntArray(const std::vector<int>& vec,
314 TfLiteIntArray* arr) {
315 arr->size = vec.size();
316 memcpy(arr->data, vec.data(), sizeof(int) * arr->size);
317 }
318
319 // This function allocates a contiguous memory block that contains a
320 // TfLiteDelegateParams followed by several TfLiteIntArrays.
321 // When `free` is called on the TfLiteDelegateParams*, all of the allocated
322 // space is freed together.
323 //
324 // +-----------------------------------+
325 // | TfLiteDelegateParams |
326 // | TfLiteDelegate* delegate; |
327 // | TfLiteIntArray* nodes_to_replace; |--\
328 // | TfLiteIntArray* input_tensors; |--+--\
329 // | TfLiteIntArray* output_tensors; |--+--+--\
330 // +-----------------------------------+ | | |
331 // | TfLiteIntArray (variable size) |<-/ | |
332 // +-----------------------------------+ | |
333 // | TfLiteIntArray (variable size) |<----/ |
334 // +-----------------------------------+ |
335 // | TfLiteIntArray (variable size) |<-------/
336 // +-----------------------------------+
337 TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
338 const NodeSubset& node_subset) {
339 // Step 1: Calculate the allocation size.
340 int allocation_size = sizeof(TfLiteDelegateParams);
341
342 int nodes_to_replace_size =
343 TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size());
344 allocation_size += nodes_to_replace_size;
345
346 int input_tensors_size =
347 TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size());
348 allocation_size += input_tensors_size;
349
350 int output_tensors_size =
351 TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size());
352 allocation_size += output_tensors_size;
353
354 // Step 2: Allocate the memory.
355 // Use `char*` so we can conveniently step through the allocated space in bytes.
356 char* allocation = static_cast<char*>(malloc(allocation_size));
357
358 // Step 3: Fill all data structures.
359 TfLiteDelegateParams* params =
360 reinterpret_cast<TfLiteDelegateParams*>(allocation);
361 params->delegate = delegate;
362 allocation += sizeof(TfLiteDelegateParams);
363
364 params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation);
365 CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
366 allocation += nodes_to_replace_size;
367
368 params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
369 CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
370 allocation += input_tensors_size;
371
372 params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
373 CopyVectorToTfLiteIntArray(node_subset.output_tensors,
374 params->output_tensors);
375 allocation += output_tensors_size;
376
377 return params;
378 }
379
380 // Assumes that params is not nullptr.
381 void PopulatePreviewDelegateParams(const NodeSubset& node_subset,
382 TfLiteDelegateParams* params) {
383 // Since these params are used for previewing partitioning, params->delegate
384 // is not required.
385 params->delegate = nullptr;
386
387 params->nodes_to_replace = TfLiteIntArrayCreate(node_subset.nodes.size());
388 CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
389
390 params->input_tensors =
391 TfLiteIntArrayCreate(node_subset.input_tensors.size());
392 CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
393
394 params->output_tensors =
395 TfLiteIntArrayCreate(node_subset.output_tensors.size());
396 CopyVectorToTfLiteIntArray(node_subset.output_tensors,
397 params->output_tensors);
398 }
399
400 } // namespace
401
402 TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
403 TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
404 TfLiteDelegate* delegate) {
405 // Ignore empty node replacement sets.
406 if (!nodes_to_replace->size) {
407 return kTfLiteOk;
408 }
409
410 // Annotate the registration as DELEGATE op.
411 registration.builtin_code = BuiltinOperator_DELEGATE;
412
413 // Analyze the graph to find all independent node_subsets, each consisting
414 // entirely of nodes claimed by this delegate or entirely of other nodes.
415 InterpreterInfo info(this);
416 std::vector<NodeSubset> node_subsets;
417 PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
418 &node_subsets);
419
420 // On Android the log message below is used for diagnosing delegation success
421 // also in production builds. Use VERBOSE here so that the logging is turned
422 // off in production builds on other platforms.
423 TFLITE_LOG_PROD(
424 tflite::TFLITE_LOG_VERBOSE,
425 "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions.",
426 nodes_to_replace->size,
427 registration.custom_name ? registration.custom_name : "unknown",
428 node_subsets.size());
429
430 execution_plan_.clear();
431
432 for (auto& node_subset : node_subsets) {
433 // Subsets claimed by the delegate have a single "macro" op created for them,
434 // while the other node_subsets (kTfNonPartition) just have their nodes added
435 // back to the execution plan.
436 switch (node_subset.type) {
437 case NodeSubset::kTfNonPartition:
438 for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end();
439 ++it) {
440 execution_plan_.push_back(*it);
441 }
442 break;
443 case NodeSubset::kTfPartition: {
444 int node_index;
445
446 TfLiteDelegateParams* params =
447 CreateDelegateParams(delegate, node_subset);
448 TF_LITE_ENSURE_STATUS(AddNodeWithParameters(
449 node_subset.input_tensors, node_subset.output_tensors, {}, nullptr,
450 0, params, ®istration, &node_index));
451
452 // Initialize the output tensors' delegate-related fields.
453 for (int tensor_index : node_subset.output_tensors) {
454 TfLiteTensor* tensor = &tensors_[tensor_index];
455 TF_LITE_ENSURE(&context_, tensor->delegate == nullptr ||
456 tensor->delegate == delegate);
457 tensor->delegate = delegate;
458 }
459
460 // Associate the node with the delegate.
461 TfLiteNode* node = &nodes_and_registration_[node_index].first;
462 node->delegate = delegate;
463 } break;
464 case NodeSubset::kTfUnexplored:
465 return kTfLiteError;
466 break;
467 }
468 }
469 return kTfLiteOk;
470 }
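
// Illustrative sketch of how a delegate typically reaches this path from its
// Prepare callback; the kernel name and the "claim every node" choice are
// assumptions made for the example.
//
//   TfLiteStatus MyDelegatePrepare(TfLiteContext* context,
//                                  TfLiteDelegate* delegate) {
//     TfLiteIntArray* plan = nullptr;
//     TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
//     TfLiteIntArray* nodes = TfLiteIntArrayCopy(plan);  // A real delegate filters.
//     TfLiteRegistration kernel{};  // init/prepare/invoke for the fused kernel.
//     kernel.custom_name = "MyDelegateKernel";
//     const TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
//         context, kernel, nodes, delegate);
//     TfLiteIntArrayFree(nodes);
//     return status;
//   }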
471
472 TfLiteExternalContext* Subgraph::GetExternalContext(
473 TfLiteExternalContextType type) {
474 if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
475 return external_contexts_[type];
476 }
477 return nullptr;
478 }
479
480 TfLiteExternalContext* Subgraph::GetExternalContext(
481 struct TfLiteContext* context, TfLiteExternalContextType type) {
482 return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type);
483 }
484
485 void Subgraph::SetExternalContext(TfLiteExternalContextType type,
486 TfLiteExternalContext* ctx) {
487 if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
488 external_contexts_[type] = ctx;
489 }
490 }
491
492 void Subgraph::SetExternalContext(struct TfLiteContext* context,
493 TfLiteExternalContextType type,
494 TfLiteExternalContext* ctx) {
495 return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx);
496 }
497
498 // Gets a TfLiteIntArray* representing the execution plan. The interpreter owns
499 // this memory and it is only guaranteed to exist during the invocation of the
500 // delegate prepare.
501 TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) {
502 plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size()));
503 *execution_plan = plan_cache_.get();
504 static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]),
505 "TfLiteIntArray and execution_plan do not contain same type.");
506 std::memcpy(plan_cache_->data, execution_plan_.data(),
507 sizeof(plan_cache_->data[0]) * execution_plan_.size());
508 return kTfLiteOk;
509 }
510
511 // WARNING: This is an experimental interface that is subject to change.
512 // Entry point for C node plugin API to get the execution plan
513 TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context,
514 TfLiteIntArray** execution_plan) {
515 return static_cast<Subgraph*>(context->impl_)
516 ->GetExecutionPlan(execution_plan);
517 }
518
519 void Subgraph::FreeDelegatePartitioningData() {
520 for (auto& params : partitioning_preview_cache_) {
521 TfLiteIntArrayFree(params.nodes_to_replace);
522 TfLiteIntArrayFree(params.input_tensors);
523 TfLiteIntArrayFree(params.output_tensors);
524 }
525 partitioning_preview_cache_.clear();
526 }
527
528 TfLiteStatus Subgraph::GetModelMetadata(const char* name, const char** ptr,
529 size_t* bytes) {
530 TF_LITE_ENSURE(&context_, ptr != nullptr);
531 TF_LITE_ENSURE(&context_, bytes != nullptr);
532 *ptr = nullptr;
533 *bytes = 0;
534 if (!metadata_) return kTfLiteError;
535 const std::string name_str = name;
536 auto itr = metadata_->find(name_str);
537 if (itr != metadata_->end()) {
538 *ptr = itr->second.c_str();
539 *bytes = itr->second.size();
540 return kTfLiteOk;
541 }
542 return kTfLiteError;
543 }
544
545 TfLiteStatus Subgraph::GetModelMetadata(const struct TfLiteContext* context,
546 const char* name, const char** ptr,
547 size_t* bytes) {
548 return static_cast<Subgraph*>(context->impl_)
549 ->GetModelMetadata(name, ptr, bytes);
550 }
551
552 TfLiteStatus Subgraph::PreviewDelegatePartitioning(
553 const TfLiteIntArray* nodes_to_replace,
554 TfLiteDelegateParams** partition_params_array, int* num_partitions) {
555 // Ensure partitioning cache is empty.
556 FreeDelegatePartitioningData();
557 // Defaults.
558 if (!partition_params_array || !num_partitions) return kTfLiteError;
559 *partition_params_array = nullptr;
560 *num_partitions = 0;
561 if (!nodes_to_replace->size) {
562 return kTfLiteOk;
563 }
564
565 // Partition the execution plan into node subsets.
566 InterpreterInfo info(this);
567 std::vector<NodeSubset> node_subsets;
568 PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
569 &node_subsets);
570
571 // Create one TfLiteDelegateParams per node-subset which would be delegated.
572 for (auto& node_subset : node_subsets) {
573 if (node_subset.type != NodeSubset::kTfPartition) {
574 continue;
575 }
576 partitioning_preview_cache_.emplace_back();
577 PopulatePreviewDelegateParams(node_subset,
578 &partitioning_preview_cache_.back());
579 ++*num_partitions;
580 }
581
582 *partition_params_array = partitioning_preview_cache_.data();
583 return kTfLiteOk;
584 }
585
586 TfLiteStatus Subgraph::PreviewDelegatePartitioning(
587 struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
588 TfLiteDelegateParams** partition_params_array, int* num_partitions) {
589 return static_cast<Subgraph*>(context->impl_)
590 ->PreviewDelegatePartitioning(nodes_to_replace, partition_params_array,
591 num_partitions);
592 }
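
// Illustrative sketch: before committing, a delegate can preview how its
// supported nodes would be partitioned (e.g. to delegate only the largest
// partition). `supported_nodes` is an assumed, delegate-built TfLiteIntArray.
//
//   TfLiteDelegateParams* partitions = nullptr;
//   int num_partitions = 0;
//   TF_LITE_ENSURE_STATUS(context->PreviewDelegatePartitioning(
//       context, supported_nodes, &partitions, &num_partitions));
//   for (int i = 0; i < num_partitions; ++i) {
//     // partitions[i].nodes_to_replace lists the node indices in partition i.
//   }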
593
594 TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) {
595 TF_LITE_ENSURE_OK(&context_,
596 CheckTensorIndices("inputs", inputs.data(), inputs.size()));
597 inputs_ = std::move(inputs);
598 return kTfLiteOk;
599 }
600
601 TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) {
602 TF_LITE_ENSURE_OK(
603 &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size()));
604 outputs_ = std::move(outputs);
605 return kTfLiteOk;
606 }
607
608 TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) {
609 TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables", variables.data(),
610 variables.size()));
611 variables_ = std::move(variables);
612 return kTfLiteOk;
613 }
614
615 TfLiteStatus Subgraph::SetMetadata(
616 const std::map<std::string, std::string>* metadata) {
617 metadata_ = metadata;
618 return kTfLiteOk;
619 }
620
621 void Subgraph::SetCancellationFunction(void* data,
622 bool (*check_cancelled_func)(void*)) {
623 cancellation_data_ = data;
624 check_cancelled_func_ = check_cancelled_func;
625 }
626
627 bool Subgraph::IsCancelled() {
628 return (check_cancelled_func_ != nullptr) &&
629 (*check_cancelled_func_)(cancellation_data_);
630 }
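
// Illustrative sketch of wiring up cancellation with an atomic flag; the flag
// is an assumption made for the example (requires <atomic>).
//
//   static std::atomic<bool> cancelled{false};
//   subgraph->SetCancellationFunction(&cancelled, [](void* data) {
//     return static_cast<std::atomic<bool>*>(data)->load();
//   });
//   // From another thread: cancelled = true;  // The next Invoke() step aborts.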
631
632 void Subgraph::ReserveNodes(int count) {
633 nodes_and_registration_.reserve(count);
634 }
635
636 TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
637 int length) {
638 // Making sure kTfLiteOptionalTensor is not re-defined to something other than
639 // -1.
640 static_assert(kTfLiteOptionalTensor == -1,
641 "kTfLiteOptionalTensor should be defined -1");
642
643 for (int i = 0; i < length; i++) {
644 int index = indices[i];
645 // Continue if index == kTfLiteOptionalTensor before the additional
646 // comparisons below, since size_t(-1) is always >= context_.tensors_size.
647 if (index == kTfLiteOptionalTensor) {
648 continue;
649 }
650 if (index < 0 || static_cast<size_t>(index) >= context_.tensors_size) {
651 ReportError(
652 "Invalid tensor index %d in %s. The subgraph has %d tensors\n", index,
653 label, context_.tensors_size);
654 consistent_ = false;
655 return kTfLiteError;
656 }
657 }
658 return kTfLiteOk;
659 }
660
661 // We have two arrays and we need to check that elements from one array don't
662 // show up in the other. We could sort both arrays and then iterate with two
663 // pointers from start to finish always increasing the smaller one but since
664 // these arrays are usually short (<25 elements for inputs, usually <3 for
665 // outputs), this might be slower than the naive approach (if the arrays have
666 // sizes n and m, with n >> m ~ O(1), the first approach is O(n log n) whereas
667 // the naive one is O(n*m) ~ O(n)). Plus, sorting the input and output arrays
668 // is not something we want, as it destroys the ordering of their elements.
669 //
670 // If it turns out that this is an issue, we can switch to the other algorithm.
671 TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices,
672 int num_inputs,
673 const int* output_indices,
674 int num_outputs) {
675 for (int i = 0; i < num_inputs; i++) {
676 for (int j = 0; j < num_outputs; j++) {
677 if (input_indices[i] == output_indices[j]) {
678 ReportError("Tensor %d is both input %d and output %d\n",
679 input_indices[i], i, j);
680 consistent_ = false;
681 return kTfLiteError;
682 }
683 }
684 }
685 return kTfLiteOk;
686 }
687
688 TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
689 size_t dims_size, size_t* bytes) {
690 TF_LITE_ENSURE(&context_, bytes != nullptr);
691 // When 'dims_size' is 0, we simply assume it's a scalar. Therefore, we start
692 // 'count' as 1.
693 size_t count = 1;
694 for (int k = 0; k < dims_size; k++) {
695 size_t old_count = count;
696 TF_LITE_ENSURE_MSG(
697 &context_,
698 MultiplyAndCheckOverflow(old_count, dims[k], &count) == kTfLiteOk,
699 "BytesRequired number of elements overflowed.\n");
700 }
701 size_t type_size = 0;
702 TF_LITE_ENSURE_OK(&context_, GetSizeOfType(&context_, type, &type_size));
703 TF_LITE_ENSURE_MSG(
704 &context_, MultiplyAndCheckOverflow(type_size, count, bytes) == kTfLiteOk,
705 "BytesRequired number of bytes overflowed.\n");
706 return kTfLiteOk;
707 }
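
// Worked example (illustrative): for a kTfLiteFloat32 tensor with dims {2, 3, 4},
// count = 2 * 3 * 4 = 24 elements and the type size is 4 bytes, so *bytes is set
// to 96. If either multiplication overflows size_t, an error is returned instead.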
708
709 TfLiteStatus Subgraph::AllocateTensors() {
710 if (!consistent_) {
711 ReportError("AllocateTensors() called on inconsistent model.");
712 return kTfLiteError;
713 }
714
715 // Restore delegation state if applicable.
716 TF_LITE_ENSURE_STATUS(RedoAllDelegates());
717
718 // The runtime doesn't need to adjust any allocations if the state is
719 // invokable & no inputs are dynamic (which implies memory plan is unchanged).
720 const bool no_reallocations_necessary =
721 state_ != kStateUninvokable &&
722 !HasDynamicTensorImpl(context_, inputs(), &dynamic_tensor_index_);
723 if (no_reallocations_necessary) {
724 // If non-persistent memory was released, re-allocate it.
725 if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
726 memory_planner_->AcquireNonPersistentMemory();
727 }
728 // Check custom allocations, which may have been modified since last
729 // AllocateTensors() call.
730 if (!custom_allocations_.empty()) {
731 for (const auto& idx_and_alloc : custom_allocations_) {
732 const int idx = idx_and_alloc.first;
733 TfLiteTensor* tensor_at_index = tensor(idx);
734 TF_LITE_ENSURE_EQ(context(), tensor_at_index->allocation_type,
735 kTfLiteCustom);
736 TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor(
737 context(), custom_allocations_, idx));
738 }
739 }
740 return kTfLiteOk;
741 }
742
743 // Profile "AllocateTensors" only when memory planning is needed.
744 TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "AllocateTensors");
745
746 next_execution_plan_index_to_prepare_ = 0;
747 next_execution_plan_index_to_plan_allocation_ = 0;
748 next_original_execution_plan_index_to_prepare_ = 0;
749 if (memory_planner_) {
750 TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
751 }
752
753 TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
754
755 state_ = kStateInvokable;
756
757 // Reset the variable tensors to zero after (re)allocating the tensors.
758 // Developers shouldn't rely on the side effect of this function to reset
759 // variable tensors. They should call `ResetVariableTensors` directly
760 // instead.
761 ResetVariableTensors();
762
763 // Initialize the mapping between tensor index and the last execution plan
764 // index that uses the tensor.
765 InitializeTensorReleaseMap();
766
767 return kTfLiteOk;
768 }
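
// Illustrative sketch of the usual call sequence at the Interpreter level (which
// delegates to this Subgraph): allocate once, invoke repeatedly, and re-allocate
// only after resizing inputs. The model path is an assumption.
//
//   auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
//   tflite::ops::builtin::BuiltinOpResolver resolver;
//   std::unique_ptr<tflite::Interpreter> interpreter;
//   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
//   interpreter->AllocateTensors();          // Plans and allocates all tensors.
//   float* input = interpreter->typed_input_tensor<float>(0);
//   // ... fill `input` ...
//   interpreter->Invoke();
//   const float* output = interpreter->typed_output_tensor<float>(0);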
769
770 // TODO(b/115961645): Support non-zero default values.
771 TfLiteStatus Subgraph::ResetVariableTensors() {
772 for (auto& tensor : tensors_) {
773 if (!tensor.is_variable) {
774 continue;
775 }
776
777 if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
778 // If a variable tensor's allocation type is `kTfLiteArenaRwPersistent`, it
779 // must have been allocated after the initial `PrepareOpsAndTensors()`
780 // call.
781 TF_LITE_ENSURE(&context_, tensor.data.raw != nullptr);
782 tflite::ResetVariableTensor(&tensor);
783 } else {
784 // If a variable tensor's allocation type is not `kTfLiteArenaRwPersistent`,
785 // then it can only be `kTfLiteCustom`, in which case we do not reset it.
786 TF_LITE_ENSURE_EQ(&context_, tensor.allocation_type, kTfLiteCustom);
787 }
788 }
789 return kTfLiteOk;
790 }
791
792 TfLiteStatus Subgraph::AddNodeWithParameters(
793 const std::vector<int>& inputs, const std::vector<int>& outputs,
794 const std::vector<int>& intermediates, const char* init_data,
795 size_t init_data_size, void* builtin_data,
796 const TfLiteRegistration* registration, int* node_index) {
797 std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data,
798 free);
799 if (state_ == kStateInvokableAndImmutable) {
800 ReportError("AddNodeWithParameters is disallowed when graph is immutable.");
801 return kTfLiteError;
802 }
803 state_ = kStateUninvokable;
804
805 TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("node inputs", inputs.data(),
806 inputs.size()));
807 TF_LITE_ENSURE_OK(
808 &context_,
809 CheckTensorIndices("node outputs", outputs.data(), outputs.size()));
810
811 // For builtin ops, inputs and outputs must not overlap. Custom ops must do
812 // this check by themselves if they don't support overlapping tensors. This
813 // distinction is to allow custom ops to just forward a tensor, reusing it as
814 // both input and output.
815 if (builtin_data != nullptr) {
816 TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap(
817 inputs.data(), inputs.size(),
818 outputs.data(), outputs.size()));
819 }
820
821 int new_node_index = nodes_and_registration_.size();
822 if (node_index) *node_index = new_node_index;
823 nodes_and_registration_.emplace_back();
824 auto& node_and_reg = nodes_and_registration_.back();
825 TfLiteNode& node = node_and_reg.first;
826
827 // NOTE, here we are not using move semantics yet, since our internal
828 // representation isn't std::vector, but in the future we would like to avoid
829 // copies, so we want the interface to take r-value references now.
830 node.inputs = ConvertVectorToTfLiteIntArray(inputs);
831 node.outputs = ConvertVectorToTfLiteIntArray(outputs);
832 node.intermediates = ConvertVectorToTfLiteIntArray(intermediates);
833 node.temporaries = TfLiteIntArrayCreate(0);
834 if (init_data) {
835 node.user_data = OpInit(*registration, init_data, init_data_size);
836 } else {
837 node.user_data = OpInit(
838 *registration, static_cast<const char*>(builtin_data_deleter.get()), 0);
839 }
840
841 node.builtin_data = builtin_data_deleter.release();
842
843 if (registration->builtin_code == BuiltinOperator_CUSTOM) {
844 // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
845 // `Operator` table is passed in.
846 node.custom_initial_data = init_data;
847 node.custom_initial_data_size = init_data_size;
848 } else {
849 node.custom_initial_data = nullptr;
850 node.custom_initial_data_size = 0;
851 }
852 node.might_have_side_effect = OpMightHaveSideEffect(&node, registration);
853
854 node.delegate = nullptr;
855 // Copying of registration is required to support unresolved custom ops.
856 node_and_reg.second = *registration;
857 execution_plan_.push_back(new_node_index);
858 return kTfLiteOk;
859 }
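
// Illustrative sketch of adding a single builtin ADD node; the registration
// lookup (`add_registration`) is an assumption, and builtin_data must be
// heap-allocated with malloc/calloc because CleanupNode() releases it with
// free().
//
//   auto* params =
//       static_cast<TfLiteAddParams*>(calloc(1, sizeof(TfLiteAddParams)));
//   params->activation = kTfLiteActNone;
//   int node_index = -1;
//   TF_LITE_ENSURE_STATUS(subgraph->AddNodeWithParameters(
//       /*inputs=*/{0, 1}, /*outputs=*/{2}, /*intermediates=*/{},
//       /*init_data=*/nullptr, /*init_data_size=*/0, /*builtin_data=*/params,
//       add_registration, &node_index));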
860
861 namespace {
862 // Returns true if any tensor identified by indexes in 'tensor_indexes' is
863 // of type 'kTfLiteResource'. False otherwise.
864 bool AnyTensorOfTypeResource(const std::vector<TfLiteTensor>& tensors,
865 const TfLiteIntArray* tensor_indexes) {
866 for (int i = 0; i < tensor_indexes->size; ++i) {
867 int tensor_index = tensor_indexes->data[i];
868 if (tensor_index >= 0 && tensor_index < tensors.size() &&
869 tensors[tensor_index].type == kTfLiteResource)
870 return true;
871 }
872 return false;
873 }
874
875 } // namespace
876
877 bool Subgraph::OpMightHaveSideEffect(
878 const TfLiteNode* node, const TfLiteRegistration* registration) const {
879 // Check if any of the input tensors are of type resource.
880 if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true;
881 // Check if any of the output tensors are of type resource.
882 if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true;
883 // Consider control flow ops to have side effects, since some ops inside
884 // their control flow subgraphs can have side effects.
885 if (registration->builtin_code == kTfLiteBuiltinIf ||
886 registration->builtin_code == kTfLiteBuiltinWhile ||
887 registration->builtin_code == kTfLiteBuiltinCallOnce)
888 return true;
889 return false;
890 }
891
892 TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
893 const std::vector<int>& dims) {
894 const bool delegates_applied = !pre_delegation_execution_plan_.empty();
895 const bool graph_is_immutable = state_ == kStateInvokableAndImmutable;
896 if (graph_is_immutable && !delegates_applied) {
897 ReportError("ResizeInputTensor is disallowed when graph is immutable.");
898 return kTfLiteError;
899 }
900
901 TF_LITE_ENSURE(&context_,
902 tensor_index < context_.tensors_size && tensor_index >= 0);
903 TfLiteTensor* tensor = &context_.tensors[tensor_index];
904
905 // Short-circuit the state change if the dimensions don't change, avoiding
906 // unnecessary (re)allocations.
907 //
908 // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
909 // the subgraph won't allocate memory for a dynamic tensor when its size
910 // is equal to the original tensor size.
911 if (tensor->data.raw != nullptr &&
912 EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
913 return kTfLiteOk;
914 }
915
916 if (graph_is_immutable) {
917 // Undo delegation if it resulted in the graph being immutable.
918 TF_LITE_ENSURE_STATUS(UndoAllDelegates());
919 }
920 state_ = kStateUninvokable;
921 return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims));
922 }
923
924 TfLiteStatus Subgraph::ResizeInputTensorStrict(int tensor_index,
925 const std::vector<int>& dims) {
926 TF_LITE_ENSURE(&context_,
927 tensor_index < context_.tensors_size && tensor_index >= 0);
928 TfLiteTensor* tensor = &context_.tensors[tensor_index];
929
930 // Ensure that only unknown dimensions can be resized.
931 TF_LITE_ENSURE_EQ(&context_, tensor->dims->size, dims.size());
932 for (size_t idx = 0; idx < dims.size(); idx++) {
933 // `dims_signature` is not defined when no unknown dimensions are present.
934 int dim_signature;
935 if (tensor->dims_signature && tensor->dims_signature->size) {
936 dim_signature = tensor->dims_signature->data[idx];
937 } else {
938 dim_signature = tensor->dims->data[idx];
939 }
940
941 if (dim_signature != -1 && dim_signature != dims[idx]) {
942 ReportError(
943 "Attempting to resize dimension %d of tensor %d with value %d to %d. "
944 "ResizeInputTensorStrict only allows mutating unknown dimensions "
945 "identified by -1.",
946 idx, tensor_index, dim_signature, dims[idx]);
947 return kTfLiteError;
948 }
949 }
950
951 return ResizeInputTensor(tensor_index, dims);
952 }
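
// Illustrative sketch at the Interpreter level: a model exported with a dynamic
// batch dimension (a dims_signature of {-1, 224, 224, 3} is assumed here) can
// have that dimension resized strictly, after which tensors must be re-planned.
//
//   interpreter->ResizeInputTensorStrict(interpreter->inputs()[0],
//                                        {8, 224, 224, 3});
//   interpreter->AllocateTensors();  // Re-plans memory for the new shapes.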
953
954 TfLiteStatus Subgraph::ReleaseNonPersistentMemory() {
955 state_ = kStateUninvokable;
956 if (memory_planner_) {
957 TF_LITE_ENSURE_STATUS(memory_planner_->ReleaseNonPersistentMemory());
958 }
959 return kTfLiteOk;
960 }
961
962 TfLiteStatus Subgraph::ReleaseMemory() {
963 state_ = kStateUninvokable;
964 ReleaseNonPersistentMemory();
965
966 // Free dynamic input tensors.
967 for (const int input_tensor_idx : inputs_) {
968 if (input_tensor_idx == kTfLiteOptionalTensor) continue;
969 TfLiteTensor* input_tensor = tensor(input_tensor_idx);
970 if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic)
971 continue;
972 if (input_tensor->data.raw) {
973 TfLiteTensorDataFree(input_tensor);
974 }
975 }
976 // Free dynamic output tensors.
977 for (const int output_tensor_idx : outputs_) {
978 if (output_tensor_idx == kTfLiteOptionalTensor) continue;
979 TfLiteTensor* output_tensor = tensor(output_tensor_idx);
980 if (!output_tensor || output_tensor->allocation_type != kTfLiteDynamic)
981 continue;
982 if (output_tensor->data.raw) {
983 TfLiteTensorDataFree(output_tensor);
984 }
985 }
986
987 return kTfLiteOk;
988 }
989
990 // Give 'op_reg' a chance to initialize itself using the contents of
991 // 'buffer'. If registration_external is valid, use the 'init' callback from
992 // that.
993 void* Subgraph::OpInit(const TfLiteRegistration& op_reg, const char* buffer,
994 size_t length) {
995 if (op_reg.registration_external && op_reg.registration_external->init) {
996 return op_reg.registration_external->init(
997 reinterpret_cast<TfLiteOpaqueContext*>(&context_), buffer, length);
998 }
999 if (op_reg.init == nullptr) return nullptr;
1000 return op_reg.init(&context_, buffer, length);
1001 }
1002
1003 TfLiteStatus Subgraph::OpPrepare(const TfLiteRegistration& op_reg,
1004 TfLiteNode* node) {
1005 if (op_reg.registration_external && op_reg.registration_external->prepare) {
1006 return op_reg.registration_external->prepare(
1007 reinterpret_cast<TfLiteOpaqueContext*>(&context_),
1008 reinterpret_cast<TfLiteOpaqueNode*>(node));
1009 }
1010 if (op_reg.prepare == nullptr) {
1011 // Check if it's an unresolved custom op.
1012 if (IsUnresolvedCustomOp(op_reg)) {
1013 if (IsFlexOp(op_reg.custom_name)) {
1014 ReportError(
1015 "Select TensorFlow op(s), included in the given model, is(are) not "
1016 "supported by this interpreter. Make sure you apply/link the Flex "
1017 "delegate before inference. For the Android, it can be resolved by "
1018 "adding \"org.tensorflow:tensorflow-lite-select-tf-ops\" "
1019 "dependency. See instructions: "
1020 "https://www.tensorflow.org/lite/guide/ops_select");
1021 } else {
1022 ReportError(
1023 "Encountered unresolved custom op: %s.\nSee instructions: "
1024 "https://www.tensorflow.org/lite/guide/ops_custom ",
1025 op_reg.custom_name ? op_reg.custom_name : "UnknownOp");
1026 }
1027 return kTfLiteUnresolvedOps;
1028 }
1029 // Resolved ops can have a null Prepare function.
1030 return kTfLiteOk;
1031 }
1032 return op_reg.prepare(&context_, node);
1033 }
1034
1035 // Invoke the operator represented by 'node'.
1036 TfLiteStatus Subgraph::OpInvoke(const TfLiteRegistration& op_reg,
1037 TfLiteNode* node) {
1038 if (op_reg.registration_external && op_reg.registration_external->invoke) {
1039 return op_reg.registration_external->invoke(
1040 reinterpret_cast<TfLiteOpaqueContext*>(&context_),
1041 reinterpret_cast<TfLiteOpaqueNode*>(node));
1042 }
1043 if (op_reg.invoke == nullptr) return kTfLiteError;
1044 return op_reg.invoke(&context_, node);
1045 }
1046
1047 // Let 'op_reg' release any memory it might have allocated via 'OpInit'.
1048 // If registration_external is valid, use the 'free' callback from that.
1049 void Subgraph::OpFree(const TfLiteRegistration& op_reg, void* buffer) {
1050 if (op_reg.registration_external && op_reg.registration_external->free &&
1051 buffer) {
1052 return op_reg.registration_external->free(
1053 reinterpret_cast<TfLiteOpaqueContext*>(&context_), buffer);
1054 }
1055 if (op_reg.free == nullptr) return;
1056 if (buffer) {
1057 op_reg.free(&context_, buffer);
1058 }
1059 }
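
// Illustrative sketch of the registration lifecycle these wrappers drive
// (init -> prepare -> invoke -> free); "MY_NOOP" and its callbacks are
// assumptions made for the example, not a real TFLite op.
//
//   TfLiteRegistration reg{};
//   reg.builtin_code = kTfLiteBuiltinCustom;
//   reg.custom_name = "MY_NOOP";
//   reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
//     // Typically: validate inputs and resize outputs via context->ResizeTensor.
//     return kTfLiteOk;
//   };
//   reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
//     return kTfLiteOk;  // Read inputs, write outputs.
//   };
//   // init/free may stay null; OpInit()/OpFree() handle that gracefully.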
1060
1061 TfLiteStatus Subgraph::MayAllocateOpOutput(TfLiteNode* node) {
1062 if (ShouldOptimizeMemoryForLargeTensors()) {
1063 for (int i = 0; i < node->outputs->size; ++i) {
1064 int tensor_index = node->outputs->data[i];
1065 TfLiteTensor* tensor = &context_.tensors[tensor_index];
1066 if (tensor->data.raw == nullptr &&
1067 tensor->allocation_type == kTfLiteDynamic) {
1068 TfLiteTensorRealloc(tensor->bytes, tensor);
1069 }
1070 }
1071 }
1072 return kTfLiteOk;
1073 }
1074
1075 TfLiteStatus Subgraph::PrepareOpsStartingAt(
1076 int first_execution_plan_index, const std::vector<int>& execution_plan,
1077 int* last_execution_plan_index_prepared) {
1078 if (first_execution_plan_index == 0) {
1079 // Inputs that are forwarded to outputs without modification are never
1080 // evaluated by any operator, so we also need to check the subgraph's
1081 // output tensors at the beginning.
1082 has_dynamic_tensors_ =
1083 HasDynamicTensorImpl(context_, outputs(), &dynamic_tensor_index_);
1084 }
1085 for (int execution_plan_index = first_execution_plan_index;
1086 execution_plan_index < execution_plan.size(); execution_plan_index++) {
1087 int node_index = execution_plan[execution_plan_index];
1088 TfLiteNode& node = nodes_and_registration_[node_index].first;
1089 const TfLiteRegistration& registration =
1090 nodes_and_registration_[node_index].second;
1091 EnsureTensorsVectorCapacity();
1092 #ifdef TF_LITE_TENSORFLOW_PROFILER
1093 tflite::OnTfLiteOpPrepare(GetTFLiteOpName(registration), subgraph_index_,
1094 node_index);
1095 #endif // TF_LITE_TENSORFLOW_PROFILER
1096 const TfLiteStatus op_prepare_status = OpPrepare(registration, &node);
1097 if (op_prepare_status != kTfLiteOk) {
1098 ReportOpError(&context_, node, registration, node_index,
1099 "failed to prepare");
1100 return op_prepare_status;
1101 }
1102
1103 *last_execution_plan_index_prepared = execution_plan_index;
1104
1105 // Discontinue if the node has dynamic outputs. Note that we don't
1106 // stop for dynamic temporary tensors since they won't affect the
1107 // sizes of other tensors in the graph.
1108 if (HasDynamicTensor(context_, node.outputs, &dynamic_tensor_index_)) {
1109 has_dynamic_tensors_ = true;
1110 return kTfLiteOk;
1111 }
1112 }
1113 return kTfLiteOk;
1114 }
1115
1116 TfLiteStatus Subgraph::PrepareOpsAndTensors() {
1117 if (!memory_planner_) {
1118 #ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
1119 memory_planner_.reset(new SimplePlanner(&context_, CreateGraphInfo()));
1120 #else
1121 memory_planner_ = std::make_unique<ArenaPlanner>(
1122 &context_, CreateGraphInfo(), ShouldPreserveAllTensors(),
1123 kDefaultTensorAlignment, subgraph_index_);
1124 #endif
1125 memory_planner_->PlanAllocations();
1126 }
1127
1128 // Prepare original execution plan if any applied delegate wants it.
1129 // If any of the delegates is immutable, this won't be triggered
1130 // post-delegation (since we undo/redo delegation). For all other cases, other
1131 // delegates that do shape propagation themselves would still be able to.
1132 bool prepare_original_plan = false;
1133 if (!pre_delegation_execution_plan_.empty()) {
1134 for (int i = 0; i < delegates_applied_.size(); ++i) {
1135 if ((delegates_applied_[i]->flags &
1136 kTfLiteDelegateFlagsRequirePropagatedShapes)) {
1137 prepare_original_plan = true;
1138 break;
1139 }
1140 }
1141 }
1142 if (prepare_original_plan) {
1143 int last_original_exec_plan_index_prepared = 0;
1144 TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
1145 next_original_execution_plan_index_to_prepare_, pre_delegation_execution_plan_,
1146 &last_original_exec_plan_index_prepared));
1147 next_original_execution_plan_index_to_prepare_ =
1148 last_original_exec_plan_index_prepared + 1;
1149 }
1150
1151 int last_exec_plan_index_prepared = 0;
1152 TF_LITE_ENSURE_STATUS(
1153 PrepareOpsStartingAt(next_execution_plan_index_to_prepare_,
1154 execution_plan_, &last_exec_plan_index_prepared));
1155 next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
1156
1157 // Execute arena allocations.
1158 TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
1159 next_execution_plan_index_to_plan_allocation_,
1160 last_exec_plan_index_prepared));
1161
1162 if (!custom_allocations_.empty()) {
1163 // Verify custom allocations for output tensors from the ops that have just
1164 // been prepared. Other output tensors might be resized later.
1165 if (!nodes_and_registration_.empty()) {
1166 for (int node_idx = next_execution_plan_index_to_plan_allocation_;
1167 node_idx <= last_exec_plan_index_prepared; ++node_idx) {
1168 TfLiteNode& node = nodes_and_registration_[node_idx].first;
1169 for (int i = 0; i < node.outputs->size; ++i) {
1170 const int output_tensor_idx = node.outputs->data[i];
1171 if (output_tensor_idx == kTfLiteOptionalTensor) continue;
1172 TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor(
1173 context(), custom_allocations_, output_tensor_idx));
1174 }
1175 }
1176 }
1177 // Check input custom allocs only if we just prepared nodes from the idx 0.
1178 if (next_execution_plan_index_to_plan_allocation_ == 0) {
1179 for (const int input_tensor_idx : inputs_) {
1180 if (input_tensor_idx == kTfLiteOptionalTensor) continue;
1181 TF_LITE_ENSURE_STATUS(VerifyCustomAllocationForTensor(
1182 context(), custom_allocations_, input_tensor_idx));
1183 }
1184 }
1185 }
1186
1187 next_execution_plan_index_to_plan_allocation_ =
1188 last_exec_plan_index_prepared + 1;
1189
1190 return kTfLiteOk;
1191 }
1192
1193 TfLiteStatus Subgraph::RemoveUnusedInputs() {
1194 auto graph_info = CreateGraphInfo();
1195 std::vector<int> refcounts(graph_info->num_tensors(), 0);
1196
1197 for (int tensor_index : graph_info->variables()) {
1198 refcounts[tensor_index]++;
1199 }
1200 // Count references to node input tensors.
1201 for (size_t i = 0; i < graph_info->num_execution_nodes(); ++i) {
1202 const TfLiteNode& node = graph_info->node(i);
1203 TfLiteIntArray* node_inputs = node.inputs;
1204 for (int j = 0; j < node_inputs->size; ++j) {
1205 int tensor_index = node_inputs->data[j];
1206 if (tensor_index != kTfLiteOptionalTensor) {
1207 refcounts[tensor_index]++;
1208 }
1209 }
1210 }
1211 // Count references to SubGraph output tensors.
1212 for (auto iter = outputs_.begin(); iter != outputs_.end(); iter++) {
1213 if (*iter == kTfLiteOptionalTensor) continue;
1214 refcounts[*iter]++;
1215 }
1216
1217 // Mark unused inputs as kTfLiteOptionalTensor.
1218 for (auto iter = inputs_.begin(); iter != inputs_.end(); iter++) {
1219 if (*iter == kTfLiteOptionalTensor) continue;
1220 if (refcounts[*iter] == 0) {
1221 tensor(*iter)->bytes = 0; // To make it clearer for memory analysis.
1222 *iter = kTfLiteOptionalTensor;
1223 }
1224 }
1225 return kTfLiteOk;
1226 }
1227
1228 TfLiteStatus Subgraph::Invoke() {
1229 if (!consistent_) {
1230 ReportError("Invoke called on model that is not consistent.");
1231 return kTfLiteError;
1232 }
1233
1234 TfLiteStatus status = kTfLiteOk;
1235 if (state_ == kStateUninvokable) {
1236 ReportError("Invoke called on model that is not ready.");
1237 return kTfLiteError;
1238 } else if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
1239 ReportError("Non-persistent memory is not available.");
1240 return kTfLiteError;
1241 }
1242 TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "Invoke");
1243 #ifdef TF_LITE_TENSORFLOW_PROFILER
1244 tensorflow::profiler::TraceMe* trace_subgraph =
1245 tflite::OnTfLiteSubgraphInvoke(name_.c_str(), subgraph_index_);
1246 #endif // TF_LITE_TENSORFLOW_PROFILER
1247
1248 // Invocations are always done in node order.
1249 // Note that calling Invoke repeatedly will cause the original memory plan to
1250 // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
1251 // called.
1252 for (int execution_plan_index = 0;
1253 execution_plan_index < execution_plan_.size(); execution_plan_index++) {
1254 if (execution_plan_index == next_execution_plan_index_to_prepare_) {
1255 TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
1256 TF_LITE_ENSURE(&context_, next_execution_plan_index_to_prepare_ >=
1257 execution_plan_index);
1258 }
1259 int node_index = execution_plan_[execution_plan_index];
1260 TfLiteNode& node = nodes_and_registration_[node_index].first;
1261 const TfLiteRegistration& registration =
1262 nodes_and_registration_[node_index].second;
1263
1264 const char* op_name = nullptr;
1265 if (profiler_) op_name = GetTFLiteOpName(registration);
1266 #ifdef TF_LITE_TENSORFLOW_PROFILER
1267 if (!op_name) {
1268 op_name = GetTFLiteOpName(registration);
1269 }
1270 tensorflow::profiler::TraceMe* trace_op =
1271 tflite::OnTfLiteOpInvoke(op_name, subgraph_index_, node_index);
1272 #endif // TF_LITE_TENSORFLOW_PROFILER
1273 TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler_.get(), op_name, node_index);
1274
1275 for (int i = 0; i < node.inputs->size; ++i) {
1276 int tensor_index = node.inputs->data[i];
1277 if (tensor_index == kTfLiteOptionalTensor) {
1278 continue;
1279 }
1280 TfLiteTensor* tensor = &tensors_[tensor_index];
1281 if (tensor->delegate && tensor->delegate != node.delegate &&
1282 tensor->data_is_stale) {
1283 TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
1284 }
1285 if (tensor->data.raw == nullptr && tensor->bytes > 0) {
1286 if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 &&
1287 tensor->dims->size != 1) {
1288 // In general, having a tensor here with no buffer will be an error.
1289 // However, for the reshape operator, the second input tensor is
1290 // sometimes only used for the shape, not for the data. Thus, null
1291 // buffer is ok in this situation.
1292 // The situation where null buffer is not ok for reshape operator is
1293 // only when there are 2 inputs given to the node and the one
1294 // corresponding to the shape (i == 1) is a vector that contains all
1295 // dimensions. See `GetOutputShape()` function in
1296 // `tensorflow/lite/kernels/reshape.cc`
1297 continue;
1298 } else {
1299 // In all other cases, we need to return an error as otherwise we will
1300 // trigger a null pointer dereference (likely).
1301 ReportError("Input tensor %d lacks data", tensor_index);
1302 return kTfLiteError;
1303 }
1304 }
1305 }
1306 // Allocate memory for any dynamic tensors whose memory must be allocated
1307 // before executing the node.
1308 MayAllocateOpOutput(&node);
1309
1310 if (check_cancelled_func_ != nullptr &&
1311 check_cancelled_func_(cancellation_data_)) {
1312 ReportError("Client requested cancel during Invoke()");
1313 return kTfLiteError;
1314 }
1315
1316 EnsureTensorsVectorCapacity();
1317 tensor_resized_since_op_invoke_ = false;
1318 if (OpInvoke(registration, &node) != kTfLiteOk) {
1319 return ReportOpError(&context_, node, registration, node_index,
1320 "failed to invoke");
1321 }
1322
1323 // Force execution prep for downstream ops if the latest op triggered the
1324 // resize of a dynamic tensor.
1325 if (tensor_resized_since_op_invoke_ &&
1326 HasDynamicTensor(context_, node.outputs, nullptr)) {
1327 next_execution_plan_index_to_prepare_ = execution_plan_index + 1;
1328
1329 // This happens when an intermediate dynamic tensor is resized.
1330 // We don't have to prepare all the ops, but we need to recompute
1331 // the allocation plan.
1332 if (next_execution_plan_index_to_plan_allocation_ >
1333 next_execution_plan_index_to_prepare_) {
1334 next_execution_plan_index_to_plan_allocation_ =
1335 next_execution_plan_index_to_prepare_;
1336 if (memory_planner_) {
1337 TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocationsAfter(
1338 next_execution_plan_index_to_plan_allocation_ - 1));
1339 }
1340 }
1341 }
1342 // Release dynamic tensor memory if configured by the user.
1343 MaybeReleaseDynamicTensors(node, node_index);
1344
1345 #ifdef TF_LITE_TENSORFLOW_PROFILER
1346 tflite::OnTfLiteOpInvokeEnd(trace_op);
1347 #endif // TF_LITE_TENSORFLOW_PROFILER
1348 }
1349 #ifdef TF_LITE_TENSORFLOW_PROFILER
1350 tflite::OnTfLiteSubgraphInvokeEnd(trace_subgraph);
1351 #endif // TF_LITE_TENSORFLOW_PROFILER
1352 return status;
1353 }
1354
1355 TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context,
1356 TfLiteTensor* tensor,
1357 TfLiteIntArray* new_size) {
1358 // If the dimensions don't change, avoid performing unnecessary
1359 // (re)allocations.
1360 //
1361 // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
1362 // the subgraph won't allocate memory for a dynamic tensor when its size
1363 // is equal to the original tensor size.
1364 if (tensor->data.raw != nullptr &&
1365 EqualArrayAndTfLiteIntArray(tensor->dims, new_size->size,
1366 new_size->data)) {
1367 // A number of clients assume |new_size| remains valid upon success, so
1368 // swap it in as the new (but logically identical) tensor dims.
1369 TfLiteIntArrayFree(tensor->dims);
1370 tensor->dims = new_size;
1371 return kTfLiteOk;
1372 }
1373
1374 // Note here that context->impl_ is recovering the this pointer for an
1375 // instance of Subgraph to call into the member function ResizeTensorImpl
1376 // (this function is static).
1377 return static_cast<Subgraph*>(context->impl_)
1378 ->ResizeTensorImpl(tensor, new_size);
1379 }
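// Illustrative kernel-side sketch (assumed usage; `output`, `batch` and
// `channels` are placeholders): an op typically reaches this entry point
// through the context callback, e.g.
//   TfLiteIntArray* new_size = TfLiteIntArrayCreate(2);
//   new_size->data[0] = batch;
//   new_size->data[1] = channels;
//   TF_LITE_ENSURE_OK(context,
//                     context->ResizeTensor(context, output, new_size));
// Ownership of |new_size| passes to the runtime; that is why the fast path
// above installs it as tensor->dims instead of freeing it.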
1380
1381 void Subgraph::ReportErrorImpl(const char* format, va_list args) {
1382 error_reporter_->Report(format, args);
1383 }
1384
1385 void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) {
1386 va_list args;
1387 va_start(args, format);
1388 auto* f = static_cast<Subgraph*>(context->impl_);
1389 // Note here that context->impl_ is recovering the this pointer for an
1390 // instance of Subgraph to call into the member function ReportErrorImpl
1391 // (this function is static).
1392 f->ReportErrorImpl(format, args);
1393 va_end(args);
1394 }
1395
1396 // Entry point for C node plugin API to report an error.
1397 void Subgraph::ReportError(const char* format, ...) {
1398 va_list args;
1399 va_start(args, format);
1400 auto* f = static_cast<Subgraph*>(context_.impl_);
1401 // Note here that context->impl_ is recovering the this pointer for an
1402 // instance of Subgraph to call into the member function ReportErrorImpl
1403 // (this function is static).
1404 f->ReportErrorImpl(format, args);
1405 va_end(args);
1406 }
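// Illustrative kernel-side sketch (assumed usage; `input` is a placeholder):
// builtin and custom kernels usually report failures through the context
// callback that ReportErrorC backs, e.g.
//   TF_LITE_KERNEL_LOG(context, "Type %s is not supported by this kernel.",
//                      TfLiteTypeGetName(input->type));
//   return kTfLiteError;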
1407
1408 TfLiteStatus Subgraph::AddTensors(int tensors_to_add,
1409 int* first_new_tensor_index) {
1410 const size_t base_index = tensors_.size();
1411 if (first_new_tensor_index) *first_new_tensor_index = base_index;
1412 tensors_.resize(tensors_.size() + tensors_to_add);
1413 for (size_t i = base_index; i < tensors_.size(); i++) {
1414 memset(&tensors_[i], 0, sizeof(tensors_[i]));
1415 tensors_[i].buffer_handle = kTfLiteNullBufferHandle;
1416 }
1417 context_.tensors = tensors_.data();
1418 context_.tensors_size = tensors_.size();
1419 return kTfLiteOk;
1420 }
1421
1422 TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add,
1423 int* first_new_tensor_index) {
1424 // Note here that context->impl_ is recovering the this pointer for an
1425 // instance of Subgraph to call into the member function AddTensors
1426 // (this function is static).
1427 return static_cast<Subgraph*>(context->impl_)
1428 ->AddTensors(tensors_to_add, first_new_tensor_index);
1429 }
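// Illustrative kernel-side sketch (assumed usage; `node` and `context` come
// from the kernel's Prepare): scratch tensors are usually created through the
// context callback wired up here, e.g.
//   int first_new = -1;
//   TF_LITE_ENSURE_OK(context, context->AddTensors(context,
//                                                  /*tensors_to_add=*/1,
//                                                  &first_new));
//   TfLiteIntArrayFree(node->temporaries);
//   node->temporaries = TfLiteIntArrayCreate(1);
//   node->temporaries->data[0] = first_new;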
1430
1431 TfLiteStatus Subgraph::GetNodeAndRegistration(
1432 int node_index, TfLiteNode** node, TfLiteRegistration** registration) {
1433 TF_LITE_ENSURE(&context_, node_index >= 0);
1434 auto nodes_size = nodes_and_registration_.size();
1435 TF_LITE_ENSURE(&context_, static_cast<size_t>(node_index) < nodes_size);
1436 TF_LITE_ENSURE(&context_, node != nullptr && registration != nullptr);
1437 auto& node_and_reg = nodes_and_registration_[node_index];
1438 *node = &node_and_reg.first;
1439 *registration = &node_and_reg.second;
1440 return kTfLiteOk;
1441 }
1442
1443 TfLiteStatus Subgraph::GetNodeAndRegistration(
1444 struct TfLiteContext* context, int node_index, TfLiteNode** node,
1445 TfLiteRegistration** registration) {
1446 return static_cast<Subgraph*>(context->impl_)
1447 ->GetNodeAndRegistration(node_index, node, registration);
1448 }
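// Illustrative delegate-side sketch (assumed usage, not part of this file):
// a delegate's Prepare typically walks the plan using the two callbacks this
// method backs, e.g.
//   TfLiteIntArray* plan = nullptr;
//   TF_LITE_ENSURE_OK(context, context->GetExecutionPlan(context, &plan));
//   for (int i = 0; i < plan->size; ++i) {
//     TfLiteNode* node = nullptr;
//     TfLiteRegistration* reg = nullptr;
//     TF_LITE_ENSURE_OK(context, context->GetNodeAndRegistration(
//                                    context, plan->data[i], &node, &reg));
//     // Decide whether |node| / |reg| can be handled by the delegate.
//   }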
1449
1450 TfLiteStatus Subgraph::SetTensorParametersReadOnly(
1451 int tensor_index, TfLiteType type, const char* name, const size_t ndims,
1452 const int* dims, TfLiteQuantization quantization, const char* buffer,
1453 size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity) {
1454 // Ensure quantization cleanup on failure.
1455 ScopedTfLiteQuantization scoped_quantization(&quantization);
1456 ScopedTfLiteSparsity scoped_sparsity(sparsity);
1457 if (state_ == kStateInvokableAndImmutable) {
1458 ReportError(
1459 "SetTensorParametersReadOnly is disallowed when graph is immutable.");
1460 return kTfLiteError;
1461 }
1462
1463 TF_LITE_ENSURE(&context_,
1464 tensor_index < context_.tensors_size && tensor_index >= 0);
1465
1466 // For most tensors we know exactly how much memory is necessary so we can
1467 // ensure the buffer is large enough. However, we need to skip string tensors
1468 // and sparse tensors because their sizes change with the contents.
1469 // TODO(b/145615516): Extend BytesRequired to check sparse tensors.
1470 if (type != kTfLiteString && type != kTfLiteResource &&
1471 type != kTfLiteVariant && sparsity == nullptr) {
1472 size_t required_bytes;
1473 TF_LITE_ENSURE_OK(&context_,
1474 BytesRequired(type, dims, ndims, &required_bytes));
1475 TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes);
1476 }
1477
1478 TfLiteTensor& tensor = context_.tensors[tensor_index];
1479 if (type == tensor.type &&
1480 EqualArrayAndTfLiteIntArray(tensor.dims, ndims, dims)) {
1481 // Fast path which does not invalidate the invokable property.
1482 TfLiteTensorDataFree(&tensor);
1483 TfLiteQuantizationFree(&tensor.quantization);
1484 tensor.data.raw = const_cast<char*>(buffer);
1485 if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(ndims, dims);
1486 tensor.params = GetLegacyQuantization(quantization);
1487 tensor.quantization = *scoped_quantization.release();
1488 tensor.sparsity = scoped_sparsity.release();
1489 tensor.allocation_type = kTfLiteMmapRo;
1490 tensor.allocation = allocation;
1491 } else {
1492 state_ = kStateUninvokable;
1493 TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(ndims, dims),
1494 GetLegacyQuantization(quantization),
1495 const_cast<char*>(buffer), bytes, kTfLiteMmapRo,
1496 allocation, false, &tensor);
1497 tensor.quantization = *scoped_quantization.release();
1498 tensor.sparsity = scoped_sparsity.release();
1499 }
1500 return kTfLiteOk;
1501 }
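// Illustrative sketch (assumed usage; `interpreter` and `weights_data` are
// placeholders): model builders and tests reach this through the Interpreter
// wrapper, e.g.
//   TfLiteQuantization quant;
//   quant.type = kTfLiteNoQuantization;
//   quant.params = nullptr;
//   interpreter->SetTensorParametersReadOnly(
//       /*tensor_index=*/0, kTfLiteFloat32, "weights", {2, 2}, quant,
//       reinterpret_cast<const char*>(weights_data), 4 * sizeof(float));
// The buffer is used in place (kTfLiteMmapRo), so |weights_data| must outlive
// the interpreter.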
1502
1503 // Set the parameters of a read/write tensor. Unlike the read-only variant
1504 // above, no external buffer is supplied here: the tensor's memory is
1505 // managed by the runtime (in the arena, or dynamically for
1506 // string/resource/variant types), based on the dimensions given.
1507 TfLiteStatus Subgraph::SetTensorParametersReadWrite(
1508 int tensor_index, TfLiteType type, const char* name, const size_t ndims,
1509 const int* dims, TfLiteQuantization quantization, bool is_variable,
1510 const size_t ndims_signature, const int* dims_signature) {
1511 // Ensure quantization cleanup on failure.
1512 ScopedTfLiteQuantization scoped_quantization(&quantization);
1513 if (state_ == kStateInvokableAndImmutable) {
1514 ReportError(
1515 "SetTensorParametersReadWrite is disallowed when graph is immutable.");
1516 return kTfLiteError;
1517 }
1518 TF_LITE_ENSURE(&context_,
1519 tensor_index < context_.tensors_size && tensor_index >= 0);
1520 size_t required_bytes = 0;
1521 if (type != kTfLiteString && type != kTfLiteResource &&
1522 type != kTfLiteVariant) {
1523 // These types will be allocated in our arena so we need to record how
1524 // many bytes we will need based on the dimensions. String tensors are
1525 // allocated dynamically and we can't know ahead of time how much space
1526 // they will require.
1527 TF_LITE_ENSURE_OK(&context_,
1528 BytesRequired(type, dims, ndims, &required_bytes));
1529 }
1530
1531 TfLiteAllocationType allocation_type = kTfLiteArenaRw;
1532 if (type == kTfLiteString || type == kTfLiteResource ||
1533 type == kTfLiteVariant) {
1534 if (is_variable) {
1535 // We don't have a real use case for string variable tensors.
1536 ReportError("String variable tensor isn't supported.");
1537 return kTfLiteError;
1538 }
1539 allocation_type = kTfLiteDynamic;
1540 } else if (is_variable) {
1541 allocation_type = kTfLiteArenaRwPersistent;
1542 }
1543
1544 TfLiteTensor& tensor = context_.tensors[tensor_index];
1545
1546 TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(ndims, dims),
1547 GetLegacyQuantization(quantization),
1548 /*buffer=*/nullptr, required_bytes, allocation_type,
1549 nullptr, is_variable, &tensor);
1550 tensor.quantization = *scoped_quantization.release();
1551 tensor.dims_signature =
1552 ConvertArrayToTfLiteIntArray(ndims_signature, dims_signature);
1553 return kTfLiteOk;
1554 }
1555
1556 TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) {
1557 for (int node_index : new_plan) {
1558 TF_LITE_ENSURE(&context_, node_index >= 0 &&
1559 node_index < nodes_and_registration_.size());
1560 }
1561 execution_plan_ = new_plan;
1562 return kTfLiteOk;
1563 }
1564
1565 TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
1566 TfLiteIntArray* new_size) {
1567 // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too.
1568 if (tensor->allocation_type == kTfLiteArenaRw ||
1569 tensor->allocation_type == kTfLiteDynamic ||
1570 tensor->allocation_type == kTfLiteArenaRwPersistent ||
1571 tensor->allocation_type == kTfLitePersistentRo ||
1572 tensor->allocation_type == kTfLiteCustom) {
1573 tensor_resized_since_op_invoke_ |=
1574 TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
1575 if (tensor->type != kTfLiteString && tensor->type != kTfLiteResource &&
1576 tensor->type != kTfLiteVariant) {
1577 size_t bytesRequired;
1578 TfLiteStatus status = BytesRequired(tensor->type, new_size->data,
1579 new_size->size, &bytesRequired);
1580 if (status != kTfLiteOk) {
1581 TfLiteIntArrayFree(new_size);
1582 return kTfLiteError;
1583 }
1584
1585 // Realloc space for heap-allocated tensors.
1586 TfLiteTensorRealloc(bytesRequired, tensor);
1587 tensor->bytes = bytesRequired;
1588 }
1589 if (tensor->dims) TfLiteIntArrayFree(tensor->dims);
1590 tensor->dims = new_size;
1591
1592 // Reset arena-allocated tensors; they will be allocated later.
1593 if (tensor->allocation_type == kTfLiteArenaRw ||
1594 tensor->allocation_type == kTfLiteArenaRwPersistent) {
1595 tensor->data.raw = nullptr;
1596 }
1597 } else {
1598 // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore
1599 // of fixed size.
1600 TfLiteIntArrayFree(new_size);
1601 ReportError("Attempting to resize a fixed-size tensor.");
1602 return kTfLiteError;
1603 }
1604 return kTfLiteOk;
1605 }
1606
1607 void Subgraph::OptimizeMemoryForLargeTensors(
1608 int large_tensors_thresholds_in_bytes) {
1609 for (size_t tensor_index = 0; tensor_index < context_.tensors_size;
1610 tensor_index++) {
1611 TfLiteTensor* tensor = &context_.tensors[tensor_index];
1612 if (tensor->bytes >= large_tensors_thresholds_in_bytes &&
1613 tensor->allocation_type == kTfLiteArenaRw &&
1614 // Skip input tensors since they are handled by ResizeInputTensor().
1615 std::find(inputs_.begin(), inputs_.end(), tensor_index) ==
1616 inputs_.end()) {
1617 // Change large tensors' allocation_type and data.raw. This method must be
1618 // called before AllocateTensors() so the ArenaPlanner does not manage them.
1619 tensor->allocation_type = kTfLiteDynamic;
1620 tensor->data.raw = nullptr;
1621 }
1622 }
1623 }
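// Illustrative sketch (assumed InterpreterOptions surface; verify the exact
// method names before relying on them): applications normally opt in through
// interpreter options rather than calling this method directly, e.g.
//   tflite::InterpreterOptions options;
//   options.OptimizeMemoryForLargeTensors(/*value=*/1 << 20);  // ~1MB threshold
//   interpreter->ApplyOptions(&options);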
1624
1625 void Subgraph::SwitchToDelegateContext() {
1626 context_.GetNodeAndRegistration = GetNodeAndRegistration;
1627 context_.ReplaceNodeSubsetsWithDelegateKernels =
1628 ReplaceNodeSubsetsWithDelegateKernels;
1629 context_.GetExecutionPlan = GetExecutionPlan;
1630 context_.PreviewDelegatePartitioning = PreviewDelegatePartitioning;
1631 }
1632
1633 void Subgraph::SwitchToKernelContext() {
1634 context_.GetNodeAndRegistration = [](struct TfLiteContext* context,
1635 int node_index, TfLiteNode** node,
1636 TfLiteRegistration** registration) {
1637 return ForbiddenContextFunction(context);
1638 };
1639 context_.ReplaceNodeSubsetsWithDelegateKernels =
1640 [](TfLiteContext* context, TfLiteRegistration registration,
1641 const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
1642 return ForbiddenContextFunction(context);
1643 };
1644 context_.GetExecutionPlan = [](struct TfLiteContext* context,
1645 TfLiteIntArray**) {
1646 return ForbiddenContextFunction(context);
1647 };
1648 context_.PreviewDelegatePartitioning =
1649 [](struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
1650 TfLiteDelegateParams** partition_params_array,
1651 int* num_partitions) { return ForbiddenContextFunction(context); };
1652 // Free any memory that might have been allocated by
1653 // PreviewDelegatePartitioning.
1654 FreeDelegatePartitioningData();
1655 }
1656
1657 TfLiteStatus Subgraph::UndoAllDelegates() {
1658 // Return early if there is nothing to reset to.
1659 if (pre_delegation_execution_plan_.empty()) return kTfLiteOk;
1660
1661 // First free all delegate nodes.
1662 for (int execution_plan_index = 0;
1663 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1664 int node_index = execution_plan_[execution_plan_index];
1665 TfLiteNode& node = nodes_and_registration_[node_index].first;
1666 if (node.delegate == nullptr) {
1667 continue;
1668 }
1669 CleanupNode(node_index);
1670 }
1671
1672 // Reset execution plan.
1673 execution_plan_ = pre_delegation_execution_plan_;
1674 pre_delegation_execution_plan_.clear();
1675
1676 // Handle FP16 delegation (if applicable).
1677 //
1678 // First pass through execution plan to remember mapping of FP16
1679 // dequantizations in the graph.
1680 // This is required because a delegate with fp16 acceleration may have
1681 // remapped the inputs of supported nodes to point at their fp16 versions.
1682 // This remapping is performed in FP16GraphPartitionHelper in
1683 // delegates/utils. We need to undo it to ensure CPU kernels work.
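// Illustrative sketch (assumed graph shape, not taken from this file):
//   before delegation:  w_fp16 --DEQUANTIZE--> w_fp32 --> CONV_2D
//   after delegation:   the delegate reads w_fp16 directly, so CONV_2D's
//                       input index now points at w_fp16.
// The two passes below rebuild the fp16 -> fp32 mapping and point CONV_2D
// back at w_fp32, so the CPU kernel sees float32 data again.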
1684 std::vector<int> fp16_to_fp32(tensors_size(), -1);
1685 for (int execution_plan_index = 0;
1686 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1687 int node_index = execution_plan_[execution_plan_index];
1688 auto& node_and_reg = nodes_and_registration_[node_index];
1689 const TfLiteNode& node = node_and_reg.first;
1690 const TfLiteRegistration& reg = node_and_reg.second;
1691 if (reg.builtin_code == kTfLiteBuiltinDequantize &&
1692 node.inputs->size == 1 && node.outputs->size == 1) {
1693 const int input_idx = node.inputs->data[0];
1694 if (tensors_[input_idx].type == kTfLiteFloat16) {
1695 fp16_to_fp32[input_idx] = node.outputs->data[0];
1696 }
1697 }
1698 }
1699 // Second pass through the execution plan to remap applicable nodes' fp16
1700 // inputs to their original fp32 versions. Note that if a CPU kernel does
1701 // support fp16, the model will not contain a DEQUANTIZE for its constant
1702 // input.
1703 for (int execution_plan_index = 0;
1704 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1705 int node_index = execution_plan_[execution_plan_index];
1706 auto& node_and_reg = nodes_and_registration_[node_index];
1707 const TfLiteNode& node = node_and_reg.first;
1708 const TfLiteRegistration& reg = node_and_reg.second;
1709 if (reg.builtin_code == kTfLiteBuiltinDequantize) continue;
1710 for (int i = 0; i < node.inputs->size; ++i) {
1711 const int original_input_idx = node.inputs->data[i];
1712 if (original_input_idx == kTfLiteOptionalTensor) continue;
1713 if (tensors_[original_input_idx].type == kTfLiteFloat16) {
1714 node.inputs->data[i] = fp16_to_fp32[original_input_idx];
1715 }
1716 }
1717 }
1718
1719 // Delegate nodes are appended to nodes_and_registration_. Therefore,
1720 // clean up nodes_and_registration_ so that it only contains nodes from
1721 // pre_delegation_execution_plan_.
1722 int max_retained_node_index = 0;
1723 for (int execution_plan_index = 0;
1724 execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1725 max_retained_node_index = std::max(max_retained_node_index,
1726 execution_plan_[execution_plan_index]);
1727 }
1728 nodes_and_registration_.resize(max_retained_node_index + 1);
1729 // After undoing delegates, the graph is uninvokable, but mutable.
1730 state_ = kStateUninvokable;
1731
1732 delegates_undone_ = true;
1733 return kTfLiteOk;
1734 }
1735
1736 TfLiteStatus Subgraph::RedoAllDelegates() {
1737 if (!delegates_undone_) return kTfLiteOk;
1738
1739 delegates_undone_ = false;
1740 std::vector<TfLiteDelegate*> delegates_to_apply;
1741 delegates_applied_.swap(delegates_to_apply);
1742 for (auto* delegate : delegates_to_apply) {
1743 TF_LITE_ENSURE_STATUS(ModifyGraphWithDelegate(delegate));
1744 }
1745 return kTfLiteOk;
1746 }
1747
1748 TfLiteStatus Subgraph::RemoveAllDelegates() {
1749 TF_LITE_ENSURE_STATUS(UndoAllDelegates());
1750 delegates_applied_.clear();
1751 delegates_undone_ = false;
1752 TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
1753 return kTfLiteOk;
1754 }
1755
1756 bool Subgraph::HasDelegates() { return !delegates_applied_.empty(); }
1757
1758 bool Subgraph::IsFullyDelegated() const {
1759 for (const int nid : execution_plan_) {
1760 const TfLiteNode& node = nodes_and_registration_[nid].first;
1761 if (node.delegate == nullptr) return false;
1762 }
1763 return true;
1764 }
1765
1766 void Subgraph::EnsureTensorsVectorCapacity() {
1767 const size_t required_capacity = tensors_.size() + kTensorsCapacityHeadroom;
1768 if (required_capacity > tensors_.capacity()) {
1769 // Whenever the vector capacity has to grow, grow it to at least twice
1770 // its current size. This matches the default growth behavior of GCC
1771 // STL's `std::vector::resize()` and avoids frequently reallocating and
1772 // copying the underlying buffer.
1773 size_t reserved_capacity =
1774 std::max(required_capacity, tensors_.capacity() * 2);
1775 tensors_.reserve(reserved_capacity);
1776 context_.tensors = tensors_.data();
1777 }
1778 }
1779
1780 TfLiteStatus Subgraph::EnsureMemoryAllocations() {
1781 if (memory_planner_) {
1782 state_ = kStateUninvokable;
1783 TF_LITE_ENSURE_OK(&context_, memory_planner_->PlanAllocations());
1784 }
1785 TF_LITE_ENSURE_OK(&context_, AllocateTensors());
1786 TF_LITE_ENSURE_EQ(&context_, state_, kStateInvokable);
1787 return kTfLiteOk;
1788 }
1789
1790 TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
1791 TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(),
1792 "ModifyGraphWithDelegate");
1793
1794 if (delegate == nullptr) {
1795 ReportError("Null delegate.");
1796 return kTfLiteDelegateError;
1797 }
1798
1799 // Resets delegation & leaves graph in consistent state if delegate status is
1800 // not okay.
1801 auto reset_delegation_if_not_ok = [this](TfLiteStatus status) {
1802 if (status != kTfLiteOk) {
1803 TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
1804 ReportError(
1805 "Restored original execution plan after delegate application "
1806 "failure.");
1807 return kTfLiteDelegateError;
1808 }
1809 return kTfLiteOk;
1810 };
1811
1812 // STEP 1: Verify & prepare graph for delegation.
1813 // ==============================================
1814
1815 // Restore delegation state if applicable.
1816 TF_LITE_ENSURE_STATUS(RedoAllDelegates());
1817
1818 const bool delegate_supports_dynamic_shapes =
1819 delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors;
1820 const auto pre_delegation_state = state_;
1821
1822 if (state_ == kStateInvokableAndImmutable) {
1823 // A delegate that doesn't support dynamic shapes was already applied, so
1824 // we can assume tensor shapes have been propagated & there are no dynamic
1825 // tensors.
1826 // Reset the state to force tensor/op reallocation.
1827 state_ = kStateUninvokable;
1828 } else if (!delegate_supports_dynamic_shapes) {
1829 // Check if graph has dynamic tensors by preparing ops.
1830 int last_execution_plan_index_prepared;
1831 TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
1832 0, execution_plan_, &last_execution_plan_index_prepared));
1833 if (has_dynamic_tensors_) {
1834 TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
1835 TFLITE_LOG(
1836 tflite::TFLITE_LOG_WARNING,
1837 "Attempting to use a delegate that only supports static-sized "
1838 "tensors with a graph that has dynamic-sized tensors (tensor#%d is a "
1839 "dynamic-sized tensor).",
1840 dynamic_tensor_index_);
1841 return kTfLiteApplicationError;
1842 }
1843 }
1844
1845 if (delegates_applied_.empty()) {
1846 // This is the first delegate being applied, so remember original execution
1847 // plan.
1848 pre_delegation_execution_plan_ = execution_plan_;
1849 }
1850
1851 // STEP 2: Delegate replaces applicable nodes with delegate kernels.
1852 // =================================================================
1853
1854 // Setup additional context interface.
1855 SwitchToDelegateContext();
1856 TfLiteStatus status = delegate->Prepare(&context_, delegate);
1857 // Remove additional context info.
1858 SwitchToKernelContext();
1859 TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(status));
1860
1861 // STEP 3: Leave graph in consistent state based on delegate & previous state.
1862 // ===========================================================================
1863
1864 if (!delegate_supports_dynamic_shapes) {
1865 // CASE 1: Current delegate does not support dynamic shapes.
1866 // Reset the state to force tensor/op reallocation.
1867 state_ = kStateUninvokable;
1868 TF_LITE_ENSURE_STATUS(
1869 reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1870 // After using a delegate which doesn't support dynamic tensors, make the
1871 // entire graph immutable.
1872 state_ = kStateInvokableAndImmutable;
1873 } else if (pre_delegation_state == kStateInvokableAndImmutable) {
1874 // CASE 2: Current delegate supports dynamic shapes, but a previous one
1875 // does not.
1876 // Make sure new delegate didn't mark a tensor as dynamic.
1877 int last_execution_plan_index_prepared;
1878 TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(PrepareOpsStartingAt(
1879 0, execution_plan_, &last_execution_plan_index_prepared)));
1880 if (has_dynamic_tensors_) {
1881 TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
1882 ReportError(
1883 "Cannot allow dynamic tensors due to previous delegation, resetting "
1884 "to original execution plan.");
1885 return kTfLiteApplicationError;
1886 }
1887 // Redo memory allocations & ensure state is set back to original value.
1888 TF_LITE_ENSURE_STATUS(
1889 reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1890 state_ = kStateInvokableAndImmutable;
1891 } else if (pre_delegation_state == kStateInvokable) {
1892 // CASE 3: Current delegate supports dynamic shapes, and the graph was
1893 // previously invokable.
1894 // Flush allocation now to leave it in a consistent state.
1895 TF_LITE_ENSURE_STATUS(
1896 reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1897 }
1898 delegates_applied_.push_back(delegate);
1899
1900 return status;
1901 }
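// Illustrative sketch (assumed usage, not part of this file): applications
// reach this method through Interpreter::ModifyGraphWithDelegate, e.g. with
// the XNNPACK delegate:
//   TfLiteXNNPackDelegateOptions xnn_opts = TfLiteXNNPackDelegateOptionsDefault();
//   TfLiteDelegate* xnn_delegate = TfLiteXNNPackDelegateCreate(&xnn_opts);
//   if (interpreter->ModifyGraphWithDelegate(xnn_delegate) != kTfLiteOk) {
//     // Execution falls back to the restored CPU plan (see
//     // reset_delegation_if_not_ok above); the delegate can be released.
//   }
// A kTfLiteApplicationError return indicates an incompatibility between the
// delegate and the graph (e.g. dynamic tensors), not a broken runtime.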
1902
1903 TfLiteStatus Subgraph::SetCustomAllocationForTensor(
1904 int tensor_index, const TfLiteCustomAllocation& allocation, int64_t flags) {
1905 TfLiteTensor* tensor = &context_.tensors[tensor_index];
1906 TF_LITE_ENSURE(context(),
1907 (tensor->allocation_type == kTfLiteArenaRw ||
1908 tensor->allocation_type == kTfLiteArenaRwPersistent ||
1909 tensor->allocation_type == kTfLiteCustom));
1910 // Don't check allocation.bytes here; we do that after all ops are
1911 // prepared, to allow tensor shape propagation.
1912 TF_LITE_ENSURE(context(), allocation.data != nullptr);
1913 if (!(flags & kTfLiteCustomAllocationFlagsSkipAlignCheck)) {
1914 const intptr_t data_ptr_value = reinterpret_cast<intptr_t>(allocation.data);
1915 TF_LITE_ENSURE(context(), data_ptr_value % kDefaultTensorAlignment == 0);
1916 }
1917
1918 const auto iter_and_success =
1919 custom_allocations_.insert({tensor_index, allocation});
1920 if (!iter_and_success.second) {
1921 iter_and_success.first->second = allocation;
1922 }
1923
1924 tensor->allocation_type = kTfLiteCustom;
1925 tensor->data.data = allocation.data;
1926
1927 return kTfLiteOk;
1928 }
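// Illustrative sketch (assumed usage; `AlignTo` is a hypothetical helper that
// rounds a pointer up to the requested alignment): callers hand in a
// caller-owned, sufficiently aligned buffer through the Interpreter, e.g.
//   std::vector<float> storage(num_elements + 16);  // caller-owned memory
//   TfLiteCustomAllocation alloc = {
//       /*data=*/AlignTo(kDefaultTensorAlignment, storage.data()),
//       /*bytes=*/num_elements * sizeof(float)};
//   interpreter->SetCustomAllocationForTensor(interpreter->inputs()[0], alloc);
// The buffer must remain valid across AllocateTensors() and every subsequent
// Invoke().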
1929
1930 void Subgraph::SetName(const char* name) {
1931 if (name) {
1932 name_ = name;
1933 } else {
1934 name_ = "";
1935 }
1936 }
1937
1938 const std::string& Subgraph::GetName() const { return name_; }
1939
1940 void Subgraph::DumpMemoryPlannerDebugInfo() const {
1941 if (memory_planner_ == nullptr) return;
1942 memory_planner_->DumpDebugInfo(execution_plan());
1943 }
1944
1945 void Subgraph::GetMemoryAllocInfo(SubgraphAllocInfo* alloc_info) const {
1946 memset(alloc_info, 0, sizeof(SubgraphAllocInfo));
1947 if (memory_planner_ == nullptr) return;
1948 memory_planner_->GetAllocInfo(&alloc_info->arena_size,
1949 &alloc_info->arena_persist_size);
1950 for (const auto& tensor : tensors_) {
1951 if (tensor.allocation_type == kTfLiteDynamic &&
1952 tensor.data.raw != nullptr) {
1953 alloc_info->dynamic_size += tensor.bytes;
1954 }
1955 }
1956 if (GetSubgraphIndex() == 0) {
1957 for (const auto& res : *resources_) {
1958 alloc_info->resource_size += res.second->GetMemoryUsage();
1959 }
1960 }
1961 }
1962
1963 std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() {
1964 return std::unique_ptr<GraphInfo>(new InterpreterInfo(this));
1965 }
1966
1967 void Subgraph::InitializeTensorReleaseMap() {
1968 for (int i = 0; i < execution_plan_.size(); ++i) {
1969 int node_index = execution_plan_[i];
1970 const TfLiteNode& node = nodes_and_registration_[node_index].first;
1971 for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
1972 int input_tensor_index = node.inputs->data[input_index];
1973 TfLiteTensor* input_tensor = tensor(input_tensor_index);
1974 if (!input_tensor) continue;
1975 tensor_to_last_op_index_[input_tensor_index] = node_index;
1976 }
1977 // Also check the node's outputs so that tensors which are never consumed
1978 // as inputs by another node are still released.
1979 for (int output_index = 0; output_index < node.outputs->size;
1980 ++output_index) {
1981 int output_tensor_index = node.outputs->data[output_index];
1982 TfLiteTensor* output_tensor = tensor(output_tensor_index);
1983 if (!output_tensor) continue;
1984 tensor_to_last_op_index_[output_tensor_index] = node_index;
1985 }
1986 }
1987 }
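// Illustrative sketch (assumed graph, not taken from this file) of the map
// built above: for a chain A -> B -> C where tensor t1 is produced by A and
// consumed only by B, and t2 flows from B into C,
//   tensor_to_last_op_index_ == { {t1, index of B}, {t2, index of C}, ... }
// MaybeReleaseDynamicTensors() uses this to free t1's dynamic buffer as soon
// as B has finished executing.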
1988
1989 void Subgraph::MaybeReleaseDynamicTensors(const TfLiteNode& node,
1990 size_t node_index) {
1991 if (!ShouldReleaseDynamicTensors()) return;
1992
1993 // Release the node's input tensors if they are neither graph
1994 // inputs/outputs nor needed by the remaining graph execution.
1995 auto tensorIsInput = [&](int index) {
1996 for (int idx : inputs_) {
1997 if (idx == index) return true;
1998 }
1999 return false;
2000 };
2001 auto tensorIsOutput = [&](int index) {
2002 for (int idx : outputs_) {
2003 if (idx == index) return true;
2004 }
2005 return false;
2006 };
2007 for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
2008 int input_tensor_index = node.inputs->data[input_index];
2009 TfLiteTensor* input_tensor = tensor(input_tensor_index);
2010 if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic ||
2011 input_tensor->type == kTfLiteString ||
2012 input_tensor->type == kTfLiteResource ||
2013 tensorIsInput(input_tensor_index) || tensorIsOutput(input_tensor_index))
2014 continue;
2015 auto it = tensor_to_last_op_index_.find(input_tensor_index);
2016 if (it != tensor_to_last_op_index_.end() && it->second == node_index) {
2017 if (input_tensor->data.raw) {
2018 TfLiteTensorDataFree(input_tensor);
2019 }
2020 }
2021 }
2022
2023 // Release the node's output tensors if they are neither graph
2024 // inputs/outputs nor needed by the remaining graph execution.
2025 for (int output_index = 0; output_index < node.outputs->size;
2026 ++output_index) {
2027 int output_tensor_index = node.outputs->data[output_index];
2028 TfLiteTensor* output_tensor = tensor(output_tensor_index);
2029 if (!output_tensor || output_tensor->allocation_type != kTfLiteDynamic ||
2030 output_tensor->type == kTfLiteString ||
2031 output_tensor->type == kTfLiteResource ||
2032 tensorIsInput(output_tensor_index) ||
2033 tensorIsOutput(output_tensor_index))
2034 continue;
2035 auto it = tensor_to_last_op_index_.find(output_tensor_index);
2036 if (it != tensor_to_last_op_index_.end() && it->second == node_index) {
2037 if (output_tensor->data.raw) {
2038 TfLiteTensorDataFree(output_tensor);
2039 }
2040 }
2041 }
2042 }
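// Illustrative sketch (assumed InterpreterOptions surface; verify the exact
// method names before relying on them): the eager release above is opt-in,
// e.g.
//   tflite::InterpreterOptions options;
//   options.SetEnsureDynamicTensorsAreReleased();
//   interpreter->ApplyOptions(&options);
// after which ShouldReleaseDynamicTensors() returns true for this subgraph.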
2043
2044 } // namespace tflite
2045