/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/core/subgraph.h"

#include <stdarg.h>
#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/lite/allocation.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/c/c_api_types.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/profiler.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/core/macros.h"
#include "tensorflow/lite/experimental/resource/resource_base.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/memory_planner.h"
#include "tensorflow/lite/minimal_logging.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/util.h"
#ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
#include "tensorflow/lite/simple_planner.h"
#else
#include "tensorflow/lite/arena_planner.h"
#endif

namespace tflite {

namespace {

struct TfLiteQuantizationDeleter {
  void operator()(TfLiteQuantization* q) {
    if (q) TfLiteQuantizationFree(q);
  }
};

using ScopedTfLiteQuantization =
    std::unique_ptr<TfLiteQuantization, TfLiteQuantizationDeleter>;

struct TfLiteSparsityDeleter {
  void operator()(TfLiteSparsity* s) {
    if (s) TfLiteSparsityFree(s);
  }
};

using ScopedTfLiteSparsity =
    std::unique_ptr<TfLiteSparsity, TfLiteSparsityDeleter>;

TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node,
                           const TfLiteRegistration& registration,
                           int node_index, const char* message) {
  context->ReportError(
      context, "Node number %d (%s) %s.\n", node_index,
      registration.custom_name
          ? registration.custom_name
          : EnumNameBuiltinOperator(
                static_cast<BuiltinOperator>(registration.builtin_code)),
      message);
  return kTfLiteError;
}

// Stub function that returns kTfLiteError when the function is forbidden.
// We register this single function for several different context function
// pointers to save compiled binary size. Please note the restrictions:
// * The type of the first parameter has to be `TfLiteContext*`.
// * All parameters must be trivially destructible. (E.g. no C++ classes.)
TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) {
  context->ReportError(context,
                       "The function is forbidden if not calling in delegate.");
  return kTfLiteError;
}

// Set the ForbiddenContextFunction to a compatible function pointer.
template <typename FunctionType>
void SetForbiddenContextFunction(FunctionType* func) {
  *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction);
}
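
// Illustrative sketch (not part of the original source): this is roughly how
// the stub above gets wired in by SwitchToKernelContext()-style setup. A
// context callback that must not be used outside delegate code can be pointed
// at ForbiddenContextFunction through the helper, e.g.:
//
//   TfLiteContext ctx = {};                                  // hypothetical
//   SetForbiddenContextFunction(&ctx.GetNodeAndRegistration);
//   // Any later call through ctx.GetNodeAndRegistration now reports an error
//   // and returns kTfLiteError instead of touching subgraph state.
//
// The reinterpret_cast in the helper relies on the variadic stub being
// call-compatible with each targeted signature, which is why the comment above
// restricts the first parameter type and parameter destructibility.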

// Returns true if at least one tensor in the given list is kTfLiteDynamic.
template <typename TensorIntArray>
bool HasDynamicTensorImpl(const TfLiteContext& context,
                          const TensorIntArray& int_array) {
  for (int i : int_array) {
    if (i == kTfLiteOptionalTensor) continue;
    const TfLiteTensor& tensor = context.tensors[i];
    if (tensor.allocation_type == kTfLiteDynamic) {
      return true;
    }
  }
  return false;
}

bool HasDynamicTensor(const TfLiteContext& context,
                      const TfLiteIntArray* int_array) {
  return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array});
}

// Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization.
TfLiteQuantizationParams GetLegacyQuantization(
    const TfLiteQuantization& quantization) {
  TfLiteQuantizationParams legacy_quantization;
  legacy_quantization.scale = 0;
  legacy_quantization.zero_point = 0;

  // If the quantization type isn't affine, return the empty
  // legacy_quantization.
  if (quantization.type != kTfLiteAffineQuantization) {
    return legacy_quantization;
  }

  auto* affine_quantization =
      static_cast<TfLiteAffineQuantization*>(quantization.params);
  if (!affine_quantization || !affine_quantization->scale ||
      !affine_quantization->zero_point ||
      affine_quantization->scale->size != 1 ||
      affine_quantization->zero_point->size != 1) {
    return legacy_quantization;
  }

  // We know it's per-layer quantization now.
  legacy_quantization.scale = affine_quantization->scale->data[0];
  legacy_quantization.zero_point = affine_quantization->zero_point->data[0];
  return legacy_quantization;
}
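
// Illustrative sketch (not part of the original source): for a hypothetical
// per-tensor affine quantization with a single scale/zero-point pair,
//
//   TfLiteAffineQuantization* affine = ...;  // scale = {0.5}, zero_point = {3}
//   TfLiteQuantization q = {kTfLiteAffineQuantization, affine};
//   TfLiteQuantizationParams legacy = GetLegacyQuantization(q);
//   // legacy.scale == 0.5f, legacy.zero_point == 3
//
// the conversion simply copies the single scale/zero-point pair into the
// legacy struct; per-channel quantization (size > 1) falls back to {0, 0}.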

static constexpr const char kUnknownCustomOpName[] = "UnknownCustomOp";
const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) {
  if (op_reg.builtin_code == tflite::BuiltinOperator_CUSTOM) {
    const char* const custom_name = op_reg.custom_name;
    return custom_name ? custom_name : kUnknownCustomOpName;
  }
  if (op_reg.builtin_code == tflite::BuiltinOperator_DELEGATE &&
      op_reg.custom_name) {
    return op_reg.custom_name;
  }
  return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code];
}

}  // namespace

// A trivial implementation of GraphInfo around the Interpreter.
// NOTE: this interpreter info represents the subset of the
// graph that is executed according to execution plan. Thus,
// the indices are execution plan indices rather than raw node
// indices.
class InterpreterInfo : public GraphInfo {
 public:
  explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}

  size_t num_tensors() const override { return subgraph_->tensors_size(); }
  TfLiteTensor* tensor(size_t index) override {
    return subgraph_->tensor(index);
  }
  size_t num_execution_nodes() const override {
    return subgraph_->execution_plan().size();
  }
  size_t num_total_nodes() const override { return subgraph_->nodes_size(); }
  const TfLiteNode& node(size_t index) const override {
    int node_index = subgraph_->execution_plan()[index];
    return subgraph_->nodes_and_registration()[node_index].first;
  }
  size_t node_index(size_t index) const override {
    return subgraph_->execution_plan()[index];
  }
  const std::vector<int>& inputs() const override {
    return subgraph_->inputs();
  }
  const std::vector<int>& outputs() const override {
    return subgraph_->outputs();
  }
  const std::vector<int>& variables() const override {
    return subgraph_->variables();
  }

 public:
  Subgraph* subgraph_;
};

Subgraph::Subgraph(ErrorReporter* error_reporter,
                   TfLiteExternalContext** external_contexts,
                   std::vector<std::unique_ptr<Subgraph>>* subgraphs,
                   resource::ResourceMap* resources,
                   resource::ResourceIDMap* resource_ids,
                   resource::InitializationStatusMap* initialization_status_map)
    : external_contexts_(external_contexts),
      error_reporter_(error_reporter),
      next_execution_plan_index_to_prepare_(0),
      next_execution_plan_index_to_plan_allocation_(0),
      subgraphs_(subgraphs),
      resources_(resources),
      resource_ids_(resource_ids),
      initialization_status_map_(initialization_status_map) {
  // TODO(b/161272052): Consider a better TfLiteContext initialization pattern:
  context_.impl_ = static_cast<void*>(this);
  context_.ResizeTensor = ResizeTensor;
  context_.ReportError = ReportErrorC;
  context_.AddTensors = AddTensors;
  context_.tensors = nullptr;
  context_.tensors_size = 0;
  context_.allow_fp32_relax_to_fp16 = false;
  context_.recommended_num_threads = -1;
  context_.GetExternalContext = GetExternalContext;
  context_.SetExternalContext = SetExternalContext;
  context_.profiler = nullptr;
  context_.GetTensor = nullptr;
  context_.GetEvalTensor = nullptr;
  context_.GetModelMetadata = GetModelMetadata;

  // Reserve some space for the tensors to avoid excessive resizing.
  tensors_.reserve(kTensorsReservedCapacity);
  nodes_and_registration_.reserve(kTensorsReservedCapacity);
  // Invalid to call these except from TfLiteDelegate
  SwitchToKernelContext();
}

Subgraph::~Subgraph() {
  for (int node_index = 0; node_index < nodes_and_registration_.size();
       ++node_index) {
    CleanupNode(node_index);
  }

  for (size_t i = 0; i < context_.tensors_size; i++) {
    TfLiteTensor* tensor = &context_.tensors[i];
    if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
        tensor->delegate->FreeBufferHandle != nullptr) {
      tensor->delegate->FreeBufferHandle(&context_, tensor->delegate,
                                         &tensor->buffer_handle);
    }
    TfLiteTensorFree(tensor);
  }
}

void Subgraph::CleanupNode(int node_index) {
  TfLiteNode& node = nodes_and_registration_[node_index].first;
  const TfLiteRegistration& registration =
      nodes_and_registration_[node_index].second;
  TfLiteIntArrayFree(node.inputs);
  TfLiteIntArrayFree(node.outputs);
  TfLiteIntArrayFree(node.temporaries);
  TfLiteIntArrayFree(node.intermediates);
  if (node.builtin_data) free(node.builtin_data);
  OpFree(registration, node.user_data);
  node.builtin_data = nullptr;
}

TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
    TfLiteContext* context, TfLiteRegistration registration,
    const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
  return static_cast<Subgraph*>(context->impl_)
      ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace,
                                              delegate);
}

namespace {

// Copy a std::vector<int> to an existing TfLiteIntArray.
// This is a low-level data manipulation function, and it's the caller's
// responsibility to ensure the TfLiteIntArray is large enough.
void CopyVectorToTfLiteIntArray(const std::vector<int>& vec,
                                TfLiteIntArray* arr) {
  arr->size = vec.size();
  memcpy(arr->data, vec.data(), sizeof(int) * arr->size);
}

// This function allocates a contiguous memory block that contains a
// TfLiteDelegateParams followed by several TfLiteIntArrays.
// Calling `free` on the returned TfLiteDelegateParams* releases all of the
// allocated space at once.
//
// +-----------------------------------+
// | TfLiteDelegateParams              |
// | TfLiteDelegate* delegate;         |
// | TfLiteIntArray* nodes_to_replace; |--\
// | TfLiteIntArray* input_tensors;    |--+--\
// | TfLiteIntArray* output_tensors;   |--+--+--\
// +-----------------------------------+  |  |  |
// | TfLiteIntArray (variable size)    |<-/  |  |
// +-----------------------------------+     |  |
// | TfLiteIntArray (variable size)    |<----/  |
// +-----------------------------------+        |
// | TfLiteIntArray (variable size)    |<-------/
// +-----------------------------------+
TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
                                           const NodeSubset& node_subset) {
  // Step 1: Calculate the allocation size.
  int allocation_size = sizeof(TfLiteDelegateParams);

  int nodes_to_replace_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size());
  allocation_size += nodes_to_replace_size;

  int input_tensors_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size());
  allocation_size += input_tensors_size;

  int output_tensors_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size());
  allocation_size += output_tensors_size;

  // Step 2: Allocate the memory.
  // Use `char*` to conveniently step through the allocated space by bytes.
  char* allocation = static_cast<char*>(malloc(allocation_size));

  // Step 3: Fill all data structures.
  TfLiteDelegateParams* params =
      reinterpret_cast<TfLiteDelegateParams*>(allocation);
  params->delegate = delegate;
  allocation += sizeof(TfLiteDelegateParams);

  params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
  allocation += nodes_to_replace_size;

  params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
  allocation += input_tensors_size;

  params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.output_tensors,
                             params->output_tensors);
  allocation += output_tensors_size;

  return params;
}
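
// Illustrative sketch (not part of the original source), assuming a
// hypothetical NodeSubset `subset` and delegate `delegate`: the single-malloc
// layout above means the params struct and all three arrays share one
// lifetime.
//
//   TfLiteDelegateParams* params = CreateDelegateParams(delegate, subset);
//   // params->nodes_to_replace / input_tensors / output_tensors point into
//   // the same block, directly after the struct itself.
//   free(params);  // one free releases the struct and all three arrays
//
// This is why ReplaceNodeSubsetsWithDelegateKernels() below can hand `params`
// over as builtin_data and rely on the node cleanup path's single free() call.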

// Assumes that params is not nullptr.
void PopulatePreviewDelegateParams(const NodeSubset& node_subset,
                                   TfLiteDelegateParams* params) {
  // Since these params are used for previewing partitioning, params->delegate
  // is not required.
  params->delegate = nullptr;

  params->nodes_to_replace = TfLiteIntArrayCreate(node_subset.nodes.size());
  CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);

  params->input_tensors =
      TfLiteIntArrayCreate(node_subset.input_tensors.size());
  CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);

  params->output_tensors =
      TfLiteIntArrayCreate(node_subset.output_tensors.size());
  CopyVectorToTfLiteIntArray(node_subset.output_tensors,
                             params->output_tensors);
}

}  // namespace

TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
    TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegate* delegate) {
  // Ignore empty node replacement sets.
  if (!nodes_to_replace->size) {
    return kTfLiteOk;
  }

  // Annotate the registration as DELEGATE op.
  registration.builtin_code = BuiltinOperator_DELEGATE;

  // Analyze the graph to find all independent node_subsets that are either
  // fully not-this-delegate or this-delegate computation.
  InterpreterInfo info(this);
  std::vector<NodeSubset> node_subsets;
  PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
                                           &node_subsets);

#ifdef __ANDROID__
  // On Android the log message below is used for diagnosing delegation success
  // also in production builds. Delegation happens sufficiently rarely that the
  // message isn't spammy.
  TFLITE_LOG_PROD(
      tflite::TFLITE_LOG_INFO,
      "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions.",
      nodes_to_replace->size,
      registration.custom_name ? registration.custom_name : "unknown",
      node_subsets.size());
#else   // !__ANDROID__
  // Server-side, delegation may happen so often as to make logging spammy + we
  // don't have a clear need for the diagnostic in production builds.
  TFLITE_LOG(
      tflite::TFLITE_LOG_INFO,
      "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions.",
      nodes_to_replace->size,
      registration.custom_name ? registration.custom_name : "unknown",
      node_subsets.size());
#endif  // __ANDROID__

  execution_plan_.clear();

  for (auto& node_subset : node_subsets) {
    // Subsets claimed by the delegate should have a "macro" op created; the
    // other node_subsets (kTfNonPartition) just have their nodes added back
    // to the execution plan.
    switch (node_subset.type) {
      case NodeSubset::kTfNonPartition:
        for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end();
             ++it) {
          execution_plan_.push_back(*it);
        }
        break;
      case NodeSubset::kTfPartition: {
        int node_index;

        TfLiteDelegateParams* params =
            CreateDelegateParams(delegate, node_subset);
        TF_LITE_ENSURE_STATUS(AddNodeWithParameters(
            node_subset.input_tensors, node_subset.output_tensors, {}, nullptr,
            0, params, &registration, &node_index));

        // Initialize the output tensors' delegate-related fields.
        for (int tensor_index : node_subset.output_tensors) {
          TfLiteTensor* tensor = &tensors_[tensor_index];
          TF_LITE_ENSURE(&context_, tensor->delegate == nullptr ||
                                        tensor->delegate == delegate);
          tensor->delegate = delegate;
        }

        // Associate the node with the delegate.
        TfLiteNode* node = &nodes_and_registration_[node_index].first;
        node->delegate = delegate;
      } break;
      case NodeSubset::kTfUnexplored:
        return kTfLiteError;
        break;
    }
  }
  return kTfLiteOk;
}

TfLiteExternalContext* Subgraph::GetExternalContext(
    TfLiteExternalContextType type) {
  if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
    return external_contexts_[type];
  }
  return nullptr;
}

TfLiteExternalContext* Subgraph::GetExternalContext(
    struct TfLiteContext* context, TfLiteExternalContextType type) {
  return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type);
}

void Subgraph::SetExternalContext(TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx) {
  if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
    external_contexts_[type] = ctx;
  }
}

void Subgraph::SetExternalContext(struct TfLiteContext* context,
                                  TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx) {
  return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx);
}

// Gets a TfLiteIntArray* representing the execution plan. The interpreter owns
// this memory and it is only guaranteed to exist during the invocation of the
// delegate prepare.
TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) {
  plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size()));
  *execution_plan = plan_cache_.get();
  static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]),
                "TfLiteIntArray and execution_plan do not contain same type.");
  std::memcpy(plan_cache_->data, execution_plan_.data(),
              sizeof(plan_cache_->data[0]) * execution_plan_.size());
  return kTfLiteOk;
}
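
// Illustrative sketch (not part of the original source): a delegate's Prepare
// callback typically reaches this through the context entry point below, e.g.
//
//   TfLiteIntArray* plan = nullptr;                       // hypothetical use
//   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
//   for (int i = 0; i < plan->size; ++i) {
//     // plan->data[i] is a node index, in execution order.
//   }
//
// Note the caveat above: `plan` is owned by the subgraph's plan_cache_ and is
// only valid for the duration of delegate preparation, so it must not be
// stored past that call.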

// WARNING: This is an experimental interface that is subject to change.
// Entry point for C node plugin API to get the execution plan.
TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context,
                                        TfLiteIntArray** execution_plan) {
  return static_cast<Subgraph*>(context->impl_)
      ->GetExecutionPlan(execution_plan);
}

void Subgraph::FreeDelegatePartitioningData() {
  for (auto& params : partitioning_preview_cache_) {
    TfLiteIntArrayFree(params.nodes_to_replace);
    TfLiteIntArrayFree(params.input_tensors);
    TfLiteIntArrayFree(params.output_tensors);
  }
  partitioning_preview_cache_.clear();
}

TfLiteStatus Subgraph::GetModelMetadata(const char* name, const char** ptr,
                                        size_t* bytes) {
  TF_LITE_ENSURE(&context_, ptr != nullptr);
  TF_LITE_ENSURE(&context_, bytes != nullptr);
  *ptr = nullptr;
  *bytes = 0;
  if (!metadata_) return kTfLiteError;
  const std::string name_str = name;
  auto itr = metadata_->find(name_str);
  if (itr != metadata_->end()) {
    *ptr = itr->second.c_str();
    *bytes = itr->second.size();
    return kTfLiteOk;
  }
  return kTfLiteError;
}

TfLiteStatus Subgraph::GetModelMetadata(const struct TfLiteContext* context,
                                        const char* name, const char** ptr,
                                        size_t* bytes) {
  return static_cast<Subgraph*>(context->impl_)
      ->GetModelMetadata(name, ptr, bytes);
}

TfLiteStatus Subgraph::PreviewDelegatePartitioning(
    const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegateParams** partition_params_array, int* num_partitions) {
  // Ensure partitioning cache is empty.
  FreeDelegatePartitioningData();
  // Defaults.
  if (!partition_params_array || !num_partitions) return kTfLiteError;
  *partition_params_array = nullptr;
  *num_partitions = 0;
  if (!nodes_to_replace->size) {
    return kTfLiteOk;
  }

  // Partition the execution plan into node subsets.
  InterpreterInfo info(this);
  std::vector<NodeSubset> node_subsets;
  PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
                                           &node_subsets);

  // Create one TfLiteDelegateParams per node-subset which would be delegated.
  for (auto& node_subset : node_subsets) {
    if (node_subset.type != NodeSubset::kTfPartition) {
      continue;
    }
    partitioning_preview_cache_.emplace_back();
    PopulatePreviewDelegateParams(node_subset,
                                  &partitioning_preview_cache_.back());
    ++*num_partitions;
  }

  *partition_params_array = partitioning_preview_cache_.data();
  return kTfLiteOk;
}

TfLiteStatus Subgraph::PreviewDelegatePartitioning(
    struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegateParams** partition_params_array, int* num_partitions) {
  return static_cast<Subgraph*>(context->impl_)
      ->PreviewDelegatePartitioning(nodes_to_replace, partition_params_array,
                                    num_partitions);
}

TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) {
  TF_LITE_ENSURE_OK(&context_,
                    CheckTensorIndices("inputs", inputs.data(), inputs.size()));
  inputs_ = std::move(inputs);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) {
  TF_LITE_ENSURE_OK(
      &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size()));
  outputs_ = std::move(outputs);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) {
  TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables", variables.data(),
                                                  variables.size()));
  variables_ = std::move(variables);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetMetadata(
    const std::map<std::string, std::string>* metadata) {
  metadata_ = metadata;
  // TODO(b/188185962): Set context_.allow_fp32_relax_to_fp16 based on metadata.
  return kTfLiteOk;
}

void Subgraph::SetCancellationFunction(void* data,
                                       bool (*check_cancelled_func)(void*)) {
  cancellation_data_ = data;
  check_cancelled_func_ = check_cancelled_func;
}

bool Subgraph::IsCancelled() {
  return (check_cancelled_func_ != nullptr) &&
         (*check_cancelled_func_)(cancellation_data_);
}

void Subgraph::ReserveNodes(int count) {
  nodes_and_registration_.reserve(count);
}

TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
                                          int length) {
  // Making sure kTfLiteOptionalTensor is not re-defined to something other than
  // -1.
  static_assert(kTfLiteOptionalTensor == -1,
                "kTfLiteOptionalTensor should be defined -1");

  for (int i = 0; i < length; i++) {
    int index = indices[i];
    // Continue if index == kTfLiteOptionalTensor before the additional
    // comparisons below; size_t(-1) is always >= context_.tensors_size.
    if (index == kTfLiteOptionalTensor) {
      continue;
    }
    if (index < 0 || static_cast<size_t>(index) >= context_.tensors_size) {
      ReportError(
          "Invalid tensor index %d in %s. The subgraph has %d tensors\n", index,
          label, context_.tensors_size);
      consistent_ = false;
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

// We have two arrays and we need to check that elements from one array don't
// show up in the other. We could sort both arrays and then iterate with two
// pointers from start to finish always increasing the smaller one but since
// these arrays are usually short (<25 elements for inputs, usually <3 for
// outputs), this might be slower than the naive approach (if arrays have size n
// and m, with n >> m ~ O(1), first approach is O(nlogn) whereas the other is
// O(n)). Plus, sorting the input and output arrays might not be something we
// want as it destroys ordering of elements.
//
// If it turns out that this is an issue, we can switch to the other algorithm.
TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices,
                                                     int num_inputs,
                                                     const int* output_indices,
                                                     int num_outputs) {
  for (int i = 0; i < num_inputs; i++) {
    for (int j = 0; j < num_outputs; j++) {
      if (input_indices[i] == output_indices[j]) {
        ReportError("Tensor %d is both input %d and output %d\n",
                    input_indices[i], i, j);
        consistent_ = false;
        return kTfLiteError;
      }
    }
  }
  return kTfLiteOk;
}

namespace {
// Multiply two sizes and return kTfLiteError if overflow occurred;
// This is based off tensorflow/overflow.h but is simpler as we already
// have unsigned numbers. It is also generalized to work where sizeof(size_t)
// is not 8.
TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product) {
  // Multiplying a * b where a and b are size_t cannot result in overflow in a
  // size_t accumulator if both numbers have no non-zero bits in their upper
  // half.
  constexpr size_t size_t_bits = 8 * sizeof(size_t);
  constexpr size_t overflow_upper_half_bit_position = size_t_bits / 2;
  *product = a * b;
  // If neither integer has non-zero bits past the upper-half boundary, the
  // product can't overflow. Otherwise check using slow division.
  if (TFLITE_EXPECT_FALSE((a | b) >> overflow_upper_half_bit_position != 0)) {
    if (a != 0 && *product / a != b) return kTfLiteError;
  }
  return kTfLiteOk;
}
}  // namespace
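
// Illustrative sketch (not part of the original source): on a 64-bit build,
// the fast path applies whenever both operands fit in the lower 32 bits, e.g.
//
//   size_t product = 0;
//   MultiplyAndCheckOverflow(1u << 20, 1u << 10, &product);  // kTfLiteOk,
//                                                            // product == 2^30
//   MultiplyAndCheckOverflow(SIZE_MAX, 2, &product);         // kTfLiteError
//
// Only when either operand has bits set in the upper half does the slower
// division-based check run, which is what BytesRequired() below relies on.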

TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
                                     size_t dims_size, size_t* bytes) {
  TF_LITE_ENSURE(&context_, bytes != nullptr);
  // When 'dims_size' is 0, we simply assume it's a scalar. Therefore, we start
  // 'count' as 1.
  size_t count = 1;
  for (int k = 0; k < dims_size; k++) {
    size_t old_count = count;
    TF_LITE_ENSURE_MSG(
        &context_,
        MultiplyAndCheckOverflow(old_count, dims[k], &count) == kTfLiteOk,
        "BytesRequired number of elements overflowed.\n");
  }
  size_t type_size = 0;
  TF_LITE_ENSURE_OK(&context_, GetSizeOfType(&context_, type, &type_size));
  TF_LITE_ENSURE_MSG(
      &context_, MultiplyAndCheckOverflow(type_size, count, bytes) == kTfLiteOk,
      "BytesRequired number of bytes overflowed.\n");
  return kTfLiteOk;
}

TfLiteStatus Subgraph::AllocateTensors() {
  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "AllocateTensors");
  if (!consistent_) {
    ReportError("AllocateTensors() called on inconsistent model.");
    return kTfLiteError;
  }

  // Restore delegation state if applicable.
  TF_LITE_ENSURE_STATUS(RedoAllDelegates());

  // Explicit (re)allocation is necessary if nodes have been changed or tensors
  // have been resized. For inputs marked as dynamic, we can't short-circuit the
  // allocation as the client may have done the resize manually.
  if (state_ != kStateUninvokable &&
      !HasDynamicTensorImpl(context_, inputs())) {
    if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
      // If the only change was the release of non-persistent memory via
      // ReleaseNonPersistentMemory(), just re-allocate it. For any other type
      // of memory-planning change (e.g., ResizeInputTensor), the state would
      // be kStateUninvokable.
      memory_planner_->AcquireNonPersistentMemory();
    }
    return kTfLiteOk;
  }

  next_execution_plan_index_to_prepare_ = 0;
  next_execution_plan_index_to_plan_allocation_ = 0;
  next_original_execution_plan_index_to_prepare_ = 0;
  if (memory_planner_) {
    TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
  }

  TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());

  state_ = kStateInvokable;

  // Reset the variable tensors to zero after (re)allocating the tensors.
  // Developers shouldn't rely on the side effect of this function to reset
  // variable tensors. They should call `ResetVariableTensors` directly
  // instead.
  ResetVariableTensors();

  return kTfLiteOk;
}

// TODO(b/115961645): Support non-zero default values.
TfLiteStatus Subgraph::ResetVariableTensors() {
  for (auto& tensor : tensors_) {
    if (!tensor.is_variable) {
      continue;
    }

    if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
      // If a variable tensor's allocation type is `kTfLiteArenaRwPersistent`,
      // it must have been allocated after the initial `PrepareOpsAndTensors()`
      // call.
      TF_LITE_ENSURE(&context_, tensor.data.raw != nullptr);
      tflite::ResetVariableTensor(&tensor);
    } else {
      // If a variable tensor's allocation type is not
      // `kTfLiteArenaRwPersistent`, it can only be `kTfLiteCustom`, in which
      // case we do not reset it.
      TF_LITE_ENSURE_EQ(&context_, tensor.allocation_type, kTfLiteCustom);
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const std::vector<int>& intermediates, const char* init_data,
    size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data,
                                                              free);
  if (state_ == kStateInvokableAndImmutable) {
    ReportError("AddNodeWithParameters is disallowed when graph is immutable.");
    return kTfLiteError;
  }
  state_ = kStateUninvokable;

  TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("node inputs", inputs.data(),
                                                  inputs.size()));
  TF_LITE_ENSURE_OK(
      &context_,
      CheckTensorIndices("node outputs", outputs.data(), outputs.size()));

  // For builtin ops, inputs and outputs must not overlap. Custom ops must do
  // this check by themselves if they don't support overlapping tensors. This
  // distinction is to allow custom ops to just forward a tensor, reusing it as
  // both input and output.
  if (builtin_data != nullptr) {
    TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap(
                                     inputs.data(), inputs.size(),
                                     outputs.data(), outputs.size()));
  }

  int new_node_index = nodes_and_registration_.size();
  if (node_index) *node_index = new_node_index;
  nodes_and_registration_.emplace_back();
  auto& node_and_reg = nodes_and_registration_.back();
  TfLiteNode& node = node_and_reg.first;

  // NOTE, here we are not using move semantics yet, since our internal
  // representation isn't std::vector, but in the future we would like to avoid
  // copies, so we want the interface to take r-value references now.
  node.inputs = ConvertVectorToTfLiteIntArray(inputs);
  node.outputs = ConvertVectorToTfLiteIntArray(outputs);
  node.intermediates = ConvertVectorToTfLiteIntArray(intermediates);
  node.temporaries = TfLiteIntArrayCreate(0);
  if (init_data) {
    node.user_data = OpInit(*registration, init_data, init_data_size);
  } else {
    node.user_data = OpInit(
        *registration, static_cast<const char*>(builtin_data_deleter.get()), 0);
  }

  node.builtin_data = builtin_data_deleter.release();

  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
    // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
    // `Operator` table is passed in.
    node.custom_initial_data = init_data;
    node.custom_initial_data_size = init_data_size;
  } else {
    node.custom_initial_data = nullptr;
    node.custom_initial_data_size = 0;
  }
  node.might_have_side_effect = OpMightHaveSideEffect(&node, registration);

  node.delegate = nullptr;
  // Copying of registration is required to support unresolved custom ops.
  node_and_reg.second = *registration;
  execution_plan_.push_back(new_node_index);
  return kTfLiteOk;
}

namespace {
// Returns true if any tensor identified by indexes in 'tensor_indexes' is
// of type 'kTfLiteResource'. False otherwise.
bool AnyTensorOfTypeResource(const std::vector<TfLiteTensor>& tensors,
                             const TfLiteIntArray* tensor_indexes) {
  for (int i = 0; i < tensor_indexes->size; ++i) {
    int tensor_index = tensor_indexes->data[i];
    if (tensor_index >= 0 && tensor_index < tensors.size() &&
        tensors[tensor_index].type == kTfLiteResource)
      return true;
  }
  return false;
}

}  // namespace

bool Subgraph::OpMightHaveSideEffect(
    const TfLiteNode* node, const TfLiteRegistration* registration) const {
  // Check if any of the input tensors are of type resource.
  if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true;
  // Check if any of the output tensors are of type resource.
  if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true;
  // Consider control flow ops to have side effects; some ops inside the
  // control flow subgraph can have side effects.
  if (registration->builtin_code == kTfLiteBuiltinIf ||
      registration->builtin_code == kTfLiteBuiltinWhile ||
      registration->builtin_code == kTfLiteBuiltinCallOnce)
    return true;
  return false;
}

TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
                                         const std::vector<int>& dims) {
  const bool delegates_applied = !pre_delegation_execution_plan_.empty();
  const bool graph_is_immutable = state_ == kStateInvokableAndImmutable;
  if (graph_is_immutable && !delegates_applied) {
    ReportError("ResizeInputTensor is disallowed when graph is immutable.");
    return kTfLiteError;
  }

  TF_LITE_ENSURE(&context_,
                 tensor_index < context_.tensors_size && tensor_index >= 0);
  TfLiteTensor* tensor = &context_.tensors[tensor_index];

  // Short-circuit the state change if the dimensions don't change, avoiding
  // unnecessary (re)allocations.
  //
  // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
  // the subgraph won't allocate memory for a dynamic tensor when its size
  // is equal to the original tensor size.
  if (tensor->data.raw != nullptr &&
      EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
    return kTfLiteOk;
  }

  if (graph_is_immutable) {
    // Undo delegation if it resulted in the graph being immutable.
    TF_LITE_ENSURE_STATUS(UndoAllDelegates());
  }
  state_ = kStateUninvokable;
  return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims));
}

TfLiteStatus Subgraph::ResizeInputTensorStrict(int tensor_index,
                                               const std::vector<int>& dims) {
  TF_LITE_ENSURE(&context_,
                 tensor_index < context_.tensors_size && tensor_index >= 0);
  TfLiteTensor* tensor = &context_.tensors[tensor_index];

  // Ensure that only unknown dimensions can be resized.
  TF_LITE_ENSURE_EQ(&context_, tensor->dims->size, dims.size());
  for (size_t idx = 0; idx < dims.size(); idx++) {
    // `dims_signature` is not defined when no unknown dimensions are present.
    int dim_signature;
    if (tensor->dims_signature && tensor->dims_signature->size) {
      dim_signature = tensor->dims_signature->data[idx];
    } else {
      dim_signature = tensor->dims->data[idx];
    }

    if (dim_signature != -1 && dim_signature != dims[idx]) {
      ReportError(
          "Attempting to resize dimension %d of tensor %d with value %d to %d. "
          "ResizeInputTensorStrict only allows mutating unknown dimensions "
          "identified by -1.",
          idx, tensor_index, dim_signature, dims[idx]);
      return kTfLiteError;
    }
  }

  return ResizeInputTensor(tensor_index, dims);
}

TfLiteStatus Subgraph::ReleaseNonPersistentMemory() {
  if (memory_planner_) {
    TF_LITE_ENSURE_STATUS(memory_planner_->ReleaseNonPersistentMemory());
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::OpPrepare(const TfLiteRegistration& op_reg,
                                 TfLiteNode* node) {
  if (op_reg.prepare == nullptr) {
    // Check if it's an unresolved custom op.
    if (IsUnresolvedCustomOp(op_reg)) {
      if (IsFlexOp(op_reg.custom_name)) {
        ReportError(
            "Select TensorFlow op(s), included in the given model, is(are) not "
            "supported by this interpreter. Make sure you apply/link the Flex "
            "delegate before inference. For the Android, it can be resolved by "
            "adding \"org.tensorflow:tensorflow-lite-select-tf-ops\" "
            "dependency. See instructions: "
            "https://www.tensorflow.org/lite/guide/ops_select");
      } else {
        ReportError(
            "Encountered unresolved custom op: %s.\nSee instructions: "
            "https://www.tensorflow.org/lite/guide/ops_custom",
            op_reg.custom_name ? op_reg.custom_name : "UnknownOp");
      }
      return kTfLiteError;
    }
    // Resolved ops can have a null Prepare function.
    return kTfLiteOk;
  }
  return op_reg.prepare(&context_, node);
}

TfLiteStatus Subgraph::PrepareOpsStartingAt(
    int first_execution_plan_index, const std::vector<int>& execution_plan,
    int* last_execution_plan_index_prepared) {
  if (first_execution_plan_index == 0) {
    // Inputs that are forwarded to outputs without modification are never
    // evaluated by any operator, so the subgraph's output tensors need to be
    // checked for dynamic tensors up front.
    has_dynamic_tensors_ = HasDynamicTensorImpl(context_, outputs());
  }
  for (int execution_plan_index = first_execution_plan_index;
       execution_plan_index < execution_plan.size(); execution_plan_index++) {
    int node_index = execution_plan[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    const TfLiteRegistration& registration =
        nodes_and_registration_[node_index].second;
    EnsureTensorsVectorCapacity();
    if (OpPrepare(registration, &node) != kTfLiteOk) {
      return ReportOpError(&context_, node, registration, node_index,
                           "failed to prepare");
    }

    *last_execution_plan_index_prepared = execution_plan_index;

    // Discontinue if the node has dynamic outputs. Note that we don't
    // stop for dynamic temporary tensors since they won't affect the
    // sizes of other tensors in the graph.
    if (HasDynamicTensor(context_, node.outputs)) {
      has_dynamic_tensors_ = true;
      return kTfLiteOk;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::PrepareOpsAndTensors() {
  if (!memory_planner_) {
#ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
    memory_planner_.reset(new SimplePlanner(&context_, CreateGraphInfo()));
#else
    memory_planner_.reset(new ArenaPlanner(&context_, CreateGraphInfo(),
                                           preserve_all_tensors_,
                                           kDefaultTensorAlignment));
#endif
    memory_planner_->PlanAllocations();
  }

  // Prepare original execution plan if any applied delegate wants it.
  // If any of the delegates is immutable, this won't be triggered
  // post-delegation (since we undo/redo delegation). For all other cases, other
  // delegates that do shape propagation themselves would still be able to.
  bool prepare_original_plan = false;
  if (!pre_delegation_execution_plan_.empty()) {
    for (int i = 0; i < delegates_applied_.size(); ++i) {
      if ((delegates_applied_[i]->flags &
           kTfLiteDelegateFlagsRequirePropagatedShapes)) {
        prepare_original_plan = true;
        break;
      }
    }
  }
  if (prepare_original_plan) {
    int last_original_exec_plan_index_prepared = 0;
    TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
        next_execution_plan_index_to_prepare_, pre_delegation_execution_plan_,
        &last_original_exec_plan_index_prepared));
    next_original_execution_plan_index_to_prepare_ =
        last_original_exec_plan_index_prepared + 1;
  }

  int last_exec_plan_index_prepared = 0;
  TF_LITE_ENSURE_STATUS(
      PrepareOpsStartingAt(next_execution_plan_index_to_prepare_,
                           execution_plan_, &last_exec_plan_index_prepared));
  next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;

  // Execute arena allocations.
  TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
      next_execution_plan_index_to_plan_allocation_,
      last_exec_plan_index_prepared));

  // Ensure custom allocations are large enough for applicable tensors.
  // This causes some extra validations for cases with dynamic tensors, but the
  // overhead should be minimal since the number of custom-allocated tensors
  // will typically be low.
  for (int i = 0; i < custom_allocations_.size(); ++i) {
    auto index_and_alloc = custom_allocations_[i];
    TfLiteTensor* tensor_at_index = tensor(index_and_alloc.first);
    const auto& alloc = index_and_alloc.second;
    TF_LITE_ENSURE_EQ(context(), tensor_at_index->allocation_type,
                      kTfLiteCustom);
    if (alloc.bytes < tensor_at_index->bytes) {
      ReportError("Custom allocation is too small for tensor idx: %d",
                  index_and_alloc.first);
      return kTfLiteError;
    }
  }

  next_execution_plan_index_to_plan_allocation_ =
      last_exec_plan_index_prepared + 1;

  return kTfLiteOk;
}

TfLiteStatus Subgraph::Invoke() {
  if (!consistent_) {
    ReportError("Invoke called on model that is not consistent.");
    return kTfLiteError;
  }

  TfLiteStatus status = kTfLiteOk;
  if (state_ == kStateUninvokable) {
    ReportError("Invoke called on model that is not ready.");
    return kTfLiteError;
  } else if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
    ReportError("Non-persistent memory is not available.");
    return kTfLiteError;
  }
  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "Invoke");

  // Invocations are always done in node order.
  // Note that calling Invoke repeatedly will cause the original memory plan to
  // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
  // called.
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); execution_plan_index++) {
    if (execution_plan_index == next_execution_plan_index_to_prepare_) {
      TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
      TF_LITE_ENSURE(&context_, next_execution_plan_index_to_prepare_ >=
                                    execution_plan_index);
    }
    int node_index = execution_plan_[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    const TfLiteRegistration& registration =
        nodes_and_registration_[node_index].second;

    const char* op_name = nullptr;
    if (profiler_) op_name = GetTFLiteOpName(registration);
    TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler_.get(), op_name, node_index);

    for (int i = 0; i < node.inputs->size; ++i) {
      int tensor_index = node.inputs->data[i];
      if (tensor_index == kTfLiteOptionalTensor) {
        continue;
      }
      TfLiteTensor* tensor = &tensors_[tensor_index];
      if (tensor->delegate && tensor->delegate != node.delegate &&
          tensor->data_is_stale) {
        TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
      }
      if (tensor->data.raw == nullptr && tensor->bytes > 0) {
        if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 &&
            tensor->dims->size != 1) {
          // In general, having a tensor here with no buffer will be an error.
          // However, for the reshape operator, the second input tensor is
          // sometimes only used for the shape, not for the data. Thus, null
          // buffer is ok in this situation.
          // The situation where null buffer is not ok for reshape operator is
          // only when there are 2 inputs given to the node and the one
          // corresponding to the shape (i == 1) is a vector that contains all
          // dimensions. See `GetOutputShape()` function in
          // `tensorflow/lite/kernels/reshape.cc`
          continue;
        } else {
          // In all other cases, we need to return an error as otherwise we will
          // trigger a null pointer dereference (likely).
          ReportError("Input tensor %d lacks data", tensor_index);
          return kTfLiteError;
        }
      }
    }

    if (check_cancelled_func_ != nullptr &&
        check_cancelled_func_(cancellation_data_)) {
      ReportError("Client requested cancel during Invoke()");
      return kTfLiteError;
    }

    EnsureTensorsVectorCapacity();
    tensor_resized_since_op_invoke_ = false;
    if (OpInvoke(registration, &node) != kTfLiteOk) {
      return ReportOpError(&context_, node, registration, node_index,
                           "failed to invoke");
    }

    // Force execution prep for downstream ops if the latest op triggered the
    // resize of a dynamic tensor.
    if (tensor_resized_since_op_invoke_ &&
        HasDynamicTensor(context_, node.outputs)) {
      next_execution_plan_index_to_prepare_ = execution_plan_index + 1;

      // This happens when an intermediate dynamic tensor is resized.
      // We don't have to prepare all the ops, but we need to recompute
      // the allocation plan.
      if (next_execution_plan_index_to_plan_allocation_ >
          next_execution_plan_index_to_prepare_) {
        next_execution_plan_index_to_plan_allocation_ =
            next_execution_plan_index_to_prepare_;
        if (memory_planner_) {
          TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocationsAfter(
              next_execution_plan_index_to_plan_allocation_ - 1));
        }
      }
    }
  }

  return status;
}
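
// Illustrative sketch (not part of the original source) of the typical call
// sequence a client drives against a subgraph, per the comments above: the
// memory plan produced by AllocateTensors() is reused across Invoke() calls
// until a resize forces replanning.
//
//   Subgraph* subgraph = ...;                              // hypothetical
//   TF_LITE_ENSURE_STATUS(subgraph->AllocateTensors());
//   TF_LITE_ENSURE_STATUS(subgraph->Invoke());             // plan reused
//   TF_LITE_ENSURE_STATUS(subgraph->ResizeInputTensor(0, {1, 224, 224, 3}));
//   TF_LITE_ENSURE_STATUS(subgraph->AllocateTensors());    // replans memory
//   TF_LITE_ENSURE_STATUS(subgraph->Invoke());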

TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context,
                                    TfLiteTensor* tensor,
                                    TfLiteIntArray* new_size) {
  // If the dimensions don't change, avoid unnecessary (re)allocations.
  //
  // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
  // the subgraph won't allocate memory for a dynamic tensor when its size
  // is equal to the original tensor size.
  if (tensor->data.raw != nullptr &&
      EqualArrayAndTfLiteIntArray(tensor->dims, new_size->size,
                                  new_size->data)) {
    // A number of clients assume |new_size| remains valid upon success, so
    // swap it in as the new (but logically identical) tensor dims.
    TfLiteIntArrayFree(tensor->dims);
    tensor->dims = new_size;
    return kTfLiteOk;
  }

  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ResizeTensorImpl
  // (this function is static).
  return static_cast<Subgraph*>(context->impl_)
      ->ResizeTensorImpl(tensor, new_size);
}

void Subgraph::ReportErrorImpl(const char* format, va_list args) {
  error_reporter_->Report(format, args);
}

void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) {
  va_list args;
  va_start(args, format);
  auto* f = static_cast<Subgraph*>(context->impl_);
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ReportErrorImpl
  // (this function is static).
  f->ReportErrorImpl(format, args);
  va_end(args);
}

// Entry point for C node plugin API to report an error.
void Subgraph::ReportError(const char* format, ...) {
  va_list args;
  va_start(args, format);
  auto* f = static_cast<Subgraph*>(context_.impl_);
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ReportErrorImpl
  // (this function is static).
  f->ReportErrorImpl(format, args);
  va_end(args);
}

TfLiteStatus Subgraph::AddTensors(int tensors_to_add,
                                  int* first_new_tensor_index) {
  const size_t base_index = tensors_.size();
  if (first_new_tensor_index) *first_new_tensor_index = base_index;
  tensors_.resize(tensors_.size() + tensors_to_add);
  for (size_t i = base_index; i < tensors_.size(); i++) {
    memset(&tensors_[i], 0, sizeof(tensors_[i]));
    tensors_[i].buffer_handle = kTfLiteNullBufferHandle;
  }
  context_.tensors = tensors_.data();
  context_.tensors_size = tensors_.size();
  return kTfLiteOk;
}

TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add,
                                  int* first_new_tensor_index) {
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function AddTensors
  // (this function is static).
  return static_cast<Subgraph*>(context->impl_)
      ->AddTensors(tensors_to_add, first_new_tensor_index);
}
1259
GetNodeAndRegistration(int node_index,TfLiteNode ** node,TfLiteRegistration ** registration)1260 TfLiteStatus Subgraph::GetNodeAndRegistration(
1261 int node_index, TfLiteNode** node, TfLiteRegistration** registration) {
1262 TF_LITE_ENSURE(&context_, node_index >= 0);
1263 auto nodes_size = nodes_and_registration_.size();
1264 TF_LITE_ENSURE(&context_, static_cast<size_t>(node_index) < nodes_size);
1265 TF_LITE_ENSURE(&context_, node != nullptr && registration != nullptr);
1266 auto& node_and_reg = nodes_and_registration_[node_index];
1267 *node = &node_and_reg.first;
1268 *registration = &node_and_reg.second;
1269 return kTfLiteOk;
1270 }
1271
GetNodeAndRegistration(struct TfLiteContext * context,int node_index,TfLiteNode ** node,TfLiteRegistration ** registration)1272 TfLiteStatus Subgraph::GetNodeAndRegistration(
1273 struct TfLiteContext* context, int node_index, TfLiteNode** node,
1274 TfLiteRegistration** registration) {
1275 return static_cast<Subgraph*>(context->impl_)
1276 ->GetNodeAndRegistration(node_index, node, registration);
1277 }
1278
SetTensorParametersReadOnly(int tensor_index,TfLiteType type,const char * name,const size_t rank,const int * dims,TfLiteQuantization quantization,const char * buffer,size_t bytes,const Allocation * allocation,TfLiteSparsity * sparsity)1279 TfLiteStatus Subgraph::SetTensorParametersReadOnly(
1280 int tensor_index, TfLiteType type, const char* name, const size_t rank,
1281 const int* dims, TfLiteQuantization quantization, const char* buffer,
1282 size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity) {
1283 // Ensure quantization cleanup on failure.
1284 ScopedTfLiteQuantization scoped_quantization(&quantization);
1285 ScopedTfLiteSparsity scoped_sparsity(sparsity);
1286 if (state_ == kStateInvokableAndImmutable) {
1287 ReportError(
1288 "SetTensorParametersReadOnly is disallowed when graph is immutable.");
1289 return kTfLiteError;
1290 }
1291
1292 TF_LITE_ENSURE(&context_,
1293 tensor_index < context_.tensors_size && tensor_index >= 0);
1294
1295 // For most tensors we know exactly how much memory is necessary so we can
1296 // ensure the buffer is large enough. However, we need to skip string tensors
1297 // and sparse tensors because their sizes change with the contents.
1298 // TODO(b/145615516): Extend BytesRequired to check sparse tensors.
  if (type != kTfLiteString && type != kTfLiteResource &&
      type != kTfLiteVariant && sparsity == nullptr) {
    size_t required_bytes;
    TF_LITE_ENSURE_OK(&context_,
                      BytesRequired(type, dims, rank, &required_bytes));
    TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes);
  }

  TfLiteTensor& tensor = context_.tensors[tensor_index];
  if (type == tensor.type &&
      EqualArrayAndTfLiteIntArray(tensor.dims, rank, dims)) {
    // Fast path which does not invalidate the invokable property.
    TfLiteTensorDataFree(&tensor);
    TfLiteQuantizationFree(&tensor.quantization);
    tensor.data.raw = const_cast<char*>(buffer);
    if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(rank, dims);
    tensor.params = GetLegacyQuantization(quantization);
    tensor.quantization = *scoped_quantization.release();
    tensor.sparsity = scoped_sparsity.release();
    tensor.allocation_type = kTfLiteMmapRo;
    tensor.allocation = allocation;
  } else {
    state_ = kStateUninvokable;
    TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
                      GetLegacyQuantization(quantization),
                      const_cast<char*>(buffer), bytes, kTfLiteMmapRo,
                      allocation, false, &tensor);
    // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
    // if there are other required callers.
    tensor.quantization = *scoped_quantization.release();
    tensor.sparsity = scoped_sparsity.release();
  }
  return kTfLiteOk;
}
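
// A minimal caller-side sketch of the read-only variant above (assumed
// values; `subgraph` and `weight_buffer` are placeholders supplied by the
// caller, and `bytes` must match BytesRequired() for dense non-string types):
//
//   const int dims[] = {2, 3};
//   TfLiteQuantization quant = {};
//   quant.type = kTfLiteNoQuantization;
//   subgraph->SetTensorParametersReadOnly(
//       /*tensor_index=*/0, kTfLiteFloat32, "weights", /*rank=*/2, dims,
//       quant, weight_buffer, /*bytes=*/24, /*allocation=*/nullptr,
//       /*sparsity=*/nullptr);
//
// The buffer is bound as kTfLiteMmapRo, so it must outlive the interpreter.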

// Set the parameters (type, name, dimensions, quantization) of the tensor at
// `tensor_index` as a read/write tensor. No external buffer is supplied here:
// arena-backed tensors are sized up front from `dims`, while string, resource
// and variant tensors are allocated dynamically as they are written.
TfLiteStatus Subgraph::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantization quantization, bool is_variable,
    const size_t rank_dims_signature, const int* dims_signature) {
  // Ensure quantization cleanup on failure.
  ScopedTfLiteQuantization scoped_quantization(&quantization);
  if (state_ == kStateInvokableAndImmutable) {
    ReportError(
        "SetTensorParametersReadWrite is disallowed when graph is immutable.");
    return kTfLiteError;
  }
  TF_LITE_ENSURE(&context_,
                 tensor_index < context_.tensors_size && tensor_index >= 0);
  size_t required_bytes = 0;
  if (type != kTfLiteString && type != kTfLiteResource &&
      type != kTfLiteVariant) {
    // These types will be allocated in our arena so we need to record how
    // many bytes we will need based on the dimensions. String tensors are
    // allocated dynamically and we can't know ahead of time how much space
    // they will require.
    TF_LITE_ENSURE_OK(&context_,
                      BytesRequired(type, dims, rank, &required_bytes));
  }

  TfLiteAllocationType allocation_type = kTfLiteArenaRw;
  if (type == kTfLiteString || type == kTfLiteResource ||
      type == kTfLiteVariant) {
    if (is_variable) {
      // We don't have a real use case for a string variable tensor.
      ReportError("String variable tensor isn't supported.");
      return kTfLiteError;
    }
    allocation_type = kTfLiteDynamic;
  } else if (is_variable) {
    allocation_type = kTfLiteArenaRwPersistent;
  }

  TfLiteTensor& tensor = context_.tensors[tensor_index];
  TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
                    GetLegacyQuantization(quantization),
                    /*buffer=*/nullptr, required_bytes, allocation_type,
                    nullptr, is_variable, &tensor);
  // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
  // if there are other required callers.
  tensor.quantization = *scoped_quantization.release();
  tensor.dims_signature =
      ConvertArrayToTfLiteIntArray(rank_dims_signature, dims_signature);
  return kTfLiteOk;
}
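
// Allocation-type decision above, summarized (descriptive note only, no new
// behavior): string/resource/variant tensors become kTfLiteDynamic, variables
// of any other type become kTfLiteArenaRwPersistent, and everything else
// defaults to kTfLiteArenaRw and is sized from BytesRequired(type, dims).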

TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) {
  for (int node_index : new_plan) {
    TF_LITE_ENSURE(&context_, node_index >= 0 &&
                                  node_index < nodes_and_registration_.size());
  }
  execution_plan_ = new_plan;
  return kTfLiteOk;
}
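
// For example (illustrative only), SetExecutionPlan({2, 0, 1}) on a subgraph
// with three nodes runs node 2 first and node 1 last; any index outside
// [0, nodes_and_registration_.size()) is rejected by the check above.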

TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
                                        TfLiteIntArray* new_size) {
  // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too.
  if (tensor->allocation_type == kTfLiteArenaRw ||
      tensor->allocation_type == kTfLiteDynamic ||
      tensor->allocation_type == kTfLiteArenaRwPersistent ||
      tensor->allocation_type == kTfLitePersistentRo ||
      tensor->allocation_type == kTfLiteCustom) {
    tensor_resized_since_op_invoke_ |=
        TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
    if (tensor->type != kTfLiteString && tensor->type != kTfLiteResource &&
        tensor->type != kTfLiteVariant) {
      size_t bytesRequired;
      TfLiteStatus status = BytesRequired(tensor->type, new_size->data,
                                          new_size->size, &bytesRequired);
      if (status != kTfLiteOk) {
        TfLiteIntArrayFree(new_size);
        return kTfLiteError;
      }

      // Realloc space for heap-allocated tensors.
      TfLiteTensorRealloc(bytesRequired, tensor);
      tensor->bytes = bytesRequired;
    }
    if (tensor->dims) TfLiteIntArrayFree(tensor->dims);
    tensor->dims = new_size;

    // Reset arena-allocated tensors; they will be allocated later.
    if (tensor->allocation_type == kTfLiteArenaRw ||
        tensor->allocation_type == kTfLiteArenaRwPersistent) {
      tensor->data.raw = nullptr;
    }
  } else {
    // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore
    // of fixed size.
    TfLiteIntArrayFree(new_size);
    ReportError("Attempting to resize a fixed-size tensor.");
    return kTfLiteError;
  }
  return kTfLiteOk;
}
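
// Ownership note for the helper above (descriptive only): `new_size` is
// consumed on every path. On success it becomes `tensor->dims`; on failure it
// is freed before returning, so callers must not free or reuse it themselves.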

void Subgraph::SwitchToDelegateContext() {
  context_.GetNodeAndRegistration = GetNodeAndRegistration;
  context_.ReplaceNodeSubsetsWithDelegateKernels =
      ReplaceNodeSubsetsWithDelegateKernels;
  context_.GetExecutionPlan = GetExecutionPlan;
  context_.PreviewDelegatePartitioning = PreviewDelegatePartitioning;
}

void Subgraph::SwitchToKernelContext() {
  context_.GetNodeAndRegistration = [](struct TfLiteContext* context,
                                       int node_index, TfLiteNode** node,
                                       TfLiteRegistration** registration) {
    return ForbiddenContextFunction(context);
  };
  context_.ReplaceNodeSubsetsWithDelegateKernels =
      [](TfLiteContext* context, TfLiteRegistration registration,
         const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
        return ForbiddenContextFunction(context);
      };
  context_.GetExecutionPlan = [](struct TfLiteContext* context,
                                 TfLiteIntArray**) {
    return ForbiddenContextFunction(context);
  };
  context_.PreviewDelegatePartitioning =
      [](struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
         TfLiteDelegateParams** partition_params_array,
         int* num_partitions) { return ForbiddenContextFunction(context); };
  // Free any memory that might have been allocated by
  // PreviewDelegatePartitioning.
  FreeDelegatePartitioningData();
}

TfLiteStatus Subgraph::UndoAllDelegates() {
  // Return early if there is nothing to reset to.
  if (pre_delegation_execution_plan_.empty()) return kTfLiteOk;

  // First free all delegate nodes.
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
    int node_index = execution_plan_[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    if (node.delegate == nullptr) {
      continue;
    }
    CleanupNode(node_index);
  }

  // Reset execution plan.
  execution_plan_ = pre_delegation_execution_plan_;
  pre_delegation_execution_plan_.clear();

  // Handle FP16 delegation (if applicable).
  //
  // First pass through the execution plan to remember the mapping of FP16
  // dequantizations in the graph.
  // This is required because delegates that support FP16 could remap supported
  // nodes' inputs to point to their fp16 versions (if the delegate supports
  // fp16 acceleration). This remapping is performed in
  // FP16GraphPartitionHelper in delegates/utils. We need to undo this
  // remapping to ensure CPU kernels work.
  std::vector<int> fp16_to_fp32(tensors_size(), -1);
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
    int node_index = execution_plan_[execution_plan_index];
    auto& node_and_reg = nodes_and_registration_[node_index];
    const TfLiteNode& node = node_and_reg.first;
    const TfLiteRegistration& reg = node_and_reg.second;
    if (reg.builtin_code == kTfLiteBuiltinDequantize &&
        node.inputs->size == 1 && node.outputs->size == 1) {
      const int input_idx = node.inputs->data[0];
      if (tensors_[input_idx].type == kTfLiteFloat16) {
        fp16_to_fp32[input_idx] = node.outputs->data[0];
      }
    }
  }
  // Second pass through the execution plan to remap applicable nodes' fp16
  // inputs to their original fp32 versions. Note that if a CPU kernel does
  // support fp16, the model will not contain a DEQUANTIZE for its constant
  // input.
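  // Concrete example (illustrative only): suppose DEQUANTIZE reads fp16
  // tensor 5 and writes fp32 tensor 7, and a delegate had remapped a CONV_2D
  // input from tensor 7 to tensor 5. The first pass records
  // fp16_to_fp32[5] = 7; the loop below then rewrites the CONV_2D input back
  // to tensor 7 so the CPU kernel receives fp32 data again.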
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
    int node_index = execution_plan_[execution_plan_index];
    auto& node_and_reg = nodes_and_registration_[node_index];
    const TfLiteNode& node = node_and_reg.first;
    const TfLiteRegistration& reg = node_and_reg.second;
    if (reg.builtin_code == kTfLiteBuiltinDequantize) continue;
    for (int i = 0; i < node.inputs->size; ++i) {
      const int original_input_idx = node.inputs->data[i];
      if (original_input_idx == kTfLiteOptionalTensor) continue;
      if (tensors_[original_input_idx].type == kTfLiteFloat16) {
        node.inputs->data[i] = fp16_to_fp32[original_input_idx];
      }
    }
  }

  // Delegate nodes are appended to nodes_and_registration_. Therefore,
  // cleanup nodes_and_registration_ to only contain nodes from
  // pre_delegation_execution_plan_.
  int max_retained_node_index = 0;
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
    max_retained_node_index = std::max(max_retained_node_index,
                                       execution_plan_[execution_plan_index]);
  }
  nodes_and_registration_.resize(max_retained_node_index + 1);
  // After undoing delegates, the graph is uninvokable, but mutable.
  state_ = kStateUninvokable;

  delegates_undone_ = true;
  return kTfLiteOk;
}

TfLiteStatus Subgraph::RedoAllDelegates() {
  if (!delegates_undone_) return kTfLiteOk;

  delegates_undone_ = false;
  std::vector<TfLiteDelegate*> delegates_to_apply;
  delegates_applied_.swap(delegates_to_apply);
  for (auto* delegate : delegates_to_apply) {
    TF_LITE_ENSURE_STATUS(ModifyGraphWithDelegate(delegate));
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::RemoveAllDelegates() {
  TF_LITE_ENSURE_STATUS(UndoAllDelegates());
  delegates_applied_.clear();
  delegates_undone_ = false;
  TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
  return kTfLiteOk;
}

bool Subgraph::HasDelegates() { return !delegates_applied_.empty(); }

void Subgraph::EnsureTensorsVectorCapacity() {
  const size_t required_capacity = tensors_.size() + kTensorsCapacityHeadroom;
  if (required_capacity > tensors_.capacity()) {
    // Whenever it's required to increase the vector capacity, make it at
    // least twice bigger. The behavior is consistent with the default
    // behavior of GCC STL's `std::vector::resize()`. This avoids frequently
    // allocating and copying the underlying buffer.
    size_t reserved_capacity =
        std::max(required_capacity, tensors_.capacity() * 2);
    tensors_.reserve(reserved_capacity);
    context_.tensors = tensors_.data();
  }
}
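
// Worked example for the growth policy above (illustrative only): with a
// current capacity of 16, a required_capacity of 17 reserves
// max(17, 32) = 32, while a required_capacity of 40 reserves
// max(40, 32) = 40. In both cases context_.tensors is refreshed because
// reserve() may relocate the underlying buffer.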

TfLiteStatus Subgraph::EnsureMemoryAllocations() {
  if (memory_planner_) {
    state_ = kStateUninvokable;
    TF_LITE_ENSURE_OK(&context_, memory_planner_->PlanAllocations());
  }
  TF_LITE_ENSURE_OK(&context_, AllocateTensors());
  TF_LITE_ENSURE_EQ(&context_, state_, kStateInvokable);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
  TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(),
                                       "ModifyGraphWithDelegate");

  if (delegate == nullptr) {
    ReportError("Null delegate.");
    return kTfLiteDelegateError;
  }

  // Resets delegation & leaves the graph in a consistent state if the delegate
  // status is not okay.
  auto reset_delegation_if_not_ok = [this](TfLiteStatus status) {
    if (status != kTfLiteOk) {
      TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
      ReportError(
          "Restored original execution plan after delegate application "
          "failure.");
      return kTfLiteDelegateError;
    }
    return kTfLiteOk;
  };

  // STEP 1: Verify & prepare graph for delegation.
  // ==============================================

  // Restore delegation state if applicable.
  TF_LITE_ENSURE_STATUS(RedoAllDelegates());

  const bool delegate_supports_dynamic_shapes =
      delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors;
  const auto pre_delegation_state = state_;

  if (state_ == kStateInvokableAndImmutable) {
    // A delegate that doesn't support dynamic shapes was already applied, so
    // we can assume tensor shapes have been propagated & there are no dynamic
    // tensors.
    // Reset the state to force tensor/op reallocation.
    state_ = kStateUninvokable;
  } else if (!delegate_supports_dynamic_shapes) {
    // Check if the graph has dynamic tensors by preparing ops.
    int last_execution_plan_index_prepared;
    TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
        0, execution_plan_, &last_execution_plan_index_prepared));
    if (has_dynamic_tensors_) {
      TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
      ReportError(
          "Attempting to use a delegate that only supports static-sized "
          "tensors with a graph that has dynamic-sized tensors.");
      return kTfLiteApplicationError;
    }
  }

  if (delegates_applied_.empty()) {
    // This is the first delegate being applied, so remember the original
    // execution plan.
    // TODO(b/119623453): Restore execution plan to this state if delegate
    // application fails.
    pre_delegation_execution_plan_ = execution_plan_;
  }

  // STEP 2: Delegate replaces applicable nodes with delegate kernels.
  // =================================================================

  // Set up the additional context interface.
  SwitchToDelegateContext();
  TfLiteStatus status = delegate->Prepare(&context_, delegate);
  // Remove the additional context info.
  SwitchToKernelContext();
  TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(status));

  // STEP 3: Leave graph in consistent state based on delegate & previous state.
  // ===========================================================================

  if (!delegate_supports_dynamic_shapes) {
    // CASE 1: Current delegate does not support dynamic shapes.
    // Reset the state to force tensor/op reallocation.
    state_ = kStateUninvokable;
    TF_LITE_ENSURE_STATUS(
        reset_delegation_if_not_ok(EnsureMemoryAllocations()));
    // After using a delegate which doesn't support dynamic tensors, make the
    // entire graph immutable.
    state_ = kStateInvokableAndImmutable;
  } else if (pre_delegation_state == kStateInvokableAndImmutable) {
    // CASE 2: Current delegate supports dynamic shapes, but a previous one
    // does not.
    // Make sure the new delegate didn't mark a tensor as dynamic.
    int last_execution_plan_index_prepared;
    TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(PrepareOpsStartingAt(
        0, execution_plan_, &last_execution_plan_index_prepared)));
    if (has_dynamic_tensors_) {
      TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
      ReportError(
          "Cannot allow dynamic tensors due to previous delegation, resetting "
          "to original execution plan.");
      return kTfLiteApplicationError;
    }
    // Redo memory allocations & ensure the state is set back to its original
    // value.
    TF_LITE_ENSURE_STATUS(
        reset_delegation_if_not_ok(EnsureMemoryAllocations()));
    state_ = kStateInvokableAndImmutable;
  } else if (pre_delegation_state == kStateInvokable) {
    // CASE 3: Current delegate supports dynamic shapes, and the graph was
    // previously invokable.
    // Flush allocation now to leave it in a consistent state.
    TF_LITE_ENSURE_STATUS(
        reset_delegation_if_not_ok(EnsureMemoryAllocations()));
  }
  delegates_applied_.push_back(delegate);

  return status;
}
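
// A minimal caller-side sketch (assumed setup, for illustration only;
// `subgraph` and `my_delegate` are placeholders): a delegate is typically
// applied once the subgraph is otherwise ready, and on failure the logic
// above attempts to restore the original execution plan so the subgraph can
// still run on CPU kernels.
//
//   TfLiteDelegate* my_delegate = /* obtained from a delegate provider */;
//   if (subgraph->ModifyGraphWithDelegate(my_delegate) != kTfLiteOk) {
//     // Delegation failed or was rejected; fall back to CPU execution.
//   }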

TfLiteStatus Subgraph::SetCustomAllocationForTensor(
    int tensor_index, const TfLiteCustomAllocation& allocation, int64_t flags) {
  TfLiteTensor* tensor = &context_.tensors[tensor_index];
  TF_LITE_ENSURE(context(),
                 (tensor->allocation_type == kTfLiteArenaRw ||
                  tensor->allocation_type == kTfLiteArenaRwPersistent ||
                  tensor->allocation_type == kTfLiteCustom));
  // Don't check allocation.bytes here; we do that after all ops are prepared
  // to allow tensor shape propagation.
  TF_LITE_ENSURE(context(), allocation.data != nullptr);
  if (!(flags & kTfLiteCustomAllocationFlagsSkipAlignCheck)) {
    const intptr_t data_ptr_value = reinterpret_cast<intptr_t>(allocation.data);
    TF_LITE_ENSURE(context(), data_ptr_value % kDefaultTensorAlignment == 0);
  }

  // Check whether the tensor already has a custom allocation.
  const auto alloc_it = std::find_if(
      custom_allocations_.begin(), custom_allocations_.end(),
      [tensor_index](
          const std::pair<int, TfLiteCustomAllocation>& existing_alloc) {
        return existing_alloc.first == tensor_index;
      });
  if (alloc_it == custom_allocations_.end()) {
    custom_allocations_.emplace_back(tensor_index, allocation);
  } else {
    // The tensor already has a custom alloc, so just reassign it.
    alloc_it->second = allocation;
  }

  tensor->allocation_type = kTfLiteCustom;
  tensor->data.data = allocation.data;

  return kTfLiteOk;
}
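
// A caller-side sketch (assumed values, for illustration only; `subgraph`,
// `tensor_index`, `padded_bytes` and `tensor_bytes` are placeholders): the
// buffer must be non-null, aligned to kDefaultTensorAlignment unless the
// skip-check flag is passed, and must stay valid for as long as the tensor
// uses it.
//
//   // std::aligned_alloc requires the size to be a multiple of the alignment.
//   void* data = std::aligned_alloc(kDefaultTensorAlignment, padded_bytes);
//   TfLiteCustomAllocation alloc = {data, tensor_bytes};
//   subgraph->SetCustomAllocationForTensor(tensor_index, alloc, /*flags=*/0);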

void Subgraph::SetName(const char* name) {
  if (name) {
    name_ = name;
  } else {
    name_ = "";
  }
}

const std::string& Subgraph::GetName() const { return name_; }

void Subgraph::DumpMemoryPlannerDebugInfo() const {
  if (memory_planner_ == nullptr) return;
  memory_planner_->DumpDebugInfo(execution_plan());
}

TfLiteStatus Subgraph::PreserveAllTensorsExperimental() {
  if (memory_planner_) {
    ReportError(
        "PreserveAllTensorsExperimental called after memory was planned.");
    return kTfLiteError;
  }
  preserve_all_tensors_ = true;
  return kTfLiteOk;
}

std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() {
  return std::unique_ptr<GraphInfo>(new InterpreterInfo(this));
}

}  // namespace tflite