1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/core/subgraph.h"
17 
18 #include <stdarg.h>
19 #include <stddef.h>
20 
21 #include <algorithm>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <cstring>
25 #include <iterator>
26 #include <memory>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include "tensorflow/lite/allocation.h"
32 #include "tensorflow/lite/builtin_ops.h"
33 #include "tensorflow/lite/c/c_api_types.h"
34 #include "tensorflow/lite/c/common.h"
35 #include "tensorflow/lite/context_util.h"
36 #include "tensorflow/lite/core/api/error_reporter.h"
37 #include "tensorflow/lite/core/api/profiler.h"
38 #include "tensorflow/lite/core/api/tensor_utils.h"
39 #include "tensorflow/lite/core/macros.h"
40 #include "tensorflow/lite/experimental/resource/resource_base.h"
41 #include "tensorflow/lite/graph_info.h"
42 #include "tensorflow/lite/memory_planner.h"
43 #include "tensorflow/lite/minimal_logging.h"
44 #include "tensorflow/lite/schema/schema_generated.h"
45 #include "tensorflow/lite/util.h"
46 #ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
47 #include "tensorflow/lite/simple_planner.h"
48 #else
49 #include "tensorflow/lite/arena_planner.h"
50 #endif
51 
52 namespace tflite {
53 
54 namespace {
55 
56 struct TfLiteQuantizationDeleter {
57   void operator()(TfLiteQuantization* q) {
58     if (q) TfLiteQuantizationFree(q);
59   }
60 };
61 
62 using ScopedTfLiteQuantization =
63     std::unique_ptr<TfLiteQuantization, TfLiteQuantizationDeleter>;
64 
65 struct TfLiteSparsityDeleter {
66   void operator()(TfLiteSparsity* s) {
67     if (s) TfLiteSparsityFree(s);
68   }
69 };
70 
71 using ScopedTfLiteSparsity =
72     std::unique_ptr<TfLiteSparsity, TfLiteSparsityDeleter>;
73 
74 TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node,
75                            const TfLiteRegistration& registration,
76                            int node_index, const char* message) {
77   context->ReportError(
78       context, "Node number %d (%s) %s.\n", node_index,
79       registration.custom_name
80           ? registration.custom_name
81           : EnumNameBuiltinOperator(
82                 static_cast<BuiltinOperator>(registration.builtin_code)),
83       message);
84   return kTfLiteError;
85 }
86 
87 // Stub method which returns kTfLiteError when the function is forbidden.
88 // We register this function for several different functions to save
89 // compiled binary size. Please note the restrictions:
90 // * The type of the first parameter has to be `TfLiteContext*`.
91 // * All parameters must be trivially destructible. (E.g. no C++ classes.)
92 TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) {
93   context->ReportError(context,
94                        "This function is forbidden if not called from a delegate.");
95   return kTfLiteError;
96 }
97 
98 // Set the ForbiddenContextFunction to a compatible function pointer.
99 template <typename FunctionType>
100 void SetForbiddenContextFunction(FunctionType* func) {
101   *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction);
102 }
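// [Editorial note, not in the original source] A minimal sketch of the
// intended use, assuming the usual delegate-only TfLiteContext members:
// outside of delegate application, those hooks are pointed at the stub, e.g.
//
//   SetForbiddenContextFunction(&context_.ReplaceNodeSubsetsWithDelegateKernels);
//   SetForbiddenContextFunction(&context_.GetNodeAndRegistration);
//
// so any kernel that calls them gets kTfLiteError instead of mutating the
// graph.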
103 
104 // Returns true if at least one tensor in the given list is kTfLiteDynamic.
105 template <typename TensorIntArray>
106 bool HasDynamicTensorImpl(const TfLiteContext& context,
107                           const TensorIntArray& int_array) {
108   for (int i : int_array) {
109     if (i == kTfLiteOptionalTensor) continue;
110     const TfLiteTensor& tensor = context.tensors[i];
111     if (tensor.allocation_type == kTfLiteDynamic) {
112       return true;
113     }
114   }
115   return false;
116 }
117 
118 bool HasDynamicTensor(const TfLiteContext& context,
119                       const TfLiteIntArray* int_array) {
120   return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array});
121 }
122 
123 // Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization.
124 TfLiteQuantizationParams GetLegacyQuantization(
125     const TfLiteQuantization& quantization) {
126   TfLiteQuantizationParams legacy_quantization;
127   legacy_quantization.scale = 0;
128   legacy_quantization.zero_point = 0;
129 
130   // If the quantization type isn't affine, return the empty
131   // legacy_quantization.
132   if (quantization.type != kTfLiteAffineQuantization) {
133     return legacy_quantization;
134   }
135 
136   auto* affine_quantization =
137       static_cast<TfLiteAffineQuantization*>(quantization.params);
138   if (!affine_quantization || !affine_quantization->scale ||
139       !affine_quantization->zero_point ||
140       affine_quantization->scale->size != 1 ||
141       affine_quantization->zero_point->size != 1) {
142     return legacy_quantization;
143   }
144 
145   // We know it's per-layer quantization now.
146   legacy_quantization.scale = affine_quantization->scale->data[0];
147   legacy_quantization.zero_point = affine_quantization->zero_point->data[0];
148   return legacy_quantization;
149 }
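// [Editorial example, not in the original source] For a tensor quantized
// per-tensor with scale = {0.5f} and zero_point = {128}, the function above
// returns {scale = 0.5f, zero_point = 128}; per-channel quantization
// (scale->size > 1) or a non-affine type falls back to the zero-initialized
// legacy struct.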
150 
151 static constexpr const char kUnknownCustomOpName[] = "UnknownCustomOp";
152 const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) {
153   if (op_reg.builtin_code == tflite::BuiltinOperator_CUSTOM) {
154     const char* const custom_name = op_reg.custom_name;
155     return custom_name ? custom_name : kUnknownCustomOpName;
156   }
157   if (op_reg.builtin_code == tflite::BuiltinOperator_DELEGATE &&
158       op_reg.custom_name) {
159     return op_reg.custom_name;
160   }
161   return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code];
162 }
163 
164 }  // namespace
165 
166 // A trivial implementation of GraphInfo around the Interpreter.
167 // NOTE: this interpreter info represents the subset of the
168 // graph that is executed according to execution plan. Thus,
169 // the indices are execution plan indices rather than raw node
170 // indices.
171 class InterpreterInfo : public GraphInfo {
172  public:
173   explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}
174 
175   size_t num_tensors() const override { return subgraph_->tensors_size(); }
176   TfLiteTensor* tensor(size_t index) override {
177     return subgraph_->tensor(index);
178   }
179   size_t num_execution_nodes() const override {
180     return subgraph_->execution_plan().size();
181   }
182   size_t num_total_nodes() const override { return subgraph_->nodes_size(); }
183   const TfLiteNode& node(size_t index) const override {
184     int node_index = subgraph_->execution_plan()[index];
185     return subgraph_->nodes_and_registration()[node_index].first;
186   }
187   size_t node_index(size_t index) const override {
188     return subgraph_->execution_plan()[index];
189   }
190   const std::vector<int>& inputs() const override {
191     return subgraph_->inputs();
192   }
193   const std::vector<int>& outputs() const override {
194     return subgraph_->outputs();
195   }
196   const std::vector<int>& variables() const override {
197     return subgraph_->variables();
198   }
199 
200  public:
201   Subgraph* subgraph_;
202 };
203 
204 Subgraph::Subgraph(ErrorReporter* error_reporter,
205                    TfLiteExternalContext** external_contexts,
206                    std::vector<std::unique_ptr<Subgraph>>* subgraphs,
207                    resource::ResourceMap* resources,
208                    resource::ResourceIDMap* resource_ids,
209                    resource::InitializationStatusMap* initialization_status_map)
210     : external_contexts_(external_contexts),
211       error_reporter_(error_reporter),
212       next_execution_plan_index_to_prepare_(0),
213       next_execution_plan_index_to_plan_allocation_(0),
214       subgraphs_(subgraphs),
215       resources_(resources),
216       resource_ids_(resource_ids),
217       initialization_status_map_(initialization_status_map) {
218   // TODO(b/161272052): Consider a better TfLiteContext initialization pattern:
219   context_.impl_ = static_cast<void*>(this);
220   context_.ResizeTensor = ResizeTensor;
221   context_.ReportError = ReportErrorC;
222   context_.AddTensors = AddTensors;
223   context_.tensors = nullptr;
224   context_.tensors_size = 0;
225   context_.allow_fp32_relax_to_fp16 = false;
226   context_.recommended_num_threads = -1;
227   context_.GetExternalContext = GetExternalContext;
228   context_.SetExternalContext = SetExternalContext;
229   context_.profiler = nullptr;
230   context_.GetTensor = nullptr;
231   context_.GetEvalTensor = nullptr;
232   context_.GetModelMetadata = GetModelMetadata;
233 
234   // Reserve some space for the tensors to avoid excessive resizing.
235   tensors_.reserve(kTensorsReservedCapacity);
236   nodes_and_registration_.reserve(kTensorsReservedCapacity);
237   // Invalid to call these except from TfLiteDelegate
238   SwitchToKernelContext();
239 }
240 
241 Subgraph::~Subgraph() {
242   for (int node_index = 0; node_index < nodes_and_registration_.size();
243        ++node_index) {
244     CleanupNode(node_index);
245   }
246 
247   for (size_t i = 0; i < context_.tensors_size; i++) {
248     TfLiteTensor* tensor = &context_.tensors[i];
249     if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
250         tensor->delegate->FreeBufferHandle != nullptr) {
251       tensor->delegate->FreeBufferHandle(&context_, tensor->delegate,
252                                          &tensor->buffer_handle);
253     }
254     TfLiteTensorFree(tensor);
255   }
256 }
257 
258 void Subgraph::CleanupNode(int node_index) {
259   TfLiteNode& node = nodes_and_registration_[node_index].first;
260   const TfLiteRegistration& registration =
261       nodes_and_registration_[node_index].second;
262   TfLiteIntArrayFree(node.inputs);
263   TfLiteIntArrayFree(node.outputs);
264   TfLiteIntArrayFree(node.temporaries);
265   TfLiteIntArrayFree(node.intermediates);
266   if (node.builtin_data) free(node.builtin_data);
267   OpFree(registration, node.user_data);
268   node.builtin_data = nullptr;
269 }
270 
271 TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
272     TfLiteContext* context, TfLiteRegistration registration,
273     const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
274   return static_cast<Subgraph*>(context->impl_)
275       ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace,
276                                               delegate);
277 }
278 
279 namespace {
280 
281 // Copy a std::vector<int> to an existing TfLiteIntArray.
282 // This is a low-level data manipulation function; it is the caller's
283 // responsibility to ensure the TfLiteIntArray is large enough.
284 void CopyVectorToTfLiteIntArray(const std::vector<int>& vec,
285                                 TfLiteIntArray* arr) {
286   arr->size = vec.size();
287   memcpy(arr->data, vec.data(), sizeof(int) * arr->size);
288 }
289 
290 // This function allocates a contiguous memory block that contains a
291 // TfLiteDelegateParams followed by several TfLiteIntArrays.
292 // Calling `free` on the TfLiteDelegateParams* releases all of the allocated
293 // space at once.
294 //
295 // +-----------------------------------+
296 // | TfLiteDelegateParams              |
297 // | TfLiteDelegate* delegate;         |
298 // | TfLiteIntArray* nodes_to_replace; |--\
299 // | TfLiteIntArray* input_tensors;    |--+--\
300 // | TfLiteIntArray* output_tensors;   |--+--+--\
301 // +-----------------------------------+  |  |  |
302 // | TfLiteIntArray (variable size)    |<-/  |  |
303 // +-----------------------------------+     |  |
304 // | TfLiteIntArray (variable size)    |<----/  |
305 // +-----------------------------------+        |
306 // | TfLiteIntArray (variable size)    |<-------/
307 // +-----------------------------------+
308 TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
309                                            const NodeSubset& node_subset) {
310   // Step 1: Calculate the allocation size.
311   int allocation_size = sizeof(TfLiteDelegateParams);
312 
313   int nodes_to_replace_size =
314       TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size());
315   allocation_size += nodes_to_replace_size;
316 
317   int input_tensors_size =
318       TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size());
319   allocation_size += input_tensors_size;
320 
321   int output_tensors_size =
322       TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size());
323   allocation_size += output_tensors_size;
324 
325   // Step 2: Allocate the memory.
326   // Use `char*` to conveniently step through the allocated space by bytes.
327   char* allocation = static_cast<char*>(malloc(allocation_size));
328 
329   // Step 3: Fill all data structures.
330   TfLiteDelegateParams* params =
331       reinterpret_cast<TfLiteDelegateParams*>(allocation);
332   params->delegate = delegate;
333   allocation += sizeof(TfLiteDelegateParams);
334 
335   params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation);
336   CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
337   allocation += nodes_to_replace_size;
338 
339   params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
340   CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
341   allocation += input_tensors_size;
342 
343   params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
344   CopyVectorToTfLiteIntArray(node_subset.output_tensors,
345                              params->output_tensors);
346   allocation += output_tensors_size;
347 
348   return params;
349 }
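// [Editorial note, not in the original source] Because the three TfLiteIntArrays
// live inside the same malloc'd block as the header, the whole structure is
// released with a single free() on the TfLiteDelegateParams*. In this file the
// pointer is handed to AddNodeWithParameters() as builtin_data, so CleanupNode()
// eventually performs that free(node.builtin_data); a standalone sketch would
// look like:
//
//   TfLiteDelegateParams* params = CreateDelegateParams(delegate, node_subset);
//   // ... use params ...
//   free(params);  // frees the header and all embedded arrays at once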
350 
351 // Assumes that params is not nullptr.
352 void PopulatePreviewDelegateParams(const NodeSubset& node_subset,
353                                    TfLiteDelegateParams* params) {
354   // Since these params are used for previewing partitioning, params->delegate
355   // is not required.
356   params->delegate = nullptr;
357 
358   params->nodes_to_replace = TfLiteIntArrayCreate(node_subset.nodes.size());
359   CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
360 
361   params->input_tensors =
362       TfLiteIntArrayCreate(node_subset.input_tensors.size());
363   CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
364 
365   params->output_tensors =
366       TfLiteIntArrayCreate(node_subset.output_tensors.size());
367   CopyVectorToTfLiteIntArray(node_subset.output_tensors,
368                              params->output_tensors);
369 }
370 
371 }  // namespace
372 
373 TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
374     TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
375     TfLiteDelegate* delegate) {
376   // Ignore empty node replacement sets.
377   if (!nodes_to_replace->size) {
378     return kTfLiteOk;
379   }
380 
381   // Annotate the registration as DELEGATE op.
382   registration.builtin_code = BuiltinOperator_DELEGATE;
383 
384   // Analyze the graph to find all independent node_subsets that are either
385   // fully not-this-delegate or this-delegate computation.
386   InterpreterInfo info(this);
387   std::vector<NodeSubset> node_subsets;
388   PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
389                                            &node_subsets);
390 
391 #ifdef __ANDROID__
392   // On Android the log message below is used for diagnosing delegation success
393   // also in production builds. Delegation happens sufficiently rarely that the
394   // message isn't spammy.
395   TFLITE_LOG_PROD(
396       tflite::TFLITE_LOG_INFO,
397       "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions.",
398       nodes_to_replace->size,
399       registration.custom_name ? registration.custom_name : "unknown",
400       node_subsets.size());
401 #else   // !__ANDROID__
402   // Server-side, delegation may happen so often as to make logging spammy + we
403   // don't have a clear need for the diagnostic in production builds.
404   TFLITE_LOG(
405       tflite::TFLITE_LOG_INFO,
406       "Replacing %d node(s) with delegate (%s) node, yielding %zu partitions.",
407       nodes_to_replace->size,
408       registration.custom_name ? registration.custom_name : "unknown",
409       node_subsets.size());
410 #endif  // __ANDROID__
411 
412   execution_plan_.clear();
413 
414   for (auto& node_subset : node_subsets) {
415     // Subsets claimed by the delegate should have a "macro" op created, the
416     // other node_subsets (kTfNonPartition) just have their nodes added back to
417     // the execution plan.
418     switch (node_subset.type) {
419       case NodeSubset::kTfNonPartition:
420         for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end();
421              ++it) {
422           execution_plan_.push_back(*it);
423         }
424         break;
425       case NodeSubset::kTfPartition: {
426         int node_index;
427 
428         TfLiteDelegateParams* params =
429             CreateDelegateParams(delegate, node_subset);
430         TF_LITE_ENSURE_STATUS(AddNodeWithParameters(
431             node_subset.input_tensors, node_subset.output_tensors, {}, nullptr,
432             0, params, &registration, &node_index));
433 
434         // Initialize the output tensors' delegate-related fields.
435         for (int tensor_index : node_subset.output_tensors) {
436           TfLiteTensor* tensor = &tensors_[tensor_index];
437           TF_LITE_ENSURE(&context_, tensor->delegate == nullptr ||
438                                         tensor->delegate == delegate);
439           tensor->delegate = delegate;
440         }
441 
442         // Associate the node with the delegate.
443         TfLiteNode* node = &nodes_and_registration_[node_index].first;
444         node->delegate = delegate;
445       } break;
446       case NodeSubset::kTfUnexplored:
447         return kTfLiteError;
448         break;
449     }
450   }
451   return kTfLiteOk;
452 }
453 
454 TfLiteExternalContext* Subgraph::GetExternalContext(
455     TfLiteExternalContextType type) {
456   if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
457     return external_contexts_[type];
458   }
459   return nullptr;
460 }
461 
462 TfLiteExternalContext* Subgraph::GetExternalContext(
463     struct TfLiteContext* context, TfLiteExternalContextType type) {
464   return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type);
465 }
466 
467 void Subgraph::SetExternalContext(TfLiteExternalContextType type,
468                                   TfLiteExternalContext* ctx) {
469   if (static_cast<int>(type) >= 0 && type < kTfLiteMaxExternalContexts) {
470     external_contexts_[type] = ctx;
471   }
472 }
473 
474 void Subgraph::SetExternalContext(struct TfLiteContext* context,
475                                   TfLiteExternalContextType type,
476                                   TfLiteExternalContext* ctx) {
477   return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx);
478 }
479 
480 // Gets a TfLiteIntArray* representing the execution plan. The interpreter owns
481 // this memory, and it is only guaranteed to exist during the invocation of the
482 // delegate's Prepare.
483 TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) {
484   plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size()));
485   *execution_plan = plan_cache_.get();
486   static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]),
487                 "TfLiteIntArray and execution_plan do not contain same type.");
488   std::memcpy(plan_cache_->data, execution_plan_.data(),
489               sizeof(plan_cache_->data[0]) * execution_plan_.size());
490   return kTfLiteOk;
491 }
492 
493 // WARNING: This is an experimental interface that is subject to change.
494 // Entry point for C node plugin API to get the execution plan
495 TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context,
496                                         TfLiteIntArray** execution_plan) {
497   return static_cast<Subgraph*>(context->impl_)
498       ->GetExecutionPlan(execution_plan);
499 }
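// [Editorial sketch, not in the original source] Hypothetical delegate code
// showing the intended use of this entry point from a delegate's Prepare:
//
//   TfLiteIntArray* plan = nullptr;
//   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
//   for (int i = 0; i < plan->size; ++i) {
//     const int node_index = plan->data[i];
//     // ... inspect the node and decide whether to claim it ...
//   }
//
// The array is owned by plan_cache_ above and is only guaranteed to stay valid
// for the duration of that Prepare call.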
500 
501 void Subgraph::FreeDelegatePartitioningData() {
502   for (auto& params : partitioning_preview_cache_) {
503     TfLiteIntArrayFree(params.nodes_to_replace);
504     TfLiteIntArrayFree(params.input_tensors);
505     TfLiteIntArrayFree(params.output_tensors);
506   }
507   partitioning_preview_cache_.clear();
508 }
509 
510 TfLiteStatus Subgraph::GetModelMetadata(const char* name, const char** ptr,
511                                         size_t* bytes) {
512   TF_LITE_ENSURE(&context_, ptr != nullptr);
513   TF_LITE_ENSURE(&context_, bytes != nullptr);
514   *ptr = nullptr;
515   *bytes = 0;
516   if (!metadata_) return kTfLiteError;
517   const std::string name_str = name;
518   auto itr = metadata_->find(name_str);
519   if (itr != metadata_->end()) {
520     *ptr = itr->second.c_str();
521     *bytes = itr->second.size();
522     return kTfLiteOk;
523   }
524   return kTfLiteError;
525 }
526 
527 TfLiteStatus Subgraph::GetModelMetadata(const struct TfLiteContext* context,
528                                         const char* name, const char** ptr,
529                                         size_t* bytes) {
530   return static_cast<Subgraph*>(context->impl_)
531       ->GetModelMetadata(name, ptr, bytes);
532 }
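// [Editorial sketch, not in the original source] Hypothetical lookup through
// the context hook above; "model_description" is a placeholder key:
//
//   const char* buf = nullptr;
//   size_t bytes = 0;
//   if (context->GetModelMetadata(context, "model_description", &buf, &bytes) ==
//       kTfLiteOk) {
//     // buf points at `bytes` bytes owned by the metadata map set via
//     // SetMetadata(); kTfLiteError simply means the key is absent.
//   }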
533 
534 TfLiteStatus Subgraph::PreviewDelegatePartitioning(
535     const TfLiteIntArray* nodes_to_replace,
536     TfLiteDelegateParams** partition_params_array, int* num_partitions) {
537   // Ensure partitioning cache is empty.
538   FreeDelegatePartitioningData();
539   // Defaults.
540   if (!partition_params_array || !num_partitions) return kTfLiteError;
541   *partition_params_array = nullptr;
542   *num_partitions = 0;
543   if (!nodes_to_replace->size) {
544     return kTfLiteOk;
545   }
546 
547   // Partition the execution plan into node subsets.
548   InterpreterInfo info(this);
549   std::vector<NodeSubset> node_subsets;
550   PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
551                                            &node_subsets);
552 
553   // Create one TfLiteDelegateParams per node-subset which would be delegated.
554   for (auto& node_subset : node_subsets) {
555     if (node_subset.type != NodeSubset::kTfPartition) {
556       continue;
557     }
558     partitioning_preview_cache_.emplace_back();
559     PopulatePreviewDelegateParams(node_subset,
560                                   &partitioning_preview_cache_.back());
561     ++*num_partitions;
562   }
563 
564   *partition_params_array = partitioning_preview_cache_.data();
565   return kTfLiteOk;
566 }
567 
568 TfLiteStatus Subgraph::PreviewDelegatePartitioning(
569     struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
570     TfLiteDelegateParams** partition_params_array, int* num_partitions) {
571   return static_cast<Subgraph*>(context->impl_)
572       ->PreviewDelegatePartitioning(nodes_to_replace, partition_params_array,
573                                     num_partitions);
574 }
575 
576 TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) {
577   TF_LITE_ENSURE_OK(&context_,
578                     CheckTensorIndices("inputs", inputs.data(), inputs.size()));
579   inputs_ = std::move(inputs);
580   return kTfLiteOk;
581 }
582 
583 TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) {
584   TF_LITE_ENSURE_OK(
585       &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size()));
586   outputs_ = std::move(outputs);
587   return kTfLiteOk;
588 }
589 
590 TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) {
591   TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables", variables.data(),
592                                                   variables.size()));
593   variables_ = std::move(variables);
594   return kTfLiteOk;
595 }
596 
597 TfLiteStatus Subgraph::SetMetadata(
598     const std::map<std::string, std::string>* metadata) {
599   metadata_ = metadata;
600   // TODO(b/188185962): Set context_.allow_fp32_relax_to_fp16 based on metadata.
601   return kTfLiteOk;
602 }
603 
604 void Subgraph::SetCancellationFunction(void* data,
605                                        bool (*check_cancelled_func)(void*)) {
606   cancellation_data_ = data;
607   check_cancelled_func_ = check_cancelled_func;
608 }
609 
610 bool Subgraph::IsCancelled() {
611   return (check_cancelled_func_ != nullptr) &&
612          (*check_cancelled_func_)(cancellation_data_);
613 }
614 
615 void Subgraph::ReserveNodes(int count) {
616   nodes_and_registration_.reserve(count);
617 }
618 
619 TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
620                                           int length) {
621   // Making sure kTfLiteOptionalTensor is not re-defined to something other than
622   // -1.
623   static_assert(kTfLiteOptionalTensor == -1,
624                 "kTfLiteOptionalTensor should be defined as -1");
625 
626   for (int i = 0; i < length; i++) {
627     int index = indices[i];
628     // Continue if index == kTfLiteOptionalTensor before the additional
629     // comparisons below, since size_t(-1) is always >= context_.tensors_size.
630     if (index == kTfLiteOptionalTensor) {
631       continue;
632     }
633     if (index < 0 || static_cast<size_t>(index) >= context_.tensors_size) {
634       ReportError(
635           "Invalid tensor index %d in %s. The subgraph has %d tensors\n", index,
636           label, context_.tensors_size);
637       consistent_ = false;
638       return kTfLiteError;
639     }
640   }
641   return kTfLiteOk;
642 }
643 
644 // We have two arrays and we need to check that elements from one array don't
645 // show up in the other. We could sort both arrays and then iterate with two
646 // pointers from start to finish always increasing the smaller one but since
647 // these arrays are usually short (<25 elements for inputs, usually <3 for
648 // outputs), this might be slower than the naive approach (if arrays have size n
649 // and m, with n >> m ~ O(1), the first approach is O(n log n) whereas the other
650 // is O(n)). Plus, sorting the input and output arrays might not be something we
651 // want as it destroys ordering of elements.
652 //
653 // If it turns out that this is an issue, we can switch to the other algorithm.
654 TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices,
655                                                      int num_inputs,
656                                                      const int* output_indices,
657                                                      int num_outputs) {
658   for (int i = 0; i < num_inputs; i++) {
659     for (int j = 0; j < num_outputs; j++) {
660       if (input_indices[i] == output_indices[j]) {
661         ReportError("Tensor %d is both input %d and output %d\n",
662                     input_indices[i], i, j);
663         consistent_ = false;
664         return kTfLiteError;
665       }
666     }
667   }
668   return kTfLiteOk;
669 }
670 
671 namespace {
672 // Multiply two sizes and return kTfLiteError if the multiplication overflows.
673 // This is based off tensorflow/overflow.h but is simpler as we already
674 // have unsigned numbers. It is also generalized to work where sizeof(size_t)
675 // is not 8.
676 TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product) {
677   // Multiplying a * b where a and b are size_t cannot result in overflow in a
678   // size_t accumulator if both numbers have no non-zero bits in their upper
679   // half.
680   constexpr size_t size_t_bits = 8 * sizeof(size_t);
681   constexpr size_t overflow_upper_half_bit_position = size_t_bits / 2;
682   *product = a * b;
683   // If neither integer has non-zero bits in its upper half, the product
684   // can't overflow. Otherwise, check using (slow) division.
685   if (TFLITE_EXPECT_FALSE((a | b) >> overflow_upper_half_bit_position != 0)) {
686     if (a != 0 && *product / a != b) return kTfLiteError;
687   }
688   return kTfLiteOk;
689 }
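// [Editorial example, not in the original source] With a 32-bit size_t,
// a = 0x10000 and b = 0x10000 both have bits in the upper half, so the division
// check runs: the wrapped product is 0, 0 / a != b, and kTfLiteError is
// returned. Small operands such as a = 100, b = 200 skip the division entirely.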
690 }  // namespace
691 
692 TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
693                                      size_t dims_size, size_t* bytes) {
694   TF_LITE_ENSURE(&context_, bytes != nullptr);
695   // When 'dims_size' is 0, we simply assume it's a scalar. Therefore, we start
696   // 'count' as 1.
697   size_t count = 1;
698   for (int k = 0; k < dims_size; k++) {
699     size_t old_count = count;
700     TF_LITE_ENSURE_MSG(
701         &context_,
702         MultiplyAndCheckOverflow(old_count, dims[k], &count) == kTfLiteOk,
703         "BytesRequired number of elements overflowed.\n");
704   }
705   size_t type_size = 0;
706   TF_LITE_ENSURE_OK(&context_, GetSizeOfType(&context_, type, &type_size));
707   TF_LITE_ENSURE_MSG(
708       &context_, MultiplyAndCheckOverflow(type_size, count, bytes) == kTfLiteOk,
709       "BytesRequired number of bytes overflowed.\n");
710   return kTfLiteOk;
711 }
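// [Editorial example, not in the original source] For a kTfLiteFloat32 tensor
// with dims = {2, 3}, count becomes 2 * 3 = 6 and, with a 4-byte element size
// from GetSizeOfType(), *bytes is set to 24.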
712 
713 TfLiteStatus Subgraph::AllocateTensors() {
714   TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "AllocateTensors");
715   if (!consistent_) {
716     ReportError("AllocateTensors() called on inconsistent model.");
717     return kTfLiteError;
718   }
719 
720   // Restore delegation state if applicable.
721   TF_LITE_ENSURE_STATUS(RedoAllDelegates());
722 
723   // Explicit (re)allocation is necessary if nodes have been changed or tensors
724   // have been resized. For inputs marked as dynamic, we can't short-circuit the
725   // allocation as the client may have done the resize manually.
726   if (state_ != kStateUninvokable &&
727       !HasDynamicTensorImpl(context_, inputs())) {
728     if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
729       // If the only change was the release of non-persistent memory via
730       // ReleaseNonPersistentMemory(), just re-allocate it. For any other type
731       // of memory-planning change (for eg, ResizeInputTensor), the state would
732       // be kStateUninvokable.
733       memory_planner_->AcquireNonPersistentMemory();
734     }
735     return kTfLiteOk;
736   }
737 
738   next_execution_plan_index_to_prepare_ = 0;
739   next_execution_plan_index_to_plan_allocation_ = 0;
740   next_original_execution_plan_index_to_prepare_ = 0;
741   if (memory_planner_) {
742     TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
743   }
744 
745   TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
746 
747   state_ = kStateInvokable;
748 
749   // Reset the variable tensors to zero after (re)allocating the tensors.
750   // Developers shouldn't rely on the side effect of this function to reset
751   // variable tensors. They should call `ResetVariableTensors` directly
752   // instead.
753   ResetVariableTensors();
754 
755   return kTfLiteOk;
756 }
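// [Editorial sketch, not in the original source] Typical client flow through
// the public tflite::Interpreter wrapper around this subgraph:
//
//   interpreter->AllocateTensors();                        // plan + allocate
//   float* in = interpreter->typed_input_tensor<float>(0);
//   // ... fill inputs ...
//   interpreter->Invoke();
//   const float* out = interpreter->typed_output_tensor<float>(0);
//
// Resizing an input afterwards (ResizeInputTensor) makes the graph uninvokable
// until AllocateTensors() is called again.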
757 
758 // TODO(b/115961645): Support non-zero default values.
759 TfLiteStatus Subgraph::ResetVariableTensors() {
760   for (auto& tensor : tensors_) {
761     if (!tensor.is_variable) {
762       continue;
763     }
764 
765     if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
766       // If a variable tensor's allocation type is `kTfLiteArenaRwPersistent`,
767       // then it must have been allocated after the initial
768       // `PrepareOpsAndTensors()` call.
769       TF_LITE_ENSURE(&context_, tensor.data.raw != nullptr);
770       tflite::ResetVariableTensor(&tensor);
771     } else {
772       // If a variable tensor's allocation type is not `kTfLiteArenaRwPersistent`,
773       // it can only be `kTfLiteCustom`, in which case we do not reset it.
774       TF_LITE_ENSURE_EQ(&context_, tensor.allocation_type, kTfLiteCustom);
775     }
776   }
777   return kTfLiteOk;
778 }
779 
780 TfLiteStatus Subgraph::AddNodeWithParameters(
781     const std::vector<int>& inputs, const std::vector<int>& outputs,
782     const std::vector<int>& intermediates, const char* init_data,
783     size_t init_data_size, void* builtin_data,
784     const TfLiteRegistration* registration, int* node_index) {
785   std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data,
786                                                               free);
787   if (state_ == kStateInvokableAndImmutable) {
788     ReportError("AddNodeWithParameters is disallowed when graph is immutable.");
789     return kTfLiteError;
790   }
791   state_ = kStateUninvokable;
792 
793   TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("node inputs", inputs.data(),
794                                                   inputs.size()));
795   TF_LITE_ENSURE_OK(
796       &context_,
797       CheckTensorIndices("node outputs", outputs.data(), outputs.size()));
798 
799   // For builtin ops, inputs and outputs must not overlap. Custom ops must do
800   // this check by themselves if they don't support overlapping tensors. This
801   // distinction is to allow custom ops to just forward a tensor, reusing it as
802   // both input and output.
803   if (builtin_data != nullptr) {
804     TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap(
805                                      inputs.data(), inputs.size(),
806                                      outputs.data(), outputs.size()));
807   }
808 
809   int new_node_index = nodes_and_registration_.size();
810   if (node_index) *node_index = new_node_index;
811   nodes_and_registration_.emplace_back();
812   auto& node_and_reg = nodes_and_registration_.back();
813   TfLiteNode& node = node_and_reg.first;
814 
815   // NOTE, here we are not using move semantics yet, since our internal
816   // representation isn't std::vector, but in the future we would like to avoid
817   // copies, so we want the interface to take r-value references now.
818   node.inputs = ConvertVectorToTfLiteIntArray(inputs);
819   node.outputs = ConvertVectorToTfLiteIntArray(outputs);
820   node.intermediates = ConvertVectorToTfLiteIntArray(intermediates);
821   node.temporaries = TfLiteIntArrayCreate(0);
822   if (init_data) {
823     node.user_data = OpInit(*registration, init_data, init_data_size);
824   } else {
825     node.user_data = OpInit(
826         *registration, static_cast<const char*>(builtin_data_deleter.get()), 0);
827   }
828 
829   node.builtin_data = builtin_data_deleter.release();
830 
831   if (registration->builtin_code == BuiltinOperator_CUSTOM) {
832     // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
833     // `Operator` table is passed in.
834     node.custom_initial_data = init_data;
835     node.custom_initial_data_size = init_data_size;
836   } else {
837     node.custom_initial_data = nullptr;
838     node.custom_initial_data_size = 0;
839   }
840   node.might_have_side_effect = OpMightHaveSideEffect(&node, registration);
841 
842   node.delegate = nullptr;
843   // Copying of registration is required to support unresolved custom ops.
844   node_and_reg.second = *registration;
845   execution_plan_.push_back(new_node_index);
846   return kTfLiteOk;
847 }
848 
849 namespace {
850 // Returns true if any tensor identified by indexes in 'tensor_indexes' is
851 // of type 'kTfLiteResource'. False otherwise.
852 bool AnyTensorOfTypeResource(const std::vector<TfLiteTensor>& tensors,
853                              const TfLiteIntArray* tensor_indexes) {
854   for (int i = 0; i < tensor_indexes->size; ++i) {
855     int tensor_index = tensor_indexes->data[i];
856     if (tensor_index >= 0 && tensor_index < tensors.size() &&
857         tensors[tensor_index].type == kTfLiteResource)
858       return true;
859   }
860   return false;
861 }
862 
863 }  // namespace
864 
865 bool Subgraph::OpMightHaveSideEffect(
866     const TfLiteNode* node, const TfLiteRegistration* registration) const {
867   // Check if any of the input tensors are of type resource.
868   if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true;
869   // Check if any of the output tensors are of type resource.
870   if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true;
871   // Consider control flow ops as having side effects, since some ops in the
872   // control flow subgraphs can have side effects.
873   if (registration->builtin_code == kTfLiteBuiltinIf ||
874       registration->builtin_code == kTfLiteBuiltinWhile ||
875       registration->builtin_code == kTfLiteBuiltinCallOnce)
876     return true;
877   return false;
878 }
879 
880 TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
881                                          const std::vector<int>& dims) {
882   const bool delegates_applied = !pre_delegation_execution_plan_.empty();
883   const bool graph_is_immutable = state_ == kStateInvokableAndImmutable;
884   if (graph_is_immutable && !delegates_applied) {
885     ReportError("ResizeInputTensor is disallowed when graph is immutable.");
886     return kTfLiteError;
887   }
888 
889   TF_LITE_ENSURE(&context_,
890                  tensor_index < context_.tensors_size && tensor_index >= 0);
891   TfLiteTensor* tensor = &context_.tensors[tensor_index];
892 
893   // Short-circuit the state change if the dimensions don't change, avoiding
894   // unnecessary (re)allocations.
895   //
896   // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
897   // the subgraph won't allocate memory for a dynamic tensor when its size
898   // is equal to the original tensor size.
899   if (tensor->data.raw != nullptr &&
900       EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
901     return kTfLiteOk;
902   }
903 
904   if (graph_is_immutable) {
905     // Undo delegation if it resulted in the graph being immutable.
906     TF_LITE_ENSURE_STATUS(UndoAllDelegates());
907   }
908   state_ = kStateUninvokable;
909   return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims));
910 }
911 
912 TfLiteStatus Subgraph::ResizeInputTensorStrict(int tensor_index,
913                                                const std::vector<int>& dims) {
914   TF_LITE_ENSURE(&context_,
915                  tensor_index < context_.tensors_size && tensor_index >= 0);
916   TfLiteTensor* tensor = &context_.tensors[tensor_index];
917 
918   // Ensure that only unknown dimensions can be resized.
919   TF_LITE_ENSURE_EQ(&context_, tensor->dims->size, dims.size());
920   for (size_t idx = 0; idx < dims.size(); idx++) {
921     // `dims_signature` is not defined when no unknown dimensions are present.
922     int dim_signature;
923     if (tensor->dims_signature && tensor->dims_signature->size) {
924       dim_signature = tensor->dims_signature->data[idx];
925     } else {
926       dim_signature = tensor->dims->data[idx];
927     }
928 
929     if (dim_signature != -1 && dim_signature != dims[idx]) {
930       ReportError(
931           "Attempting to resize dimension %d of tensor %d with value %d to %d. "
932           "ResizeInputTensorStrict only allows mutating unknown dimensions "
933           "identified by -1.",
934           idx, tensor_index, dim_signature, dims[idx]);
935       return kTfLiteError;
936     }
937   }
938 
939   return ResizeInputTensor(tensor_index, dims);
940 }
941 
942 TfLiteStatus Subgraph::ReleaseNonPersistentMemory() {
943   if (memory_planner_) {
944     TF_LITE_ENSURE_STATUS(memory_planner_->ReleaseNonPersistentMemory());
945   }
946   return kTfLiteOk;
947 }
948 
949 TfLiteStatus Subgraph::OpPrepare(const TfLiteRegistration& op_reg,
950                                  TfLiteNode* node) {
951   if (op_reg.prepare == nullptr) {
952     // Check if it's an unresolved custom op.
953     if (IsUnresolvedCustomOp(op_reg)) {
954       if (IsFlexOp(op_reg.custom_name)) {
955         ReportError(
956             "Select TensorFlow op(s), included in the given model, is(are) not "
957             "supported by this interpreter. Make sure you apply/link the Flex "
958             "delegate before inference. On Android, this can be resolved by "
959             "adding \"org.tensorflow:tensorflow-lite-select-tf-ops\" "
960             "dependency. See instructions: "
961             "https://www.tensorflow.org/lite/guide/ops_select");
962       } else {
963         ReportError(
964             "Encountered unresolved custom op: %s.\nSee instructions: "
965             "https://www.tensorflow.org/lite/guide/ops_custom",
966             op_reg.custom_name ? op_reg.custom_name : "UnknownOp");
967       }
968       return kTfLiteError;
969     }
970     // Resolved ops can have a null Prepare function.
971     return kTfLiteOk;
972   }
973   return op_reg.prepare(&context_, node);
974 }
975 
976 TfLiteStatus Subgraph::PrepareOpsStartingAt(
977     int first_execution_plan_index, const std::vector<int>& execution_plan,
978     int* last_execution_plan_index_prepared) {
979   if (first_execution_plan_index == 0) {
980     // Inputs that are forwarded to outputs without modification are not
981     // evaluated by any operator, so the subgraph's output tensors need to be
982     // checked at the beginning.
983     has_dynamic_tensors_ = HasDynamicTensorImpl(context_, outputs());
984   }
985   for (int execution_plan_index = first_execution_plan_index;
986        execution_plan_index < execution_plan.size(); execution_plan_index++) {
987     int node_index = execution_plan[execution_plan_index];
988     TfLiteNode& node = nodes_and_registration_[node_index].first;
989     const TfLiteRegistration& registration =
990         nodes_and_registration_[node_index].second;
991     EnsureTensorsVectorCapacity();
992     if (OpPrepare(registration, &node) != kTfLiteOk) {
993       return ReportOpError(&context_, node, registration, node_index,
994                            "failed to prepare");
995     }
996 
997     *last_execution_plan_index_prepared = execution_plan_index;
998 
999     // Discontinue if the node has dynamic outputs. Note that we don't
1000     // stop for dynamic temporary tensors since they won't affect the
1001     // sizes of other tensors in the graph.
1002     if (HasDynamicTensor(context_, node.outputs)) {
1003       has_dynamic_tensors_ = true;
1004       return kTfLiteOk;
1005     }
1006   }
1007   return kTfLiteOk;
1008 }
1009 
1010 TfLiteStatus Subgraph::PrepareOpsAndTensors() {
1011   if (!memory_planner_) {
1012 #ifdef TFLITE_USE_SIMPLE_MEMORY_PLANNER
1013     memory_planner_.reset(new SimplePlanner(&context_, CreateGraphInfo()));
1014 #else
1015     memory_planner_.reset(new ArenaPlanner(&context_, CreateGraphInfo(),
1016                                            preserve_all_tensors_,
1017                                            kDefaultTensorAlignment));
1018 #endif
1019     memory_planner_->PlanAllocations();
1020   }
1021 
1022   // Prepare original execution plan if any applied delegate wants it.
1023   // If any of the delegates is immutable, this won't be triggered
1024   // post-delegation (since we undo/redo delegation). For all other cases, other
1025   // delegates that do shape propagation themselves would still be able to.
1026   bool prepare_original_plan = false;
1027   if (!pre_delegation_execution_plan_.empty()) {
1028     for (int i = 0; i < delegates_applied_.size(); ++i) {
1029       if ((delegates_applied_[i]->flags &
1030            kTfLiteDelegateFlagsRequirePropagatedShapes)) {
1031         prepare_original_plan = true;
1032         break;
1033       }
1034     }
1035   }
1036   if (prepare_original_plan) {
1037     int last_original_exec_plan_index_prepared = 0;
1038     TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
1039         next_execution_plan_index_to_prepare_, pre_delegation_execution_plan_,
1040         &last_original_exec_plan_index_prepared));
1041     next_original_execution_plan_index_to_prepare_ =
1042         last_original_exec_plan_index_prepared + 1;
1043   }
1044 
1045   int last_exec_plan_index_prepared = 0;
1046   TF_LITE_ENSURE_STATUS(
1047       PrepareOpsStartingAt(next_execution_plan_index_to_prepare_,
1048                            execution_plan_, &last_exec_plan_index_prepared));
1049   next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
1050 
1051   // Execute arena allocations.
1052   TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
1053       next_execution_plan_index_to_plan_allocation_,
1054       last_exec_plan_index_prepared));
1055 
1056   // Ensure custom allocations are large enough for applicable tensors.
1057   // This causes some extra validations for cases with dynamic tensors, but the
1058   // overhead should be minimal since the number of custom-allocated tensors
1059   // will typically be low.
1060   for (int i = 0; i < custom_allocations_.size(); ++i) {
1061     auto index_and_alloc = custom_allocations_[i];
1062     TfLiteTensor* tensor_at_index = tensor(index_and_alloc.first);
1063     const auto& alloc = index_and_alloc.second;
1064     TF_LITE_ENSURE_EQ(context(), tensor_at_index->allocation_type,
1065                       kTfLiteCustom);
1066     if (alloc.bytes < tensor_at_index->bytes) {
1067       ReportError("Custom allocation is too small for tensor idx: %d",
1068                   index_and_alloc.first);
1069       return kTfLiteError;
1070     }
1071   }
1072 
1073   next_execution_plan_index_to_plan_allocation_ =
1074       last_exec_plan_index_prepared + 1;
1075 
1076   return kTfLiteOk;
1077 }
1078 
1079 TfLiteStatus Subgraph::Invoke() {
1080   if (!consistent_) {
1081     ReportError("Invoke called on model that is not consistent.");
1082     return kTfLiteError;
1083   }
1084 
1085   TfLiteStatus status = kTfLiteOk;
1086   if (state_ == kStateUninvokable) {
1087     ReportError("Invoke called on model that is not ready.");
1088     return kTfLiteError;
1089   } else if (memory_planner_ && !memory_planner_->HasNonPersistentMemory()) {
1090     ReportError("Non-persistent memory is not available.");
1091     return kTfLiteError;
1092   }
1093   TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), "Invoke");
1094 
1095   // Invocations are always done in node order.
1096   // Note that calling Invoke repeatedly will cause the original memory plan to
1097   // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
1098   // called.
1099   for (int execution_plan_index = 0;
1100        execution_plan_index < execution_plan_.size(); execution_plan_index++) {
1101     if (execution_plan_index == next_execution_plan_index_to_prepare_) {
1102       TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
1103       TF_LITE_ENSURE(&context_, next_execution_plan_index_to_prepare_ >=
1104                                     execution_plan_index);
1105     }
1106     int node_index = execution_plan_[execution_plan_index];
1107     TfLiteNode& node = nodes_and_registration_[node_index].first;
1108     const TfLiteRegistration& registration =
1109         nodes_and_registration_[node_index].second;
1110 
1111     const char* op_name = nullptr;
1112     if (profiler_) op_name = GetTFLiteOpName(registration);
1113     TFLITE_SCOPED_TAGGED_OPERATOR_PROFILE(profiler_.get(), op_name, node_index);
1114 
1115     for (int i = 0; i < node.inputs->size; ++i) {
1116       int tensor_index = node.inputs->data[i];
1117       if (tensor_index == kTfLiteOptionalTensor) {
1118         continue;
1119       }
1120       TfLiteTensor* tensor = &tensors_[tensor_index];
1121       if (tensor->delegate && tensor->delegate != node.delegate &&
1122           tensor->data_is_stale) {
1123         TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
1124       }
1125       if (tensor->data.raw == nullptr && tensor->bytes > 0) {
1126         if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 &&
1127             tensor->dims->size != 1) {
1128           // In general, having a tensor here with no buffer will be an error.
1129           // However, for the reshape operator, the second input tensor is
1130           // sometimes only used for the shape, not for the data. Thus, null
1131           // buffer is ok in this situation.
1132           // The situation where null buffer is not ok for reshape operator is
1133           // only when there are 2 inputs given to the node and the one
1134           // corresponding to the shape (i == 1) is a vector that contains all
1135           // dimensions. See `GetOutputShape()` function in
1136           // `tensorflow/lite/kernels/reshape.cc`
1137           continue;
1138         } else {
1139           // In all other cases, we need to return an error as otherwise we will
1140           // trigger a null pointer dereference (likely).
1141           ReportError("Input tensor %d lacks data", tensor_index);
1142           return kTfLiteError;
1143         }
1144       }
1145     }
1146 
1147     if (check_cancelled_func_ != nullptr &&
1148         check_cancelled_func_(cancellation_data_)) {
1149       ReportError("Client requested cancel during Invoke()");
1150       return kTfLiteError;
1151     }
1152 
1153     EnsureTensorsVectorCapacity();
1154     tensor_resized_since_op_invoke_ = false;
1155     if (OpInvoke(registration, &node) != kTfLiteOk) {
1156       return ReportOpError(&context_, node, registration, node_index,
1157                            "failed to invoke");
1158     }
1159 
1160     // Force execution prep for downstream ops if the latest op triggered the
1161     // resize of a dynamic tensor.
1162     if (tensor_resized_since_op_invoke_ &&
1163         HasDynamicTensor(context_, node.outputs)) {
1164       next_execution_plan_index_to_prepare_ = execution_plan_index + 1;
1165 
1166       // This happens when an intermediate dynamic tensor is resized.
1167       // We don't have to prepare all the ops, but we need to recompute
1168       // the allocation plan.
1169       if (next_execution_plan_index_to_plan_allocation_ >
1170           next_execution_plan_index_to_prepare_) {
1171         next_execution_plan_index_to_plan_allocation_ =
1172             next_execution_plan_index_to_prepare_;
1173         if (memory_planner_) {
1174           TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocationsAfter(
1175               next_execution_plan_index_to_plan_allocation_ - 1));
1176         }
1177       }
1178     }
1179   }
1180 
1181   return status;
1182 }
1183 
1184 TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context,
1185                                     TfLiteTensor* tensor,
1186                                     TfLiteIntArray* new_size) {
1187   // If the dimensions don't change, short-circuit and avoid
1188   // unnecessary (re)allocations.
1189   //
1190   // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
1191   // the subgraph won't allocate memory for a dynamic tensor when its size
1192   // is equal to the original tensor size.
1193   if (tensor->data.raw != nullptr &&
1194       EqualArrayAndTfLiteIntArray(tensor->dims, new_size->size,
1195                                   new_size->data)) {
1196     // A number of clients assume |new_size| remains valid upon success, so
1197     // swap it in as the new (but logically identical) tensor dims.
1198     TfLiteIntArrayFree(tensor->dims);
1199     tensor->dims = new_size;
1200     return kTfLiteOk;
1201   }
1202 
1203   // Note here that context->impl_ is recovering the this pointer for an
1204   // instance of Interpreter to call into the member function ResizeTensorImpl
1205   // (this function is static).
1206   return static_cast<Subgraph*>(context->impl_)
1207       ->ResizeTensorImpl(tensor, new_size);
1208 }
1209 
1210 void Subgraph::ReportErrorImpl(const char* format, va_list args) {
1211   error_reporter_->Report(format, args);
1212 }
1213 
1214 void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) {
1215   va_list args;
1216   va_start(args, format);
1217   auto* f = static_cast<Subgraph*>(context->impl_);
1218   // Note here that context->impl_ is recovering the this pointer for an
1219   // instance of Subgraph to call into the member function ReportErrorImpl
1220   // (this function is static).
1221   f->ReportErrorImpl(format, args);
1222   va_end(args);
1223 }
1224 
1225 // Entry point for C node plugin API to report an error.
1226 void Subgraph::ReportError(const char* format, ...) {
1227   va_list args;
1228   va_start(args, format);
1229   auto* f = static_cast<Subgraph*>(context_.impl_);
1230   // Note here that context_.impl_ recovers the this pointer for this
1231   // Subgraph so that we can route through the same ReportErrorImpl used by
1232   // the static C entry points above.
1233   f->ReportErrorImpl(format, args);
1234   va_end(args);
1235 }
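
// Illustrative sketch (not part of the original source): a node kernel reports
// errors through the context callback that lands in ReportErrorC above.
// `input` is a hypothetical tensor pointer from the kernel's arguments.
//
//   if (input->type != kTfLiteFloat32) {
//     context->ReportError(context, "Expected float32, got %s.",
//                          TfLiteTypeGetName(input->type));
//     return kTfLiteError;
//   }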
1236 
1237 TfLiteStatus Subgraph::AddTensors(int tensors_to_add,
1238                                   int* first_new_tensor_index) {
1239   const size_t base_index = tensors_.size();
1240   if (first_new_tensor_index) *first_new_tensor_index = base_index;
1241   tensors_.resize(tensors_.size() + tensors_to_add);
1242   for (size_t i = base_index; i < tensors_.size(); i++) {
1243     memset(&tensors_[i], 0, sizeof(tensors_[i]));
1244     tensors_[i].buffer_handle = kTfLiteNullBufferHandle;
1245   }
1246   context_.tensors = tensors_.data();
1247   context_.tensors_size = tensors_.size();
1248   return kTfLiteOk;
1249 }
1250 
1251 TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add,
1252                                   int* first_new_tensor_index) {
1253   // Note here that context->impl_ is recovering the this pointer for an
1254   // instance of Subgraph to call into the member function AddTensors
1255   // (this function is static).
1256   return static_cast<Subgraph*>(context->impl_)
1257       ->AddTensors(tensors_to_add, first_new_tensor_index);
1258 }
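
// Illustrative sketch (not part of the original source): a kernel's Prepare()
// commonly uses the AddTensors callback above to create a scratch tensor and
// register it as a temporary. The count and index handling below assume a
// node with no pre-existing temporaries.
//
//   int first_new_index = 0;
//   TF_LITE_ENSURE_STATUS(
//       context->AddTensors(context, /*tensors_to_add=*/1, &first_new_index));
//   TfLiteIntArrayFree(node->temporaries);
//   node->temporaries = TfLiteIntArrayCreate(1);
//   node->temporaries->data[0] = first_new_index;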
1259 
1260 TfLiteStatus Subgraph::GetNodeAndRegistration(
1261     int node_index, TfLiteNode** node, TfLiteRegistration** registration) {
1262   TF_LITE_ENSURE(&context_, node_index >= 0);
1263   auto nodes_size = nodes_and_registration_.size();
1264   TF_LITE_ENSURE(&context_, static_cast<size_t>(node_index) < nodes_size);
1265   TF_LITE_ENSURE(&context_, node != nullptr && registration != nullptr);
1266   auto& node_and_reg = nodes_and_registration_[node_index];
1267   *node = &node_and_reg.first;
1268   *registration = &node_and_reg.second;
1269   return kTfLiteOk;
1270 }
1271 
1272 TfLiteStatus Subgraph::GetNodeAndRegistration(
1273     struct TfLiteContext* context, int node_index, TfLiteNode** node,
1274     TfLiteRegistration** registration) {
1275   return static_cast<Subgraph*>(context->impl_)
1276       ->GetNodeAndRegistration(node_index, node, registration);
1277 }
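
// Illustrative sketch (not part of the original source): inside a delegate's
// Prepare() (i.e. while SwitchToDelegateContext below is in effect), the
// execution plan and node registrations can be walked like this. The returned
// plan is owned by the context and must not be freed by the caller.
//
//   TfLiteIntArray* plan = nullptr;
//   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
//   for (int i = 0; i < plan->size; ++i) {
//     TfLiteNode* node = nullptr;
//     TfLiteRegistration* registration = nullptr;
//     TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
//         context, plan->data[i], &node, &registration));
//     // Inspect registration->builtin_code to decide whether to claim it.
//   }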
1278 
1279 TfLiteStatus Subgraph::SetTensorParametersReadOnly(
1280     int tensor_index, TfLiteType type, const char* name, const size_t rank,
1281     const int* dims, TfLiteQuantization quantization, const char* buffer,
1282     size_t bytes, const Allocation* allocation, TfLiteSparsity* sparsity) {
1283   // Ensure quantization cleanup on failure.
1284   ScopedTfLiteQuantization scoped_quantization(&quantization);
1285   ScopedTfLiteSparsity scoped_sparsity(sparsity);
1286   if (state_ == kStateInvokableAndImmutable) {
1287     ReportError(
1288         "SetTensorParametersReadOnly is disallowed when graph is immutable.");
1289     return kTfLiteError;
1290   }
1291 
1292   TF_LITE_ENSURE(&context_,
1293                  tensor_index < context_.tensors_size && tensor_index >= 0);
1294 
1295   // For most tensors we know exactly how much memory is necessary so we can
1296   // ensure the buffer is large enough. However, we need to skip string tensors
1297   // and sparse tensors because their sizes change with the contents.
1298   // TODO(b/145615516): Extend BytesRequired to check sparse tensors.
1299   if (type != kTfLiteString && type != kTfLiteResource &&
1300       type != kTfLiteVariant && sparsity == nullptr) {
1301     size_t required_bytes;
1302     TF_LITE_ENSURE_OK(&context_,
1303                       BytesRequired(type, dims, rank, &required_bytes));
1304     TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes);
1305   }
1306 
1307   TfLiteTensor& tensor = context_.tensors[tensor_index];
1308   if (type == tensor.type &&
1309       EqualArrayAndTfLiteIntArray(tensor.dims, rank, dims)) {
1310     // Fast path which does not invalidate the invokable property.
1311     TfLiteTensorDataFree(&tensor);
1312     TfLiteQuantizationFree(&tensor.quantization);
1313     tensor.data.raw = const_cast<char*>(buffer);
1314     if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(rank, dims);
1315     tensor.params = GetLegacyQuantization(quantization);
1316     tensor.quantization = *scoped_quantization.release();
1317     tensor.sparsity = scoped_sparsity.release();
1318     tensor.allocation_type = kTfLiteMmapRo;
1319     tensor.allocation = allocation;
1320   } else {
1321     state_ = kStateUninvokable;
1322     TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
1323                       GetLegacyQuantization(quantization),
1324                       const_cast<char*>(buffer), bytes, kTfLiteMmapRo,
1325                       allocation, false, &tensor);
1326     // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
1327     // if there are other required callers.
1328     tensor.quantization = *scoped_quantization.release();
1329     tensor.sparsity = scoped_sparsity.release();
1330   }
1331   return kTfLiteOk;
1332 }
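
// Illustrative sketch (not part of the original source): providing constant
// data through the overload above. The tensor index and shape are
// hypothetical, and `weights` must outlive the subgraph since kTfLiteMmapRo
// tensors reference the caller's buffer directly.
//
//   static const float weights[12] = {/* 3 x 4 row-major values */};
//   const int dims[2] = {3, 4};
//   TfLiteQuantization quant;
//   quant.type = kTfLiteNoQuantization;
//   quant.params = nullptr;
//   subgraph->SetTensorParametersReadOnly(
//       /*tensor_index=*/5, kTfLiteFloat32, "weights", /*rank=*/2, dims, quant,
//       reinterpret_cast<const char*>(weights), sizeof(weights),
//       /*allocation=*/nullptr, /*sparsity=*/nullptr);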
1333 
1334 // Set parameters for a read-write tensor at `tensor_index`. Unlike the
1335 // read-only variant above, no external buffer is supplied: for types placed
1336 // in the arena the required size is recorded for the memory planner, while
1337 // string/resource/variant tensors are allocated dynamically at runtime.
1338 TfLiteStatus Subgraph::SetTensorParametersReadWrite(
1339     int tensor_index, TfLiteType type, const char* name, const size_t rank,
1340     const int* dims, TfLiteQuantization quantization, bool is_variable,
1341     const size_t rank_dims_signature, const int* dims_signature) {
1342   // Ensure quantization cleanup on failure.
1343   ScopedTfLiteQuantization scoped_quantization(&quantization);
1344   if (state_ == kStateInvokableAndImmutable) {
1345     ReportError(
1346         "SetTensorParametersReadWrite is disallowed when graph is immutable.");
1347     return kTfLiteError;
1348   }
1349   TF_LITE_ENSURE(&context_,
1350                  tensor_index < context_.tensors_size && tensor_index >= 0);
1351   size_t required_bytes = 0;
1352   if (type != kTfLiteString && type != kTfLiteResource &&
1353       type != kTfLiteVariant) {
1354     // These types will be allocated in our arena so we need to record how
1355     // many bytes we will need based on the dimensions. String tensors are
1356     // allocated dynamically and we can't know ahead of time how much space
1357     // they will require.
1358     TF_LITE_ENSURE_OK(&context_,
1359                       BytesRequired(type, dims, rank, &required_bytes));
1360   }
1361 
1362   TfLiteAllocationType allocation_type = kTfLiteArenaRw;
1363   if (type == kTfLiteString || type == kTfLiteResource ||
1364       type == kTfLiteVariant) {
1365     if (is_variable) {
1366       // We don't have a real use case for a string variable tensor.
1367       ReportError("String variable tensor isn't supported.");
1368       return kTfLiteError;
1369     }
1370     allocation_type = kTfLiteDynamic;
1371   } else if (is_variable) {
1372     allocation_type = kTfLiteArenaRwPersistent;
1373   }
1374 
1375   TfLiteTensor& tensor = context_.tensors[tensor_index];
1376   TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
1377                     GetLegacyQuantization(quantization),
1378                     /*buffer=*/nullptr, required_bytes, allocation_type,
1379                     nullptr, is_variable, &tensor);
1380   // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
1381   // if there are other required callers.
1382   tensor.quantization = *scoped_quantization.release();
1383   tensor.dims_signature =
1384       ConvertArrayToTfLiteIntArray(rank_dims_signature, dims_signature);
1385   return kTfLiteOk;
1386 }
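
// Illustrative sketch (not part of the original source): declaring a
// runtime-allocated float input with the function above. The index, name and
// shape are hypothetical; passing the same array for `dims_signature` marks
// every dimension as static.
//
//   const int dims[4] = {1, 224, 224, 3};
//   TfLiteQuantization quant;
//   quant.type = kTfLiteNoQuantization;
//   quant.params = nullptr;
//   subgraph->SetTensorParametersReadWrite(
//       /*tensor_index=*/0, kTfLiteFloat32, "input", /*rank=*/4, dims, quant,
//       /*is_variable=*/false, /*rank_dims_signature=*/4,
//       /*dims_signature=*/dims);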
1387 
1388 TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) {
1389   for (int node_index : new_plan) {
1390     TF_LITE_ENSURE(&context_, node_index >= 0 &&
1391                                   node_index < nodes_and_registration_.size());
1392   }
1393   execution_plan_ = new_plan;
1394   return kTfLiteOk;
1395 }
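
// Illustrative sketch (not part of the original source): tests and debugging
// tools sometimes use SetExecutionPlan above to run only a prefix of the
// original plan. `k` is a hypothetical cutoff.
//
//   const std::vector<int>& original_plan = subgraph->execution_plan();
//   std::vector<int> prefix(original_plan.begin(), original_plan.begin() + k);
//   TF_LITE_ENSURE_STATUS(subgraph->SetExecutionPlan(prefix));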
1396 
1397 TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
1398                                         TfLiteIntArray* new_size) {
1399   // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too.
1400   if (tensor->allocation_type == kTfLiteArenaRw ||
1401       tensor->allocation_type == kTfLiteDynamic ||
1402       tensor->allocation_type == kTfLiteArenaRwPersistent ||
1403       tensor->allocation_type == kTfLitePersistentRo ||
1404       tensor->allocation_type == kTfLiteCustom) {
1405     tensor_resized_since_op_invoke_ |=
1406         TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
1407     if (tensor->type != kTfLiteString && tensor->type != kTfLiteResource &&
1408         tensor->type != kTfLiteVariant) {
1409       size_t bytesRequired;
1410       TfLiteStatus status = BytesRequired(tensor->type, new_size->data,
1411                                           new_size->size, &bytesRequired);
1412       if (status != kTfLiteOk) {
1413         TfLiteIntArrayFree(new_size);
1414         return kTfLiteError;
1415       }
1416 
1417       // Realloc space for heap-allocated tensors.
1418       TfLiteTensorRealloc(bytesRequired, tensor);
1419       tensor->bytes = bytesRequired;
1420     }
1421     if (tensor->dims) TfLiteIntArrayFree(tensor->dims);
1422     tensor->dims = new_size;
1423 
1424     // Reset arena-allocated tensors; they will be allocated later.
1425     if (tensor->allocation_type == kTfLiteArenaRw ||
1426         tensor->allocation_type == kTfLiteArenaRwPersistent) {
1427       tensor->data.raw = nullptr;
1428     }
1429   } else {
1430     // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore
1431     // of fixed size.
1432     TfLiteIntArrayFree(new_size);
1433     ReportError("Attempting to resize a fixed-size tensor.");
1434     return kTfLiteError;
1435   }
1436   return kTfLiteOk;
1437 }
1438 
1439 void Subgraph::SwitchToDelegateContext() {
1440   context_.GetNodeAndRegistration = GetNodeAndRegistration;
1441   context_.ReplaceNodeSubsetsWithDelegateKernels =
1442       ReplaceNodeSubsetsWithDelegateKernels;
1443   context_.GetExecutionPlan = GetExecutionPlan;
1444   context_.PreviewDelegatePartitioning = PreviewDelegatePartitioning;
1445 }
1446 
1447 void Subgraph::SwitchToKernelContext() {
1448   context_.GetNodeAndRegistration = [](struct TfLiteContext* context,
1449                                        int node_index, TfLiteNode** node,
1450                                        TfLiteRegistration** registration) {
1451     return ForbiddenContextFunction(context);
1452   };
1453   context_.ReplaceNodeSubsetsWithDelegateKernels =
1454       [](TfLiteContext* context, TfLiteRegistration registration,
1455          const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
1456         return ForbiddenContextFunction(context);
1457       };
1458   context_.GetExecutionPlan = [](struct TfLiteContext* context,
1459                                  TfLiteIntArray**) {
1460     return ForbiddenContextFunction(context);
1461   };
1462   context_.PreviewDelegatePartitioning =
1463       [](struct TfLiteContext* context, const TfLiteIntArray* nodes_to_replace,
1464          TfLiteDelegateParams** partition_params_array,
1465          int* num_partitions) { return ForbiddenContextFunction(context); };
1466   // Free any memory that might have been allocated by
1467   // PreviewDelegatePartitioning.
1468   FreeDelegatePartitioningData();
1469 }
1470 
1471 TfLiteStatus Subgraph::UndoAllDelegates() {
1472   // Return early if there is nothing to reset to.
1473   if (pre_delegation_execution_plan_.empty()) return kTfLiteOk;
1474 
1475   // First free all delegate nodes.
1476   for (int execution_plan_index = 0;
1477        execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1478     int node_index = execution_plan_[execution_plan_index];
1479     TfLiteNode& node = nodes_and_registration_[node_index].first;
1480     if (node.delegate == nullptr) {
1481       continue;
1482     }
1483     CleanupNode(node_index);
1484   }
1485 
1486   // Reset execution plan.
1487   execution_plan_ = pre_delegation_execution_plan_;
1488   pre_delegation_execution_plan_.clear();
1489 
1490   // Handle FP16 delegation (if applicable).
1491   //
1492   // First pass through execution plan to remember mapping of FP16
1493   // dequantizations in the graph.
1494   // This is required because delegates that support FP16 acceleration may
1495   // remap a supported node's inputs to point to their fp16 versions. This
1496   // remapping is performed in FP16GraphPartitionHelper in delegates/utils,
1497   // and must be undone here so that the CPU kernels see fp32 inputs again.
1498   std::vector<int> fp16_to_fp32(tensors_size(), -1);
1499   for (int execution_plan_index = 0;
1500        execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1501     int node_index = execution_plan_[execution_plan_index];
1502     auto& node_and_reg = nodes_and_registration_[node_index];
1503     const TfLiteNode& node = node_and_reg.first;
1504     const TfLiteRegistration& reg = node_and_reg.second;
1505     if (reg.builtin_code == kTfLiteBuiltinDequantize &&
1506         node.inputs->size == 1 && node.outputs->size == 1) {
1507       const int input_idx = node.inputs->data[0];
1508       if (tensors_[input_idx].type == kTfLiteFloat16) {
1509         fp16_to_fp32[input_idx] = node.outputs->data[0];
1510       }
1511     }
1512   }
1513   // Second pass through the execution plan to remap applicable nodes' fp16
1514   // inputs to their original fp32 versions. Note that if a CPU kernel does
1515   // support fp16, the model will not contain a DEQUANTIZE for its constant
1516   // input.
1517   for (int execution_plan_index = 0;
1518        execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1519     int node_index = execution_plan_[execution_plan_index];
1520     auto& node_and_reg = nodes_and_registration_[node_index];
1521     const TfLiteNode& node = node_and_reg.first;
1522     const TfLiteRegistration& reg = node_and_reg.second;
1523     if (reg.builtin_code == kTfLiteBuiltinDequantize) continue;
1524     for (int i = 0; i < node.inputs->size; ++i) {
1525       const int original_input_idx = node.inputs->data[i];
1526       if (original_input_idx == kTfLiteOptionalTensor) continue;
1527       if (tensors_[original_input_idx].type == kTfLiteFloat16) {
1528         node.inputs->data[i] = fp16_to_fp32[original_input_idx];
1529       }
1530     }
1531   }
1532 
1533   // Delegate nodes are appended to nodes_and_registration_. Therefore, clean
1534   // up nodes_and_registration_ so that it only contains the nodes from
1535   // pre_delegation_execution_plan_.
1536   int max_retained_node_index = 0;
1537   for (int execution_plan_index = 0;
1538        execution_plan_index < execution_plan_.size(); ++execution_plan_index) {
1539     max_retained_node_index = std::max(max_retained_node_index,
1540                                        execution_plan_[execution_plan_index]);
1541   }
1542   nodes_and_registration_.resize(max_retained_node_index + 1);
1543   // After undoing delegates, the graph is uninvokable, but mutable.
1544   state_ = kStateUninvokable;
1545 
1546   delegates_undone_ = true;
1547   return kTfLiteOk;
1548 }
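
// Worked example of the FP16 rewiring undone above (tensor indices are
// hypothetical): the original graph contains DEQUANTIZE(t2: float16) ->
// t5: float32, and a CONV_2D node reads t5. An FP16-capable delegate rewired
// CONV_2D's input to t2. The first pass records fp16_to_fp32[2] = 5, and the
// second pass rewrites CONV_2D's input from t2 back to t5 so the CPU kernel
// once again consumes a float32 tensor.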
1549 
1550 TfLiteStatus Subgraph::RedoAllDelegates() {
1551   if (!delegates_undone_) return kTfLiteOk;
1552 
1553   delegates_undone_ = false;
1554   std::vector<TfLiteDelegate*> delegates_to_apply;
1555   delegates_applied_.swap(delegates_to_apply);
1556   for (auto* delegate : delegates_to_apply) {
1557     TF_LITE_ENSURE_STATUS(ModifyGraphWithDelegate(delegate));
1558   }
1559   return kTfLiteOk;
1560 }
1561 
1562 TfLiteStatus Subgraph::RemoveAllDelegates() {
1563   TF_LITE_ENSURE_STATUS(UndoAllDelegates());
1564   delegates_applied_.clear();
1565   delegates_undone_ = false;
1566   TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
1567   return kTfLiteOk;
1568 }
1569 
1570 bool Subgraph::HasDelegates() { return !delegates_applied_.empty(); }
1571 
1572 void Subgraph::EnsureTensorsVectorCapacity() {
1573   const size_t required_capacity = tensors_.size() + kTensorsCapacityHeadroom;
1574   if (required_capacity > tensors_.capacity()) {
1575     // Whenever it's necessary to increase the vector capacity, make it at
1576     // least twice as large. This matches the growth strategy of GCC's
1577     // libstdc++ `std::vector` and avoids frequently reallocating and
1578     // copying the underlying buffer.
1579     size_t reserved_capacity =
1580         std::max(required_capacity, tensors_.capacity() * 2);
1581     tensors_.reserve(reserved_capacity);
1582     context_.tensors = tensors_.data();
1583   }
1584 }
1585 
1586 TfLiteStatus Subgraph::EnsureMemoryAllocations() {
1587   if (memory_planner_) {
1588     state_ = kStateUninvokable;
1589     TF_LITE_ENSURE_OK(&context_, memory_planner_->PlanAllocations());
1590   }
1591   TF_LITE_ENSURE_OK(&context_, AllocateTensors());
1592   TF_LITE_ENSURE_EQ(&context_, state_, kStateInvokable);
1593   return kTfLiteOk;
1594 }
1595 
1596 TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
1597   TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(),
1598                                        "ModifyGraphWithDelegate");
1599 
1600   if (delegate == nullptr) {
1601     ReportError("Null delegate.");
1602     return kTfLiteDelegateError;
1603   }
1604 
1605   // Resets delegation & leaves graph in consistent state if delegate status is
1606   // not okay.
1607   auto reset_delegation_if_not_ok = [this](TfLiteStatus status) {
1608     if (status != kTfLiteOk) {
1609       TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
1610       ReportError(
1611           "Restored original execution plan after delegate application "
1612           "failure.");
1613       return kTfLiteDelegateError;
1614     }
1615     return kTfLiteOk;
1616   };
1617 
1618   // STEP 1: Verify & prepare graph for delegation.
1619   // ==============================================
1620 
1621   // Restore delegation state if applicable.
1622   TF_LITE_ENSURE_STATUS(RedoAllDelegates());
1623 
1624   const bool delegate_supports_dynamic_shapes =
1625       delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors;
1626   const auto pre_delegation_state = state_;
1627 
1628   if (state_ == kStateInvokableAndImmutable) {
1629     // A delegate that doesn't support dynamic shapes was already applied, so
1630     // we can assume tensor shapes have been propagated & there are no dynamic
1631     // tensors.
1632     // Reset the state to force tensor/op reallocation.
1633     state_ = kStateUninvokable;
1634   } else if (!delegate_supports_dynamic_shapes) {
1635     // Check if graph has dynamic tensors by preparing ops.
1636     int last_execution_plan_index_prepared;
1637     TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
1638         0, execution_plan_, &last_execution_plan_index_prepared));
1639     if (has_dynamic_tensors_) {
1640       TF_LITE_ENSURE_STATUS(EnsureMemoryAllocations());
1641       ReportError(
1642           "Attempting to use a delegate that only supports static-sized "
1643           "tensors with a graph that has dynamic-sized tensors.");
1644       return kTfLiteApplicationError;
1645     }
1646   }
1647 
1648   if (delegates_applied_.empty()) {
1649     // This is the first delegate being applied, so remember original execution
1650     // plan.
1651     // TODO(b/119623453): Restore execution plan to this state if delegate
1652     // application fails.
1653     pre_delegation_execution_plan_ = execution_plan_;
1654   }
1655 
1656   // STEP 2: Delegate replaces applicable nodes with delegate kernels.
1657   // =================================================================
1658 
1659   // Setup additional context interface.
1660   SwitchToDelegateContext();
1661   TfLiteStatus status = delegate->Prepare(&context_, delegate);
1662   // Remove additional context info.
1663   SwitchToKernelContext();
1664   TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(status));
1665 
1666   // STEP 3: Leave graph in consistent state based on delegate & previous state.
1667   // ===========================================================================
1668 
1669   if (!delegate_supports_dynamic_shapes) {
1670     // CASE 1: Current delegate does not support dynamic shapes.
1671     // Reset the state to force tensor/op reallocation.
1672     state_ = kStateUninvokable;
1673     TF_LITE_ENSURE_STATUS(
1674         reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1675     // After using a delegate which doesn't support dynamic tensors, make the
1676     // entire graph immutable.
1677     state_ = kStateInvokableAndImmutable;
1678   } else if (pre_delegation_state == kStateInvokableAndImmutable) {
1679     // CASE 2: Current delegate supports dynamic shapes, but a previous one
1680     // does not.
1681     // Make sure new delegate didn't mark a tensor as dynamic.
1682     int last_execution_plan_index_prepared;
1683     TF_LITE_ENSURE_STATUS(reset_delegation_if_not_ok(PrepareOpsStartingAt(
1684         0, execution_plan_, &last_execution_plan_index_prepared)));
1685     if (has_dynamic_tensors_) {
1686       TF_LITE_ENSURE_STATUS(RemoveAllDelegates());
1687       ReportError(
1688           "Cannot allow dynamic tensors due to previous delegation, resetting "
1689           "to original execution plan.");
1690       return kTfLiteApplicationError;
1691     }
1692     // Redo memory allocations & ensure state is set back to original value.
1693     TF_LITE_ENSURE_STATUS(
1694         reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1695     state_ = kStateInvokableAndImmutable;
1696   } else if (pre_delegation_state == kStateInvokable) {
1697     // CASE 3: Current delegate supports dynamic shapes, and the graph was
1698     // previously invokable.
1699     // Flush allocation now to leave it in a consistent state.
1700     TF_LITE_ENSURE_STATUS(
1701         reset_delegation_if_not_ok(EnsureMemoryAllocations()));
1702   }
1703   delegates_applied_.push_back(delegate);
1704 
1705   return status;
1706 }
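
// Illustrative sketch (not part of the original source): applications normally
// reach this code via Interpreter::ModifyGraphWithDelegate. On a delegate or
// application error the logic above restores the original execution plan, so
// falling back to CPU-only execution is safe.
//
//   if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
//     // The pre-delegation plan has been restored; continue on CPU.
//   }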
1707 
1708 TfLiteStatus Subgraph::SetCustomAllocationForTensor(
1709     int tensor_index, const TfLiteCustomAllocation& allocation, int64_t flags) {
1710   TfLiteTensor* tensor = &context_.tensors[tensor_index];
1711   TF_LITE_ENSURE(context(),
1712                  (tensor->allocation_type == kTfLiteArenaRw ||
1713                   tensor->allocation_type == kTfLiteArenaRwPersistent ||
1714                   tensor->allocation_type == kTfLiteCustom));
1715   // Don't check allocation.bytes here; that check happens after all ops have
1716   // been prepared, to allow tensor shape propagation.
1717   TF_LITE_ENSURE(context(), allocation.data != nullptr);
1718   if (!(flags & kTfLiteCustomAllocationFlagsSkipAlignCheck)) {
1719     const intptr_t data_ptr_value = reinterpret_cast<intptr_t>(allocation.data);
1720     TF_LITE_ENSURE(context(), data_ptr_value % kDefaultTensorAlignment == 0);
1721   }
1722 
1723   // Check whether the tensor already has a custom allocation.
1724   const auto alloc_it = std::find_if(
1725       custom_allocations_.begin(), custom_allocations_.end(),
1726       [tensor_index](
1727           const std::pair<int, TfLiteCustomAllocation>& existing_alloc) {
1728         return existing_alloc.first == tensor_index;
1729       });
1730   if (alloc_it == custom_allocations_.end()) {
1731     custom_allocations_.emplace_back(tensor_index, allocation);
1732   } else {
1733     // If tensor already has a custom alloc, just reassign.
1734     alloc_it->second = allocation;
1735   }
1736 
1737   tensor->allocation_type = kTfLiteCustom;
1738   tensor->data.data = allocation.data;
1739 
1740   return kTfLiteOk;
1741 }
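
// Illustrative sketch (not part of the original source): supplying a
// caller-owned buffer for an input tensor, typically via
// Interpreter::SetCustomAllocationForTensor. `buffer_bytes` and
// `input_tensor_index` are hypothetical; the buffer must stay valid until it
// is replaced or the interpreter is destroyed, and its size is verified later,
// once all ops have been prepared.
//
//   void* buffer = aligned_alloc(kDefaultTensorAlignment, buffer_bytes);
//   TfLiteCustomAllocation alloc{buffer, buffer_bytes};
//   TF_LITE_ENSURE_STATUS(interpreter->SetCustomAllocationForTensor(
//       input_tensor_index, alloc));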
1742 
1743 void Subgraph::SetName(const char* name) {
1744   if (name) {
1745     name_ = name;
1746   } else {
1747     name_ = "";
1748   }
1749 }
1750 
1751 const std::string& Subgraph::GetName() const { return name_; }
1752 
1753 void Subgraph::DumpMemoryPlannerDebugInfo() const {
1754   if (memory_planner_ == nullptr) return;
1755   memory_planner_->DumpDebugInfo(execution_plan());
1756 }
1757 
1758 TfLiteStatus Subgraph::PreserveAllTensorsExperimental() {
1759   if (memory_planner_) {
1760     ReportError(
1761         "PreserveAllTensorsExperimental called after memory was planned.");
1762     return kTfLiteError;
1763   }
1764   preserve_all_tensors_ = true;
1765   return kTfLiteOk;
1766 }
1767 
1768 std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() {
1769   return std::unique_ptr<GraphInfo>(new InterpreterInfo(this));
1770 }
1771 
1772 }  // namespace tflite
1773