/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/core/subgraph.h"
#include "tensorflow/lite/arena_planner.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/context_util.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/nnapi_delegate.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace {

TfLiteStatus ReportOpError(TfLiteContext* context, const TfLiteNode& node,
                           const TfLiteRegistration& registration,
                           int node_index, const char* message) {
  context->ReportError(
      context, "Node number %d (%s) %s.\n", node_index,
      registration.custom_name
          ? registration.custom_name
          : EnumNameBuiltinOperator(
                static_cast<BuiltinOperator>(registration.builtin_code)),
      message);
  return kTfLiteError;
}

// Stub function that returns kTfLiteError; it is registered for functions
// that are forbidden in the current context. We register this single function
// for several different function pointers to save compiled binary size.
// Please note the restrictions:
// * The type of the first parameter has to be `TfLiteContext*`.
// * All parameters must be trivially destructible. (E.g. no C++ objects.)
TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) {
  context->ReportError(context,
                       "The function is forbidden if not calling in delegate.");
  return kTfLiteError;
}

// Assigns ForbiddenContextFunction to `*func`, cast to a compatible function
// pointer type.
template <typename FunctionType>
void SetForbiddenContextFunction(FunctionType* func) {
  *func = reinterpret_cast<FunctionType>(ForbiddenContextFunction);
}

// Returns true if at least one tensor in the given list is kTfLiteDynamic.
template <typename TensorIntArray>
bool HasDynamicTensorImpl(const TfLiteContext& context,
                          const TensorIntArray& int_array) {
  for (int i : int_array) {
    const TfLiteTensor& tensor = context.tensors[i];
    if (tensor.allocation_type == kTfLiteDynamic) {
      return true;
    }
  }
  return false;
}

bool HasDynamicTensor(const TfLiteContext& context,
                      const TfLiteIntArray* int_array) {
  return HasDynamicTensorImpl(context, TfLiteIntArrayView{int_array});
}

// Gets the legacy TfLiteQuantizationParams from the current TfLiteQuantization.
TfLiteQuantizationParams GetLegacyQuantization(
    const TfLiteQuantization& quantization) {
  TfLiteQuantizationParams legacy_quantization;
  legacy_quantization.scale = 0;
  legacy_quantization.zero_point = 0;

  // If the quantization type isn't affine, return the empty
  // legacy_quantization.
  if (quantization.type != kTfLiteAffineQuantization) {
    return legacy_quantization;
  }

  auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(quantization.params);
  if (!affine_quantization || !affine_quantization->scale ||
      !affine_quantization->zero_point ||
      affine_quantization->scale->size != 1 ||
      affine_quantization->zero_point->size != 1) {
    return legacy_quantization;
  }

  // We know it's per-layer (per-tensor) quantization now.
  legacy_quantization.scale = affine_quantization->scale->data[0];
  legacy_quantization.zero_point = affine_quantization->zero_point->data[0];
  return legacy_quantization;
}
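
// Example (informal): an affine quantization with scale = {0.5} and
// zero_point = {127} maps to legacy params with scale 0.5 and zero_point 127.
// Per-channel parameters (scale->size > 1) fall back to the zeroed legacy
// struct above.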

}  // namespace

// A trivial implementation of GraphInfo around the Interpreter.
// NOTE: this interpreter info represents the subset of the
// graph that is executed according to execution plan. Thus,
// the indices are execution plan indices rather than raw node
// indices.
class InterpreterInfo : public GraphInfo {
 public:
  explicit InterpreterInfo(Subgraph* subgraph) : subgraph_(subgraph) {}

  size_t num_tensors() const override { return subgraph_->tensors().size(); }
  TfLiteTensor* tensor(size_t index) override {
    return &subgraph_->tensors()[index];
  }
  size_t num_nodes() const override {
    return subgraph_->execution_plan().size();
  }
  const TfLiteNode& node(size_t index) const override {
    int node_index = subgraph_->execution_plan()[index];
    return subgraph_->nodes_and_registration()[node_index].first;
  }
  const std::vector<int>& inputs() const override {
    return subgraph_->inputs();
  }
  const std::vector<int>& outputs() const override {
    return subgraph_->outputs();
  }
  const std::vector<int>& variables() const override {
    return subgraph_->variables();
  }

 public:
  Subgraph* subgraph_;
};
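
// Example (informal): with an execution plan of {2, 0, 1},
// InterpreterInfo::node(0) returns the TfLiteNode stored at raw node index 2
// in nodes_and_registration().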

Subgraph::Subgraph(ErrorReporter* error_reporter,
                   TfLiteExternalContext** external_contexts,
                   std::vector<std::unique_ptr<Subgraph>>* subgraphs)
    : context_(&owned_context_),
      error_reporter_(error_reporter),
      next_execution_plan_index_to_prepare_(0),
      external_contexts_(external_contexts),
      subgraphs_(subgraphs) {
  context_->impl_ = static_cast<void*>(this);
  context_->ResizeTensor = ResizeTensor;
  context_->ReportError = ReportErrorC;
  context_->AddTensors = AddTensors;
  context_->tensors = nullptr;
  context_->tensors_size = 0;
  context_->allow_fp32_relax_to_fp16 = false;
  context_->recommended_num_threads = -1;
  context_->GetExternalContext = GetExternalContext;
  context_->SetExternalContext = SetExternalContext;
  context_->profiler = nullptr;

  // Reserve some space for the tensors to avoid excessive resizing.
  tensors_.reserve(kTensorsReservedCapacity);
  nodes_and_registration().reserve(kTensorsReservedCapacity);
  // It is invalid to call these except from a TfLiteDelegate.
  SwitchToKernelContext();
}

Subgraph::~Subgraph() {
  for (auto& node_and_reg : nodes_and_registration_) {
    TfLiteNode& node = node_and_reg.first;
    TfLiteIntArrayFree(node.inputs);
    TfLiteIntArrayFree(node.outputs);
    TfLiteIntArrayFree(node.temporaries);
    if (node.builtin_data) free(node.builtin_data);
    OpFree(node_and_reg.second, node.user_data);
    node.builtin_data = nullptr;
  }

  for (size_t i = 0; i < context_->tensors_size; i++) {
    TfLiteTensor* tensor = &context_->tensors[i];
    if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
        tensor->delegate->FreeBufferHandle != nullptr) {
      tensor->delegate->FreeBufferHandle(context_, tensor->delegate,
                                         &tensor->buffer_handle);
    }
    TfLiteTensorFree(tensor);
  }
}

TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
    TfLiteContext* context, TfLiteRegistration registration,
    const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
  return static_cast<Subgraph*>(context->impl_)
      ->ReplaceNodeSubsetsWithDelegateKernels(registration, nodes_to_replace,
                                              delegate);
}

namespace {

// Copy a std::vector<int> to an existing TfLiteIntArray.
// This is a low-level data manipulation function, and it is the caller's
// responsibility to ensure the TfLiteIntArray is large enough.
void CopyVectorToTfLiteIntArray(const std::vector<int>& vec,
                                TfLiteIntArray* arr) {
  arr->size = vec.size();
  memcpy(arr->data, vec.data(), sizeof(int) * arr->size);
}

// This function allocates a contiguous memory block that contains a
// TfLiteDelegateParams followed by several TfLiteIntArrays.
// Calling `free` on the TfLiteDelegateParams* releases all of the allocated
// space at once.
//
// +-----------------------------------+
// | TfLiteDelegateParams              |
// | TfLiteDelegate* delegate;         |
// | TfLiteIntArray* nodes_to_replace; |--\
// | TfLiteIntArray* input_tensors;    |--+--\
// | TfLiteIntArray* output_tensors;   |--+--+--\
// +-----------------------------------+  |  |  |
// | TfLiteIntArray (variable size)    |<-/  |  |
// +-----------------------------------+     |  |
// | TfLiteIntArray (variable size)    |<----/  |
// +-----------------------------------+        |
// | TfLiteIntArray (variable size)    |<-------/
// +-----------------------------------+
TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate,
                                           const NodeSubset& node_subset) {
  // Step 1: Calculate the allocation size.
  int allocation_size = sizeof(TfLiteDelegateParams);

  int nodes_to_replace_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.nodes.size());
  allocation_size += nodes_to_replace_size;

  int input_tensors_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.input_tensors.size());
  allocation_size += input_tensors_size;

  int output_tensors_size =
      TfLiteIntArrayGetSizeInBytes(node_subset.output_tensors.size());
  allocation_size += output_tensors_size;

  // Step 2: Allocate the memory.
  // Use `char*` to conveniently step through the allocated space by bytes.
  char* allocation = reinterpret_cast<char*>(malloc(allocation_size));

  // Step 3: Fill all data structures.
  TfLiteDelegateParams* params =
      reinterpret_cast<TfLiteDelegateParams*>(allocation);
  params->delegate = delegate;
  allocation += sizeof(TfLiteDelegateParams);

  params->nodes_to_replace = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.nodes, params->nodes_to_replace);
  allocation += nodes_to_replace_size;

  params->input_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.input_tensors, params->input_tensors);
  allocation += input_tensors_size;

  params->output_tensors = reinterpret_cast<TfLiteIntArray*>(allocation);
  CopyVectorToTfLiteIntArray(node_subset.output_tensors,
                             params->output_tensors);
  allocation += output_tensors_size;

  return params;
}
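
// Usage sketch (illustrative; see ReplaceNodeSubsetsWithDelegateKernels
// below):
//   TfLiteDelegateParams* params = CreateDelegateParams(delegate, subset);
//   // `params` is handed to AddNodeWithParameters() as builtin_data, so the
//   // single free(node.builtin_data) in ~Subgraph releases the params struct
//   // and all three trailing TfLiteIntArrays at once.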

}  // namespace

TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
    TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace,
    TfLiteDelegate* delegate) {
  // Annotate the registration as DELEGATE op.
  registration.builtin_code = BuiltinOperator_DELEGATE;

  // Analyze the graph to find all independent node_subsets that are either
  // fully not-this-delegate or this-delegate computation.
  InterpreterInfo info(this);
  std::vector<NodeSubset> node_subsets;
  PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
                                           &node_subsets);

  execution_plan_.clear();

  for (auto& node_subset : node_subsets) {
    // Subsets claimed by the delegate should have a "macro" op created; the
    // other node_subsets (kTfNonPartition) just have their nodes added back to
    // the execution plan.
    switch (node_subset.type) {
      case NodeSubset::kTfNonPartition:
        for (auto it = node_subset.nodes.begin(); it != node_subset.nodes.end();
             ++it) {
          execution_plan_.push_back(*it);
        }
        break;
      case NodeSubset::kTfPartition: {
        int node_index;

        TfLiteDelegateParams* params =
            CreateDelegateParams(delegate, node_subset);
        TF_LITE_ENSURE_STATUS(AddNodeWithParameters(
            node_subset.input_tensors, node_subset.output_tensors, nullptr, 0,
            params, &registration, &node_index));

        // Initialize the output tensors' delegate-related fields.
        for (int tensor_index : node_subset.output_tensors) {
          TfLiteTensor* tensor = &tensors_[tensor_index];
          TF_LITE_ENSURE(context_, tensor->delegate == nullptr ||
                                       tensor->delegate == delegate);
          tensor->delegate = delegate;
        }

        // Associate the node with the delegate.
        TfLiteNode* node = &nodes_and_registration_[node_index].first;
        node->delegate = delegate;
      } break;
      case NodeSubset::kTfUnexplored:
        return kTfLiteError;
        break;
    }
  }
  return kTfLiteOk;
}
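
// Example (informal): if a delegate claims nodes {1, 2} of a linear graph
// 0 -> 1 -> 2 -> 3, partitioning yields the subsets {0}, {1, 2} and {3}; the
// middle subset is replaced by a single DELEGATE "macro" node, so the new
// execution plan contains three entries.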

TfLiteExternalContext* Subgraph::GetExternalContext(
    TfLiteExternalContextType type) {
  if (type >= 0 && type < kTfLiteMaxExternalContexts) {
    return external_contexts_[type];
  }
  return nullptr;
}

TfLiteExternalContext* Subgraph::GetExternalContext(
    struct TfLiteContext* context, TfLiteExternalContextType type) {
  return static_cast<Subgraph*>(context->impl_)->GetExternalContext(type);
}

void Subgraph::SetExternalContext(TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx) {
  if (type >= 0 && type < kTfLiteMaxExternalContexts) {
    external_contexts_[type] = ctx;
  }
}

void Subgraph::SetExternalContext(struct TfLiteContext* context,
                                  TfLiteExternalContextType type,
                                  TfLiteExternalContext* ctx) {
  return static_cast<Subgraph*>(context->impl_)->SetExternalContext(type, ctx);
}

// Gets a TfLiteIntArray* representing the execution plan. The interpreter owns
// this memory and it is only guaranteed to exist during the invocation of the
// delegate prepare.
TfLiteStatus Subgraph::GetExecutionPlan(TfLiteIntArray** execution_plan) {
  // TODO(aselle): Do not make a copy here
  plan_cache_.reset(TfLiteIntArrayCreate(execution_plan_.size()));
  *execution_plan = plan_cache_.get();
  static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]),
                "TfLiteIntArray and execution_plan do not contain same type.");
  std::memcpy(plan_cache_->data, execution_plan_.data(),
              sizeof(plan_cache_->data[0]) * execution_plan_.size());
  return kTfLiteOk;
}

// WARNING: This is an experimental interface that is subject to change.
// Entry point for the C node plugin API to get the execution plan.
TfLiteStatus Subgraph::GetExecutionPlan(struct TfLiteContext* context,
                                        TfLiteIntArray** execution_plan) {
  return static_cast<Subgraph*>(context->impl_)
      ->GetExecutionPlan(execution_plan);
}

TfLiteStatus Subgraph::SetInputs(std::vector<int> inputs) {
  TF_LITE_ENSURE_OK(&context_,
                    CheckTensorIndices("inputs", inputs.data(), inputs.size()));
  inputs_ = std::move(inputs);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetOutputs(std::vector<int> outputs) {
  TF_LITE_ENSURE_OK(
      &context_, CheckTensorIndices("outputs", outputs.data(), outputs.size()));
  outputs_ = std::move(outputs);
  return kTfLiteOk;
}

TfLiteStatus Subgraph::SetVariables(std::vector<int> variables) {
  TF_LITE_ENSURE_OK(&context_, CheckTensorIndices("variables", variables.data(),
                                                  variables.size()));
  variables_ = std::move(variables);
  return kTfLiteOk;
}

void Subgraph::SetCancellationFunction(void* data,
                                       bool (*check_cancelled_func)(void*)) {
  cancellation_data_ = data;
  check_cancelled_func_ = check_cancelled_func;
}

void Subgraph::ReserveNodes(int count) {
  nodes_and_registration_.reserve(count);
}

TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
                                          int length) {
  // Making sure kOptionalTensor is not re-defined to something other than -1.
  static_assert(kOptionalTensor == -1, "kOptionalTensor should be defined -1");

  for (int i = 0; i < length; i++) {
    int index = indices[i];
    // Continue if index == kOptionalTensor before the additional comparisons
    // below; size_t(-1) is always >= context_->tensors_size.
    if (index == kOptionalTensor) {
      continue;
    }
    if (index < 0 || static_cast<size_t>(index) >= context_->tensors_size) {
      ReportError(
          "Invalid tensor index %d in %s. The subgraph has %d tensors\n", index,
          label, context_->tensors_size);
      consistent_ = false;
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
                                     size_t dims_size, size_t* bytes) {
  // TODO(aselle): Check for overflow here using overflow.h in TensorFlow
  // MultiplyWithoutOverflow.
  TF_LITE_ENSURE(context_, bytes != nullptr);
  size_t count = 1;
  for (int k = 0; k < dims_size; k++) count *= dims[k];
  switch (type) {
    case kTfLiteFloat32:
      *bytes = sizeof(float) * count;
      break;
    case kTfLiteInt16:
      *bytes = sizeof(int16_t) * count;
      break;
    case kTfLiteInt32:
      *bytes = sizeof(int32_t) * count;
      break;
    case kTfLiteUInt8:
      *bytes = sizeof(uint8_t) * count;
      break;
    case kTfLiteInt64:
      *bytes = sizeof(int64_t) * count;
      break;
    case kTfLiteBool:
      *bytes = sizeof(bool) * count;
      break;
    case kTfLiteComplex64:
      *bytes = sizeof(std::complex<float>) * count;
      break;
    case kTfLiteInt8:
      *bytes = sizeof(int8_t) * count;
      break;
    default:
      ReportError(
          "Only float32, int8, int16, int32, int64, uint8, bool, complex64 "
          "supported currently.");
      return kTfLiteError;
  }
  return kTfLiteOk;
}
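
// Example: for a kTfLiteFloat32 tensor with dims {2, 3}, BytesRequired() sets
// *bytes to 2 * 3 * sizeof(float) = 24.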

TfLiteStatus Subgraph::AllocateTensors() {
  if (!consistent_) {
    ReportError("AllocateTensors() called on inconsistent model.");
    return kTfLiteError;
  }

  // Explicit (re)allocation is necessary if nodes have been changed or tensors
  // have been resized. For inputs marked as dynamic, we can't short-circuit the
  // allocation as the client may have done the resize manually.
  if (state_ != kStateUninvokable &&
      !HasDynamicTensorImpl(*context_, inputs())) {
    return kTfLiteOk;
  }

  next_execution_plan_index_to_prepare_ = 0;
  if (memory_planner_) {
    TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
  }

  TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());

  state_ = kStateInvokable;

  // Reset the variable tensors to zero after (re)allocating the tensors.
  // Developers shouldn't rely on the side effect of this function to reset
  // variable tensors. They should call `ResetVariableTensors` directly
  // instead.
  ResetVariableTensors();

  return kTfLiteOk;
}
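
// Note on the state machine used above: kStateUninvokable becomes
// kStateInvokable after a successful AllocateTensors(). The
// kStateInvokableAndImmutable state is only entered by
// ModifyGraphWithDelegate() (for delegates that require static-size tensors)
// and disallows any further graph mutation.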

// TODO(ycling): Support non-zero default values.
TfLiteStatus Subgraph::ResetVariableTensors() {
  for (auto& tensor : tensors_) {
    if (!tensor.is_variable) {
      continue;
    }

    // Variable tensors have to be `kTfLiteArenaRwPersistent`, and must be
    // allocated after the initial `PrepareOpsAndTensors()` is called.
    TF_LITE_ENSURE_EQ(context_, tensor.allocation_type,
                      kTfLiteArenaRwPersistent);
    TF_LITE_ENSURE(context_, tensor.data.raw != nullptr);

    memset(tensor.data.raw, 0, tensor.bytes);
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::AddNodeWithParameters(
    const std::vector<int>& inputs, const std::vector<int>& outputs,
    const char* init_data, size_t init_data_size, void* builtin_data,
    const TfLiteRegistration* registration, int* node_index) {
  if (state_ == kStateInvokableAndImmutable) {
    ReportError("AddNodeWithParameters is disallowed when graph is immutable.");
    return kTfLiteError;
  }
  state_ = kStateUninvokable;

  std::unique_ptr<void, decltype(free)*> builtin_data_deleter(builtin_data,
                                                              free);

  TF_LITE_ENSURE_OK(context_, CheckTensorIndices("node inputs", inputs.data(),
                                                 inputs.size()));
  TF_LITE_ENSURE_OK(
      &context_,
      CheckTensorIndices("node outputs", outputs.data(), outputs.size()));

  int new_node_index = nodes_and_registration_.size();
  if (node_index) *node_index = new_node_index;
  nodes_and_registration_.resize(nodes_and_registration_.size() + 1);
  auto& node_and_reg = nodes_and_registration_.back();
  TfLiteNode& node = node_and_reg.first;
  if (node.inputs) TfLiteIntArrayFree(node.inputs);
  if (node.outputs) TfLiteIntArrayFree(node.outputs);
  if (node.temporaries) TfLiteIntArrayFree(node.temporaries);

  // NOTE, here we are not using move semantics yet, since our internal
  // representation isn't std::vector, but in the future we would like to avoid
  // copies, so we want the interface to take r-value references now.
  node.inputs = ConvertVectorToTfLiteIntArray(inputs);
  node.outputs = ConvertVectorToTfLiteIntArray(outputs);
  node.temporaries = TfLiteIntArrayCreate(0);
  if (init_data) {
    node.user_data = OpInit(*registration, init_data, init_data_size);
  } else {
    node.user_data =
        OpInit(*registration,
               reinterpret_cast<const char*>(builtin_data_deleter.get()), 0);
  }

  node.builtin_data = builtin_data_deleter.release();
  // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size`
  // properly for nodes generated by ReplaceNodeSubsetsWithDelegateKernels.

  if (registration->builtin_code == BuiltinOperator_CUSTOM) {
    // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
    // `Operator` table is passed in.
    node.custom_initial_data = init_data;
    node.custom_initial_data_size = init_data_size;
  } else {
    node.custom_initial_data = nullptr;
    node.custom_initial_data_size = 0;
  }

  node.delegate = nullptr;
  node_and_reg.second = *registration;
  execution_plan_.push_back(new_node_index);
  return kTfLiteOk;
}
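
// Usage sketch (hypothetical indices, for illustration only):
//   int node_index;
//   subgraph->AddNodeWithParameters(/*inputs=*/{0, 1}, /*outputs=*/{2},
//                                   /*init_data=*/nullptr,
//                                   /*init_data_size=*/0, builtin_data,
//                                   &registration, &node_index);
// Ownership of `builtin_data` transfers to the subgraph; it is released with
// free() in ~Subgraph.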

TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
                                         const std::vector<int>& dims) {
  if (state_ == kStateInvokableAndImmutable) {
    ReportError("ResizeInputTensor is disallowed when graph is immutable.");
    return kTfLiteError;
  }

  // TODO(aselle): All bounds checks can be implemented as one-sided bounds
  // checks by casting to unsigned for efficiency. Profile before doing this.
  TF_LITE_ENSURE(context_,
                 tensor_index < context_->tensors_size && tensor_index >= 0);
  TfLiteTensor* tensor = &context_->tensors[tensor_index];

  // Short-circuit the state change if the dimensions don't change, avoiding
  // unnecessary (re)allocations.
  //
  // Note that it's required to check `tensor->data.raw != nullptr`. Otherwise
  // the subgraph won't allocate memory for a dynamic tensor when its size
  // is equal to the original tensor size.
  if (tensor->data.raw != nullptr &&
      EqualArrayAndTfLiteIntArray(tensor->dims, dims.size(), dims.data())) {
    return kTfLiteOk;
  }

  state_ = kStateUninvokable;
  return ResizeTensorImpl(tensor, ConvertVectorToTfLiteIntArray(dims));
}

TfLiteStatus Subgraph::PrepareOpsStartingAt(
    int first_execution_plan_index, int* last_execution_plan_index_prepared) {
  if (first_execution_plan_index == 0) {
    has_dynamic_tensors_ = false;
  }
  for (int execution_plan_index = first_execution_plan_index;
       execution_plan_index < execution_plan_.size(); execution_plan_index++) {
    int node_index = execution_plan_[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    const TfLiteRegistration& registration =
        nodes_and_registration_[node_index].second;
    EnsureTensorsVectorCapacity();
    if (OpPrepare(registration, &node) == kTfLiteError) {
      return ReportOpError(context_, node, registration, node_index,
                           "failed to prepare");
    }

    *last_execution_plan_index_prepared = execution_plan_index;

    // Discontinue if the node has dynamic outputs. Note that we don't
    // stop for dynamic temporary tensors since they won't affect the
    // sizes of other tensors in the graph.
    if (HasDynamicTensor(*context_, node.outputs)) {
      has_dynamic_tensors_ = true;
      return kTfLiteOk;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus Subgraph::PrepareOpsAndTensors() {
  if (!memory_planner_) {
    memory_planner_.reset(new ArenaPlanner(
        context_, std::unique_ptr<GraphInfo>(new InterpreterInfo(this)),
        /*preserve_inputs=*/true, /*preserve_intermediates=*/false));
    memory_planner_->PlanAllocations();
  }

  int last_exec_plan_index_prepared = 0;

  TF_LITE_ENSURE_STATUS(PrepareOpsStartingAt(
      next_execution_plan_index_to_prepare_, &last_exec_plan_index_prepared));
  TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
      next_execution_plan_index_to_prepare_, last_exec_plan_index_prepared));

  next_execution_plan_index_to_prepare_ = last_exec_plan_index_prepared + 1;
  return kTfLiteOk;
}
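
// Note: preparation is incremental. PrepareOpsAndTensors() prepares ops
// starting at next_execution_plan_index_to_prepare_ and stops after the first
// op that produces a dynamic output; Invoke() resumes preparation from that
// point once the dynamic shapes are known.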

TfLiteStatus Subgraph::Invoke() {
  if (!consistent_) {
    ReportError("Invoke called on model that is not consistent.");
    return kTfLiteError;
  }

  TfLiteStatus status = kTfLiteOk;
  if (state_ == kStateUninvokable) {
    ReportError("Invoke called on model that is not ready.");
    return kTfLiteError;
  }

  if (nnapi_delegate_) {
    if (next_execution_plan_index_to_prepare_ == execution_plan_.size()) {
      TF_LITE_ENSURE_OK(context_, nnapi_delegate_->Invoke(this));
      return kTfLiteOk;
    } else {
      // TODO(aselle): In the future, we would like this to be an
      // automatic tflite CPU fallback.
      ReportError(
          "NNAPI was requested, but dependent sized tensors "
          "being used.\n");
      return kTfLiteError;
    }
  }

  // Invocations are always done in node order.
  // Note that calling Invoke repeatedly will cause the original memory plan to
  // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
  // called.
  for (int execution_plan_index = 0;
       execution_plan_index < execution_plan_.size(); execution_plan_index++) {
    if (execution_plan_index == next_execution_plan_index_to_prepare_) {
      TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
      TF_LITE_ENSURE(context_, next_execution_plan_index_to_prepare_ >=
                                   execution_plan_index);
    }
    int node_index = execution_plan_[execution_plan_index];
    TfLiteNode& node = nodes_and_registration_[node_index].first;
    const TfLiteRegistration& registration =
        nodes_and_registration_[node_index].second;
    SCOPED_OPERATOR_PROFILE(profiler_, node_index);

    // TODO(ycling): This is an extra loop through the inputs to check if the
    // data needs to be copied from a delegate buffer to raw memory, which is
    // often not needed. We may want to cache this in Prepare to know whether
    // this needs to be done for a node or not.
    for (int i = 0; i < node.inputs->size; ++i) {
      int tensor_index = node.inputs->data[i];
      if (tensor_index == kOptionalTensor) {
        continue;
      }
      TfLiteTensor* tensor = &tensors_[tensor_index];
      if (tensor->delegate && tensor->delegate != node.delegate &&
          tensor->data_is_stale) {
        TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
      }
    }

    if (check_cancelled_func_ != nullptr &&
        check_cancelled_func_(cancellation_data_)) {
      ReportError("Client requested cancel during Invoke()");
      return kTfLiteError;
    }

    EnsureTensorsVectorCapacity();
    tensor_resized_since_op_invoke_ = false;
    if (OpInvoke(registration, &node) == kTfLiteError) {
      return ReportOpError(context_, node, registration, node_index,
                           "failed to invoke");
    }

    // Force execution prep for downstream ops if the latest op triggered the
    // resize of a dynamic tensor.
    if (tensor_resized_since_op_invoke_ &&
        HasDynamicTensor(*context_, node.outputs)) {
      next_execution_plan_index_to_prepare_ = execution_plan_index + 1;
    }
  }

  return status;
}
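
// Note: the per-input loop above only calls EnsureTensorDataIsReadable() when
// a tensor is marked data_is_stale and is consumed by a node that belongs to
// a different delegate (or to no delegate), i.e. when data may need to be
// copied out of a delegate buffer back into raw memory.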

TfLiteStatus Subgraph::ResizeTensor(TfLiteContext* context,
                                    TfLiteTensor* tensor,
                                    TfLiteIntArray* new_size) {
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Interpreter to call into the member function ResizeTensorImpl
  // (this function is static).
  return static_cast<Subgraph*>(context->impl_)
      ->ResizeTensorImpl(tensor, new_size);
}

void Subgraph::ReportErrorImpl(const char* format, va_list args) {
  error_reporter_->Report(format, args);
}

void Subgraph::ReportErrorC(TfLiteContext* context, const char* format, ...) {
  va_list args;
  va_start(args, format);
  auto* f = static_cast<Subgraph*>(context->impl_);
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ReportErrorImpl
  // (this function is static).
  f->ReportErrorImpl(format, args);
  va_end(args);
}

// Entry point for C node plugin API to report an error.
void Subgraph::ReportError(const char* format, ...) {
  va_list args;
  va_start(args, format);
  auto* f = static_cast<Subgraph*>(context_->impl_);
  // Note here that context_->impl_ is recovering the this pointer for an
  // instance of Subgraph to call into the member function ReportErrorImpl.
  f->ReportErrorImpl(format, args);
  va_end(args);
}

TfLiteStatus Subgraph::AddTensors(int tensors_to_add,
                                  int* first_new_tensor_index) {
  const size_t base_index = tensors_.size();
  if (first_new_tensor_index) *first_new_tensor_index = base_index;
  tensors_.resize(tensors_.size() + tensors_to_add);
  for (size_t i = base_index; i < tensors_.size(); i++) {
    memset(&tensors_[i], 0, sizeof(tensors_[i]));
    tensors_[i].buffer_handle = kTfLiteNullBufferHandle;
  }
  context_->tensors = tensors_.data();
  context_->tensors_size = tensors_.size();
  return kTfLiteOk;
}

TfLiteStatus Subgraph::AddTensors(TfLiteContext* context, int tensors_to_add,
                                  int* first_new_tensor_index) {
  // Note here that context->impl_ is recovering the this pointer for an
  // instance of Interpreter to call into the member function AddTensors
  // (this function is static).
  return static_cast<Subgraph*>(context->impl_)
      ->AddTensors(tensors_to_add, first_new_tensor_index);
}

TfLiteStatus Subgraph::GetNodeAndRegistration(
    int node_index, TfLiteNode** node, TfLiteRegistration** registration) {
  TF_LITE_ENSURE(context_, node_index >= 0);
  auto nodes_size = nodes_and_registration_.size();
  TF_LITE_ENSURE(context_, static_cast<size_t>(node_index) < nodes_size);
  TF_LITE_ENSURE(context_, node != nullptr && registration != nullptr);
  auto& node_and_reg = nodes_and_registration_[node_index];
  *node = &node_and_reg.first;
  *registration = &node_and_reg.second;
  return kTfLiteOk;
}

TfLiteStatus Subgraph::GetNodeAndRegistration(
    struct TfLiteContext* context, int node_index, TfLiteNode** node,
    TfLiteRegistration** registration) {
  return static_cast<Subgraph*>(context->impl_)
      ->GetNodeAndRegistration(node_index, node, registration);
}

TfLiteStatus Subgraph::SetTensorParametersReadOnly(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantization quantization, const char* buffer,
    size_t bytes, const Allocation* allocation) {
  if (state_ == kStateInvokableAndImmutable) {
    ReportError(
        "SetTensorParametersReadOnly is disallowed when graph is immutable.");
    return kTfLiteError;
  }

  TF_LITE_ENSURE(context_,
                 tensor_index < context_->tensors_size && tensor_index >= 0);
  // For most tensors we know exactly how much memory is necessary so we can
  // ensure the buffer is large enough. However, we need to skip string tensors
  // because their sizes change with the contents of the individual strings.
  if (type != kTfLiteString) {
    size_t required_bytes;
    TF_LITE_ENSURE_OK(context_,
                      BytesRequired(type, dims, rank, &required_bytes));
    TF_LITE_ENSURE_EQ(context_, required_bytes, bytes);
  }

  TfLiteTensor& tensor = context_->tensors[tensor_index];
  if (type == tensor.type &&
      EqualArrayAndTfLiteIntArray(tensor.dims, rank, dims)) {
    // Fast path which does not invalidate the invokable property.
    TfLiteTensorDataFree(&tensor);
    TfLiteQuantizationFree(&tensor.quantization);
    tensor.data.raw = const_cast<char*>(buffer);
    if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(rank, dims);
    tensor.params = GetLegacyQuantization(quantization);
    tensor.quantization = quantization;
    tensor.allocation_type = kTfLiteMmapRo;
    tensor.allocation = allocation;
  } else {
    state_ = kStateUninvokable;
    TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
                      GetLegacyQuantization(quantization),
                      const_cast<char*>(buffer), bytes, kTfLiteMmapRo,
                      allocation, false, &tensor);
    // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
    // if there are other required callers.
    tensor.quantization = quantization;
  }
  return kTfLiteOk;
}

// Set description of inputs/outputs/data/fptrs for node `node_index`.
// This variant assumes an external buffer has been allocated of size `bytes`.
// The lifetime of the buffer must be greater than or equal to that of the
// Interpreter.
TfLiteStatus Subgraph::SetTensorParametersReadWrite(
    int tensor_index, TfLiteType type, const char* name, const size_t rank,
    const int* dims, TfLiteQuantization quantization, bool is_variable) {
  if (state_ == kStateInvokableAndImmutable) {
    ReportError(
        "SetTensorParametersReadWrite is disallowed when graph is immutable.");
    return kTfLiteError;
  }
  TF_LITE_ENSURE(context_,
                 tensor_index < context_->tensors_size && tensor_index >= 0);
  size_t required_bytes = 0;
  if (type != kTfLiteString) {
    // These types will be allocated in our arena so we need to record how
    // many bytes we will need based on the dimensions. String tensors are
    // allocated dynamically and we can't know ahead of time how much space
    // they will require.
    TF_LITE_ENSURE_OK(context_,
                      BytesRequired(type, dims, rank, &required_bytes));
  }

  TfLiteAllocationType allocation_type = kTfLiteArenaRw;
  if (type == kTfLiteString) {
    if (is_variable) {
      // We don't have a real use case for a string variable tensor.
      ReportError("String variable tensor isn't supported.");
      return kTfLiteError;
    }
    allocation_type = kTfLiteDynamic;
  } else if (is_variable) {
    allocation_type = kTfLiteArenaRwPersistent;
  }

  TfLiteTensor& tensor = context_->tensors[tensor_index];
  TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims),
                    GetLegacyQuantization(quantization),
                    /*buffer=*/nullptr, required_bytes, allocation_type,
                    nullptr, is_variable, &tensor);
  // TODO(suharshs): Update TfLiteTensorReset to include the new quantization
  // if there are other required callers.
  tensor.quantization = quantization;
  return kTfLiteOk;
}
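
// Summary of the allocation type chosen above:
//   non-string, !is_variable -> kTfLiteArenaRw (planned in the memory arena)
//   non-string, is_variable  -> kTfLiteArenaRwPersistent
//   string                   -> kTfLiteDynamic (string variables are rejected)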

TfLiteStatus Subgraph::SetExecutionPlan(const std::vector<int>& new_plan) {
  for (int node_index : new_plan) {
    TF_LITE_ENSURE(context_, node_index >= 0 &&
                                 node_index < nodes_and_registration_.size());
  }
  execution_plan_ = new_plan;
  return kTfLiteOk;
}

TfLiteStatus Subgraph::ResizeTensorImpl(TfLiteTensor* tensor,
                                        TfLiteIntArray* new_size) {
  // Note that in theory we could resize kTfLiteArenaRwPersistent tensors too.
  if (tensor->allocation_type == kTfLiteArenaRw ||
      tensor->allocation_type == kTfLiteDynamic ||
      tensor->allocation_type == kTfLiteArenaRwPersistent) {
    tensor_resized_since_op_invoke_ |=
        TfLiteIntArrayEqual(tensor->dims, new_size) == 0;
    if (tensor->type != kTfLiteString) {
      size_t bytesRequired;
      TfLiteStatus status = BytesRequired(tensor->type, new_size->data,
                                          new_size->size, &bytesRequired);
      if (status != kTfLiteOk) {
        TfLiteIntArrayFree(new_size);
        return kTfLiteError;
      }

      // Realloc space for kTfLiteDynamic tensors.
      TfLiteTensorRealloc(bytesRequired, tensor);
      tensor->bytes = bytesRequired;
    }
    if (tensor->dims) TfLiteIntArrayFree(tensor->dims);
    tensor->dims = new_size;

    if (tensor->allocation_type != kTfLiteDynamic) {
      tensor->data.raw = nullptr;
    }
  } else {
    // kTfLiteMmapRo tensors are stored in the flatbuffer and are therefore
    // of fixed size.
    TfLiteIntArrayFree(new_size);
    ReportError("Attempting to resize a fixed-size tensor.");
    return kTfLiteError;
  }
  return kTfLiteOk;
}

void Subgraph::UseNNAPI(bool enable) {
  // TODO(aselle): This is a workaround for finding if NNAPI exists.
  // We also need to make sure getLibraryHandle() is renamed to be NNAPI
  // prefixed.
  if (!NNAPIDelegate::IsSupported()) enable = false;
  if (!enable) {
    nnapi_delegate_.reset();
  } else if (!nnapi_delegate_) {
    nnapi_delegate_.reset(new NNAPIDelegate);
  }
}

void Subgraph::SwitchToDelegateContext() {
  context_->GetNodeAndRegistration = GetNodeAndRegistration;
  context_->ReplaceNodeSubsetsWithDelegateKernels =
      ReplaceNodeSubsetsWithDelegateKernels;
  context_->GetExecutionPlan = GetExecutionPlan;
}

void Subgraph::SwitchToKernelContext() {
  context_->GetNodeAndRegistration = [](struct TfLiteContext* context,
                                        int node_index, TfLiteNode** node,
                                        TfLiteRegistration** registration) {
    return ForbiddenContextFunction(context);
  };
  context_->ReplaceNodeSubsetsWithDelegateKernels =
      [](TfLiteContext* context, TfLiteRegistration registration,
         const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) {
        return ForbiddenContextFunction(context);
      };
  context_->GetExecutionPlan = [](struct TfLiteContext* context,
                                  TfLiteIntArray**) {
    return ForbiddenContextFunction(context);
  };
}
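
// Note: ModifyGraphWithDelegate() below brackets the delegate's Prepare() call
// with SwitchToDelegateContext() / SwitchToKernelContext(), so
// GetNodeAndRegistration, ReplaceNodeSubsetsWithDelegateKernels and
// GetExecutionPlan are only usable from inside delegate->Prepare().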

TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
  if (state_ == kStateInvokableAndImmutable) {
    ReportError(
        "ModifyGraphWithDelegate is disallowed when graph is immutable.");
    return kTfLiteError;
  }

  if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) {
    int last_execution_plan_index_prepared;
    TF_LITE_ENSURE_OK(&context_, PrepareOpsStartingAt(
                                     0, &last_execution_plan_index_prepared));
    if (has_dynamic_tensors_) {
      ReportError(
          "Attempting to use a delegate that only supports static-sized "
          "tensors with a graph that has dynamic-sized tensors.");
      return kTfLiteError;
    }
  }

  const bool was_invokable_before_delegate = state_ == kStateInvokable;

  // TODO(aselle): Consider if it is worth storing pointers to delegates.
  // Set up the additional context interface.
  SwitchToDelegateContext();

  TfLiteStatus status = delegate->Prepare(context_, delegate);

  // Remove the additional context info.
  SwitchToKernelContext();

  TF_LITE_ENSURE_OK(context_, status);

  // If the memory planner has already been created, we need to execute
  // planning again to account for the updated graph topology.
  if (memory_planner_) {
    state_ = kStateUninvokable;
    TF_LITE_ENSURE_OK(context_, memory_planner_->PlanAllocations());
  }

  if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) {
    // Reset the state to force tensor/op reallocation.
    state_ = kStateUninvokable;
    TF_LITE_ENSURE_OK(context_, AllocateTensors());
    TF_LITE_ENSURE_EQ(context_, state_, kStateInvokable);
    // After using a delegate which doesn't support dynamic tensors, make the
    // entire graph immutable.
    state_ = kStateInvokableAndImmutable;
  } else if (was_invokable_before_delegate) {
    // If the graph was invokable prior to delegate application, flush
    // allocation now to leave it in a consistent state.
    TF_LITE_ENSURE_OK(context_, AllocateTensors());
    TF_LITE_ENSURE_EQ(context_, state_, kStateInvokable);
  }

  return status;
}

}  // namespace tflite