/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace
ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_all_tensors, int tensor_alignment)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment),
      persistent_arena_(kDefaultArenaAlignment),
      preserve_all_tensors_(preserve_all_tensors),
      tensor_alignment_(tensor_alignment) {}
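// Illustrative call sequence (a sketch only; in the TF Lite runtime the
// interpreter's Subgraph owns the planner and drives these calls, and
// `last_node` below stands in for the index of the final execution node):
//
//   ArenaPlanner planner(context, std::move(graph_info),
//                        /*preserve_all_tensors=*/false,
//                        /*tensor_alignment=*/64);
//   planner.PlanAllocations();                 // Derive tensor lifetimes.
//   planner.ExecuteAllocations(0, last_node);  // Assign offsets, commit the
//                                              // arenas, resolve pointers.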

ArenaPlanner::~ArenaPlanner() {}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = *graph_info_->tensor(i);
      if (tensor.allocation_type == kTfLiteArenaRw) {
        TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }

  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Mark every tensor as neither allocated nor deallocated yet.
  alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(graph_info_->num_tensors(), 0);

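  // The two lambdas below record tensor lifetimes: `allocate` notes the first
  // node at which a tensor must exist, and `deallocate` notes the node after
  // which it is no longer needed. These bounds later become the
  // [first_node, last_node] range handed to the arena in
  // CalculateAllocations().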
  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // We don't need to deallocate a tensor that was never allocated.
      // This can happen with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the entire inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count for variable tensors by one, so it will
    // never be deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and it should never be
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
  }

  // Queue all graph inputs for allocation and make sure they are never
  // overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      refcounts[tensor_index]++;
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

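  // The walk below gives every intermediate tensor its lifetime: a tensor is
  // considered allocated at the node that produces it and, unless all tensors
  // are being preserved, deallocated at the node that consumes its last
  // remaining reference.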
  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_all_tensors_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
  alloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  allocs_.resize(graph_info_->num_tensors());
  // Set allocation and deallocation for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      if (!preserve_all_tensors_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

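  // With lifetimes known, compute arena offsets for the requested node range,
  // commit both arenas (which may allocate or resize their underlying
  // buffers), and then refresh every tensor's data pointer.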
  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
  TF_LITE_ENSURE_STATUS(Commit());

  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    // TODO(ahentz): we could do this only for the tensors that were modified
    // in CalculateAllocations(), instead of redoing it for tensors that
    // already had proper pointers. However we must be very careful, because
    // SimpleMemoryArena::Commit() could move the base pointer.
    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
  }

  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate underlying buffer.
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  // Resolve allocations for all tensors not on the persistent arena.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

void ArenaPlanner::DumpDebugInfo(const std::vector<int>& execution_plan) const {
  arena_.DumpDebugInfo("kTfLiteArenaRw Dump:", execution_plan);
  persistent_arena_.DumpDebugInfo("kTfLiteArenaRwPersistent Dump:",
                                  execution_plan);
}

TfLiteStatus ArenaPlanner::Commit() {
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
  return kTfLiteOk;
}

std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
                                                                int last_node) {
  auto tensor_compare = [this](int idx1, int idx2) {
    // Tensors that live for the whole duration of inference are allocated at
    // the beginning of the memory slice. Their relative order does not
    // matter, so they are simply sorted by index.
    if (this->alloc_node_[idx1] == 0 &&
        this->dealloc_node_[idx1] == kNodeNotAssigned) {
      if (this->alloc_node_[idx2] == 0 &&
          this->dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (this->alloc_node_[idx2] == 0 &&
        this->dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = this->graph_info_->tensor(idx1)->bytes;
    auto size2 = this->graph_info_->tensor(idx2)->bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return this->alloc_node_[idx1] < this->alloc_node_[idx2];
  };

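  // Collect the tensors whose allocation falls in [first_node, last_node] and
  // order them with the comparator above: whole-lifetime tensors first, then
  // larger tensors before smaller ones. Giving bigger, longer-lived tensors
  // their offsets first is a greedy heuristic that tends to reduce
  // fragmentation of the shared arena.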
  std::vector<int32_t> tensor_order;
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
      tensor_order.push_back(i);
    }
  }
  // Sort the indices into the order in which their allocation offsets will be
  // calculated.
  std::sort(tensor_order.begin(), tensor_order.end(), tensor_compare);

  return tensor_order;
}

TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
  // Indices of tensors in the order their allocation offsets will be
  // calculated.
  const std::vector<int32_t> tensor_order =
      CreateTensorAllocationVector(first_node, last_node);

  // Deallocate any tensor that was already allocated, so its region can be
  // re-planned below.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw &&
        allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
    }
  }

  // Allocate the tensors in the computed order.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[tensor_index].size to avoid re-allocating persistent
    // tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
          context_, tensor_alignment_, tensor.bytes, tensor_index,
          /*first_node=*/alloc_node_[tensor_index],
          /*last_node=*/std::numeric_limits<int32_t>::max(),
          &allocs_[tensor_index]));
    }
  }
  return kTfLiteOk;
}

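// Writes the tensor's data pointer based on the committed arena and the
// planned offset. This has to be re-done after every Commit(), since
// committing may reallocate the arena's underlying buffer and move its base
// address.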
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving it as a
    // nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}

}  // namespace tflite