/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/arena_planner.h"

#include <stddef.h>

#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/graph_info.h"
#include "tensorflow/lite/simple_memory_arena.h"

namespace tflite {
namespace {

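// Sentinel value stored in alloc_node_ / dealloc_node_ for tensors whose
// allocation or deallocation point has not been assigned (including tensors
// that are never deallocated).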
constexpr int32_t kNodeNotAssigned = std::numeric_limits<int32_t>::max();

}  // namespace

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_inputs, bool preserve_intermediates,
                           int tensor_alignment)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment),
      persistent_arena_(kDefaultArenaAlignment),
      preserve_inputs_(preserve_inputs),
      preserve_intermediates_(preserve_intermediates),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {}

std::intptr_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.ClearPlan());
  TF_LITE_ENSURE_STATUS(persistent_arena_.ClearPlan());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ResetAllocationsAfter(int node) {
  for (int i = 0; i < static_cast<int>(allocs_.size()); ++i) {
    if (allocs_[i].first_node > node && allocs_[i].size > 0) {
      TfLiteTensor& tensor = *graph_info_->tensor(i);
      if (tensor.allocation_type == kTfLiteArenaRw) {
        TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[i]));
        allocs_[i].reset();
        tensor.data.raw = nullptr;
      }
    }
  }

  return kTfLiteOk;
}

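// PlanAllocations computes the lifetime of every tensor: alloc_node_[t] is
// the index of the first node that needs tensor t, and dealloc_node_[t] is
// the index of the last node that reads it. Lifetimes are derived from a
// simple reference count: each consuming node adds one reference, while graph
// outputs, variables and (optionally) preserved inputs get an extra reference
// so their count never reaches zero and they are never deallocated.
//
// Illustrative example (a hypothetical two-node graph, not taken from any
// real model): for `input -> node0 -> t0 -> node1 -> output`, this pass
// yields
//   alloc_node_[input]  = 0,  dealloc_node_[input]  = 0 (or kNodeNotAssigned
//                                                        if preserve_inputs_),
//   alloc_node_[t0]     = 0,  dealloc_node_[t0]     = 1,
//   alloc_node_[output] = 1,  dealloc_node_[output] = kNodeNotAssigned.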
TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  TF_LITE_ENSURE_STATUS(ResetAllocations());
  // Reset the lifetime markers of all tensors to "not assigned".
  alloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.assign(graph_info_->num_tensors(), kNodeNotAssigned);

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(graph_info_->num_tensors(), 0);

  auto allocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] != kNodeNotAssigned) {
      // Tensor has already been allocated.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    alloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  auto deallocate = [this](int node, int tensor) -> TfLiteStatus {
    if (alloc_node_[tensor] == kNodeNotAssigned) {
      // The tensor was never allocated, so there is nothing to deallocate.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, dealloc_node_[tensor] == kNodeNotAssigned);
    dealloc_node_[tensor] = node;
    return kTfLiteOk;
  };

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }

  // Variable tensors must also never be overwritten and need to stay alive
  // for the entire inference.
  for (int tensor_index : graph_info_->variables()) {
    // Increase the reference count of each variable tensor by one, so it will
    // never be deallocated.
    refcounts[tensor_index]++;
    // `variables` is a subgraph-level list and it should never contain
    // kTfLiteOptionalTensor.
    TF_LITE_ENSURE(context_, tensor_index != kTfLiteOptionalTensor);
    // Variable tensors should be allocated at the very beginning.
    TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
  }

  // Queue all graph inputs for allocation. If preserve_inputs_ is true, make
  // sure they are never overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kTfLiteOptionalTensor) {
      if (preserve_inputs_) {
        refcounts[tensor_index]++;
      }
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Count references to node input tensors.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kTfLiteOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Go through the graph in execution order.
  for (size_t i = 0; i < graph_info_->num_execution_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_intermediates_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kTfLiteOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

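// ExecuteAllocations finishes planning for the nodes in [first_node,
// last_node]: it extends the lifetime arrays to cover temporary tensors
// created during op preparation, recomputes arena offsets, commits both
// arenas, and finally resolves every tensor's data pointer.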
TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
  alloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  dealloc_node_.resize(graph_info_->num_tensors(), kNodeNotAssigned);
  allocs_.resize(graph_info_->num_tensors());
  // Set allocation and deallocation for temporary tensors.
  for (size_t i = first_node; i <= static_cast<size_t>(last_node) &&
                              i < graph_info_->num_execution_nodes();
       ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int j = 0; j < node_temporaries->size; ++j) {
      int tensor_index = node_temporaries->data[j];
      alloc_node_[tensor_index] = i;
      if (!preserve_intermediates_) {
        dealloc_node_[tensor_index] = i;
      }
    }
  }

  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
  TF_LITE_ENSURE_STATUS(Commit());

  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    // TODO(ahentz): we could do this only for the tensors that were modified
    // in CalculateAllocations(), instead of redoing it for tensors that
    // already had proper pointers. However we must be very careful, because
    // SimpleMemoryArena::Commit() could move the base pointer.
    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
  }

  return kTfLiteOk;
}

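// ReleaseNonPersistentMemory() and AcquireNonPersistentMemory() let a client
// temporarily hand back the non-persistent arena buffer (e.g. while the
// interpreter is idle) and re-acquire it before the next invocation. A rough
// usage sketch, assuming `planner` is a hypothetical pointer to an already
// planned ArenaPlanner:
//
//   planner->ReleaseNonPersistentMemory();   // buffer freed, pointers nulled
//   // ... memory is available for other uses ...
//   planner->AcquireNonPersistentMemory();   // buffer re-committed, pointers
//                                            // resolved again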
TfLiteStatus ArenaPlanner::ReleaseNonPersistentMemory() {
  // Clear the non-persistent arena's buffer.
  TF_LITE_ENSURE_STATUS(arena_.ReleaseBuffer());
  // Set data pointers for all non-persistent tensors to nullptr.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      tensor.data.raw = nullptr;
    }
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::AcquireNonPersistentMemory() {
  // First commit arena_ to allocate the underlying buffer.
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  // Resolve allocations for all tensors not on the persistent arena.
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    TfLiteTensor& tensor = *graph_info_->tensor(i);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
    }
  }
  return kTfLiteOk;
}

bool ArenaPlanner::HasNonPersistentMemory() {
  return arena_.GetBufferSize() != 0;
}

TfLiteStatus ArenaPlanner::Commit() {
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
  return kTfLiteOk;
}

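// Produces the order in which tensor offsets will be computed: tensors that
// live for the whole inference (allocated at node 0 and never deallocated)
// come first, sorted by index; the remaining tensors follow in non-increasing
// order of size, with ties broken by allocation time. Planning larger tensors
// first is a common greedy heuristic that tends to reduce fragmentation in
// the arena.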
std::vector<int32_t> ArenaPlanner::CreateTensorAllocationVector(int first_node,
                                                                int last_node) {
  auto tensor_compare = [this](int idx1, int idx2) {
    // Tensors that have a lifespan across the whole model inference time are
    // allocated at the beginning of the memory slice. Their relative order
    // doesn't actually matter, so here they are sorted by index.
    if (this->alloc_node_[idx1] == 0 &&
        this->dealloc_node_[idx1] == kNodeNotAssigned) {
      if (this->alloc_node_[idx2] == 0 &&
          this->dealloc_node_[idx2] == kNodeNotAssigned) {
        return idx1 < idx2;
      }
      return true;
    }
    if (this->alloc_node_[idx2] == 0 &&
        this->dealloc_node_[idx2] == kNodeNotAssigned) {
      return false;
    }

    // All other tensors are sorted in non-increasing order of their size.
    auto size1 = this->graph_info_->tensor(idx1)->bytes;
    auto size2 = this->graph_info_->tensor(idx2)->bytes;
    if (size1 != size2) {
      return size1 > size2;
    }
    // Tensors with equal size are sorted in order of their allocation time.
    return this->alloc_node_[idx1] < this->alloc_node_[idx2];
  };

  std::vector<int32_t> tensor_order;
  for (int i = 0; i < static_cast<int>(graph_info_->num_tensors()); ++i) {
    if (alloc_node_[i] >= first_node && alloc_node_[i] <= last_node) {
      tensor_order.push_back(i);
    }
  }
  // Sort the tensors into the order in which their allocation offsets will be
  // calculated.
  std::sort(tensor_order.begin(), tensor_order.end(), tensor_compare);

  return tensor_order;
}

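// Assigns an arena offset to every tensor in the order computed above.
// Arena-RW tensors that already had an allocation are deallocated first so
// they can be re-planned; persistent tensors are allocated only once, with a
// lifetime that extends to the end of the graph.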
TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
  // Indices of tensors in the order in which their allocation offsets will be
  // calculated.
  const std::vector<int32_t> tensor_order =
      CreateTensorAllocationVector(first_node, last_node);

  // Deallocate if the tensor was already allocated.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw &&
        allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
    }
  }

  // Allocate arena space for the tensors in the computed order.
  for (const auto& tensor_index : tensor_order) {
    TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
    if (tensor.allocation_type == kTfLiteArenaRw) {
      TF_LITE_ENSURE_STATUS(
          arena_.Allocate(context_, tensor_alignment_, tensor.bytes,
                          tensor_index, alloc_node_[tensor_index],
                          dealloc_node_[tensor_index], &allocs_[tensor_index]));
    }
    // Check allocs_[].size to prevent reallocation of persistent tensors.
    if (tensor.allocation_type == kTfLiteArenaRwPersistent &&
        allocs_[tensor_index].size == 0) {
      TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
          context_, tensor_alignment_, tensor.bytes, tensor_index,
          /*first_node=*/alloc_node_[tensor_index],
          /*last_node=*/std::numeric_limits<int32_t>::max(),
          &allocs_[tensor_index]));
    }
  }
  return kTfLiteOk;
}

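// Translates the arena-relative offset recorded for a tensor into an absolute
// data pointer. This must be re-done after every Commit(), since committing
// an arena may move its underlying buffer.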
TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving it as
    // nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}

}  // namespace tflite