/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/micro_allocator.h"

#include <algorithm>
#include <cstddef>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

namespace tflite {

namespace {
// Used to hold information used during allocation calculations.
struct AllocationInfo {
  size_t bytes;
  int first_created;
  int last_used;
  bool needs_allocating;
  void** output_ptr;
};

// We align tensor buffers to 16-byte boundaries, since this is a common
// requirement for SIMD extensions.
constexpr int kBufferAlignment = 16;
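
// Illustrative sketch only (the specific values are ours, not from this
// file): the alignment helpers from memory_helpers.h used below behave
// roughly like
//
//   size_t padded = AlignSizeUp(10, kBufferAlignment);          // == 16
//   uint8_t* start = AlignPointerUp(arena_ptr, kBufferAlignment);
//
// so every planned buffer starts on a 16-byte boundary, whatever its
// requested size.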

// If building with GNU clib from GCC 4.8.x or lower, `max_align_t` is not a
// member of `std`. If using a newer version of clib, we import `max_align_t`
// into the local anonymous namespace to be able to use it like the global
// `max_align_t` from the older clib.
#if defined(__GNUC__) && defined(__GNUC_PREREQ)
#if __GNUC_PREREQ(4, 9)
using std::max_align_t;
#endif
#else
// We assume other compiler/clib configurations don't have this issue.
using std::max_align_t;
#endif

class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
 public:
  explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
      : memory_allocator_(memory_allocator) {}

  void* Allocate(size_t size) override {
    // Align to an address that is proper for all primitive types, but no more
    // than the size.
    return memory_allocator_->AllocateFromTail(
        size, std::min(size, alignof(max_align_t)));
  }
  void Deallocate(void* data) override {
    // Do not deallocate, builtin data needs to be available for the lifetime
    // of the model.
  }

 private:
  SimpleMemoryAllocator* memory_allocator_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};
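
// Usage sketch (hypothetical, for illustration only): ParseOpData is handed
// this allocator further down in this file and allocates each op's builtin
// params through it, e.g.
//
//   MicroBuiltinDataAllocator builtin_allocator(memory_allocator);
//   void* params = builtin_allocator.Allocate(sizeof(TfLiteConvParams));
//
// Since Deallocate() is a no-op, the parsed builtin data stays valid for as
// long as the model does, which is what the kernels expect.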

TfLiteStatus AllocateVariables(
    const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* flatbuffer_tensors,
    TfLiteTensor* runtime_tensors, SimpleMemoryAllocator* allocator) {
  for (size_t i = 0; i < flatbuffer_tensors->size(); ++i) {
    if (flatbuffer_tensors->Get(i)->is_variable()) {
      runtime_tensors[i].data.uint8 = allocator->AllocateFromTail(
          runtime_tensors[i].bytes, kBufferAlignment);
      // Allocation failure.
      if (runtime_tensors[i].data.uint8 == nullptr) {
        return kTfLiteError;
      }
    }
    tflite::ResetVariableTensor(&(runtime_tensors[i]));
  }
  return kTfLiteOk;
}

AllocationInfo* AllocateAndCalculateAllocationInfo(
    ErrorReporter* error_reporter, size_t allocation_info_size,
    const SubGraph* subgraph, TfLiteTensor* runtime_tensors,
    SimpleMemoryAllocator* allocator) {
  AllocationInfo* allocation_info = reinterpret_cast<AllocationInfo*>(
      allocator->AllocateFromTail(
          sizeof(AllocationInfo) * allocation_info_size,
          alignof(AllocationInfo)));
  if (allocation_info == nullptr) {
    error_reporter->Report(
        "Failed to allocate memory for allocation_info, %d bytes required",
        sizeof(AllocationInfo) * allocation_info_size);
    return nullptr;
  }

  // Set up the runtime data structures for all tensors.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    AllocationInfo* current = &allocation_info[i];
    // TfLiteTensor.uint8 field is deprecated so use .data field instead.
    current->output_ptr = &(runtime_tensors[i].data.data);
    current->bytes = runtime_tensors[i].bytes;
    current->first_created = -1;
    current->last_used = -1;
    current->needs_allocating = (runtime_tensors[i].data.raw == nullptr) &&
                                (!subgraph->tensors()->Get(i)->is_variable());
  }

  // Mark all inputs as created at the start of the invocation.
  for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
    const int tensor_index = subgraph->inputs()->Get(i);
    AllocationInfo* current = &allocation_info[tensor_index];
    current->first_created = 0;
  }

  // Mark all outputs as persistent to the end of the invocation.
  for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
    const int tensor_index = subgraph->outputs()->Get(i);
    AllocationInfo* current = &allocation_info[tensor_index];
    current->last_used = subgraph->operators()->size() - 1;
  }

  // Figure out when the first and last use of each tensor is.
  for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
    const auto* op = subgraph->operators()->Get(i);
    for (size_t n = 0; n < op->inputs()->size(); ++n) {
      const int tensor_index = op->inputs()->Get(n);
      AllocationInfo* current = &allocation_info[tensor_index];
      if ((current->last_used == -1) || (current->last_used < i)) {
        current->last_used = i;
      }
    }
    for (size_t n = 0; n < op->outputs()->size(); ++n) {
      const int tensor_index = op->outputs()->Get(n);
      AllocationInfo* current = &allocation_info[tensor_index];
      if ((current->first_created == -1) || (current->first_created > i)) {
        current->first_created = i;
      }
    }
  }

  // Work out which tensors need to be allocated.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    AllocationInfo* current = &allocation_info[i];
    const bool is_read_only =
        (current->first_created == -1) && (current->last_used != -1);
    if (is_read_only) {
      current->needs_allocating = false;
    }
    const bool has_partial_lifetime =
        !is_read_only &&
        ((current->first_created == -1) || (current->last_used == -1));
    if (has_partial_lifetime && current->needs_allocating) {
      error_reporter->Report(
          "Logic error in memory planner, tensor %d has an invalid lifetime: "
          "first_created: %d, last_used: %d",
          i, current->first_created, current->last_used);
      return nullptr;
    }
  }

  return allocation_info;
}
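
// Worked example (hypothetical two-operator graph, for illustration): if
// tensor t0 is the subgraph input feeding op 0, t1 is op 0's output consumed
// by op 1, and t2 is op 1's output and the subgraph output, the loops above
// yield:
//
//   t0: first_created = 0 (subgraph input), last_used = 0 (input of op 0)
//   t1: first_created = 0 (output of op 0), last_used = 1 (input of op 1)
//   t2: first_created = 1 (output of op 1), last_used = 1 (subgraph output)
//
// A tensor with first_created == -1 but last_used != -1 was loaded from a
// serialized buffer, so it is marked read-only and skipped by the planner.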

TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
                        const AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Add the tensors to our allocation plan.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    const AllocationInfo* current = &allocation_info[i];
    if (current->needs_allocating) {
      size_t aligned_bytes_required =
          AlignSizeUp(current->bytes, kBufferAlignment);
      TF_LITE_ENSURE_STATUS(
          planner->AddBuffer(error_reporter, aligned_bytes_required,
                             current->first_created, current->last_used));
    }
  }
  return kTfLiteOk;
}

TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
                        uint8_t* starting_point,
                        AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Figure out the actual memory addresses for each buffer, based on the plan.
  int planner_index = 0;
  for (size_t i = 0; i < allocation_info_size; ++i) {
    AllocationInfo* current = &allocation_info[i];
    if (current->needs_allocating) {
      int offset = -1;
      TF_LITE_ENSURE_STATUS(
          planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
      *current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
      ++planner_index;
    }
  }
  return kTfLiteOk;
}
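
// How the two functions above fit together (illustrative sketch; the buffer
// size is made up):
//
//   GreedyMemoryPlanner planner(arena, arena_size);
//   CreatePlan(error_reporter, &planner, allocation_info, info_count);
//   // CreatePlan registered one request per tensor that needs allocating,
//   // e.g. planner.AddBuffer(error_reporter, 64, /*first=*/0, /*last=*/1);
//   CommitPlan(error_reporter, &planner, arena, allocation_info, info_count);
//   // Each tensor's output_ptr now points at arena + its planned offset.
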
}  // namespace

namespace internal {

TfLiteStatus InitializeRuntimeTensor(
    SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result) {
  *result = {};
  // Make sure the serialized type is one we know how to deal with, and convert
  // it from a flatbuffer enum into a constant used by the kernel C API.
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &result->type, error_reporter));
  // Make sure we remember if the serialized tensor is designated as a
  // variable.
  result->is_variable = flatbuffer_tensor.is_variable();

  // We need to figure out where the actual contents of this tensor are stored
  // in memory. We'll check to see if there's a serialized buffer (pretty much
  // the same as a constant op in TensorFlow) associated with this tensor
  // first, and if there is, update the runtime structure to point to its
  // location in memory.
  // First see if there's any buffer information in the serialized tensor.
  if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
    // If we've found a buffer, does it have any data?
    if (auto* array = buffer->data()) {
      // If it has any data, is the data size larger than zero?
      if (array->size()) {
        // We've found a buffer with valid data, so update the runtime tensor
        // data structure to point to it.
        result->data.raw =
            const_cast<char*>(reinterpret_cast<const char*>(array->data()));
        // We set the data from a serialized buffer, so record that.
        result->allocation_type = kTfLiteMmapRo;
      }
    }
    // TODO(petewarden): It's not clear in what circumstances we could have a
    // buffer in the serialized tensor, but it doesn't have any data in it. Is
    // that a validly-generated file, and if so what does it mean, or is it an
    // error condition? It would be good to tighten up the specification to
    // make it less ambiguous.
  }

  // TODO(petewarden): Some of these paths aren't getting enough testing
  // coverage, so we should figure out some tests that exercise them.
  if (!result->data.raw) {
    // The tensor contents haven't been set from a serialized buffer, so
    // make a note that they will be allocated from memory. The actual
    // allocation won't happen until later.
    result->allocation_type = kTfLiteArenaRw;
  }

  // Figure out what the size in bytes of the buffer is and store it.
  size_t type_size;
  TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
      flatbuffer_tensor, &result->bytes, &type_size, error_reporter));

  // TFLM doesn't allow reshaping the tensor which requires dynamic memory
  // allocation, so it is safe to drop the const qualifier. In the future, if
  // we really want to update the tensor shape, we can always pass in a new
  // TfLiteIntArray - and we would have to do so if the dimensions changed.
  result->dims = const_cast<TfLiteIntArray*>(
      reinterpret_cast<const TfLiteIntArray*>(flatbuffer_tensor.shape()));

  // Copy the quantization information from the serialized data.
  const auto* src_quantization = flatbuffer_tensor.quantization();
  if (src_quantization && src_quantization->scale() &&
      (src_quantization->scale()->size() > 0) &&
      src_quantization->zero_point() &&
      (src_quantization->zero_point()->size() > 0)) {
    // Always populate the TfLiteTensor.params field, even if there are
    // per-channel quantization parameters.
    result->params.scale = src_quantization->scale()->Get(0);
    // Note that the zero_point field in the FlatBuffers schema is a 64-bit
    // integer, but the zero_point field in the TfLiteQuantizationParams struct
    // is a 32-bit integer.
    result->params.zero_point =
        static_cast<int32_t>(src_quantization->zero_point()->Get(0));

    // Populate per-channel quantization params.
    int channels = src_quantization->scale()->size();
    TfLiteAffineQuantization* quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            allocator->AllocateFromTail(sizeof(TfLiteAffineQuantization),
                                        alignof(TfLiteAffineQuantization)));
    quantization->zero_point =
        reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
            TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray)));
    quantization->scale = reinterpret_cast<TfLiteFloatArray*>(
        allocator->AllocateFromTail(TfLiteFloatArrayGetSizeInBytes(channels),
                                    alignof(TfLiteFloatArray)));
    quantization->zero_point->size = channels;
    quantization->scale->size = channels;
    int* zero_point_data = quantization->zero_point->data;
    float* scale_data = quantization->scale->data;
    for (int i = 0; i < channels; i++) {
      zero_point_data[i] = src_quantization->zero_point()->Get(i);
      scale_data[i] = src_quantization->scale()->Get(i);
    }
    // TODO(rocky): Need to add a micro_allocator test case that fails when
    // this is not copied:
    quantization->quantized_dimension =
        src_quantization->quantized_dimension();

    result->quantization = {kTfLiteAffineQuantization, quantization};
  }
  // Copy the name, if there is one.
  if (flatbuffer_tensor.name() != nullptr) {
    result->name = flatbuffer_tensor.name()->c_str();
  } else {
    result->name = "<No name>";
  }
  return kTfLiteOk;
}
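
// Example of the quantization copy above (hypothetical values): a tensor
// serialized with scale = {0.5, 0.25} and zero_point = {1, 2} ends up with
//
//   result->params.scale == 0.5f;   // legacy single-channel field
//   result->params.zero_point == 1;
//   quantization->scale->size == 2; // full per-channel arrays
//   quantization->zero_point->data[1] == 2;
//
// including the int64 -> int32 narrowing of zero_point noted in the code.
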
}  // namespace internal

TfLiteStatus MicroAllocator::Init() {
  auto* subgraphs = model_->subgraphs();
  if (subgraphs->size() != 1) {
    error_reporter_->Report("Only 1 subgraph is currently supported.\n");
    return kTfLiteError;
  }
  subgraph_ = (*subgraphs)[0];
  tensors_ = subgraph_->tensors();
  operators_ = subgraph_->operators();

  context_->tensors_size = tensors_->size();
  context_->tensors =
      reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
          sizeof(TfLiteTensor) * context_->tensors_size,
          alignof(TfLiteTensor)));
  if (context_->tensors == nullptr) {
    error_reporter_->Report(
        "Failed to allocate memory for context->tensors, %d bytes required",
        sizeof(TfLiteTensor) * context_->tensors_size);
    return kTfLiteError;
  }

  // Initialize runtime tensors in context_ using the flatbuffer.
  for (size_t i = 0; i < tensors_->size(); ++i) {
    TfLiteStatus status = internal::InitializeRuntimeTensor(
        memory_allocator_, *tensors_->Get(i), model_->buffers(),
        error_reporter_, &context_->tensors[i]);
    if (status == kTfLiteError) {
      error_reporter_->Report("Failed to initialize tensor %d", i);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}

MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
                               uint8_t* tensor_arena, size_t arena_size,
                               ErrorReporter* error_reporter)
    : model_(model), error_reporter_(error_reporter), context_(context) {
  uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
  size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
  // Creates the root memory allocator that manages the arena. The allocator
  // itself also lives in the arena buffer. This allocator doesn't need to be
  // destructed as it's the root allocator.
  SimpleMemoryAllocator* aligned_allocator =
      CreateInPlaceSimpleMemoryAllocator(aligned_arena, aligned_arena_size);
  memory_allocator_ = aligned_allocator;
  TfLiteStatus status = Init();
  // TODO(b/147871299): Consider improving this code. A better way of handling
  // failures in the constructor is to have a static function that returns a
  // pointer to the class. If allocation failed, a nullptr will be returned.
  if (status != kTfLiteOk) {
    error_reporter_->Report("MicroAllocator: Failed to initialize.");
    active_ = false;
  } else {
    active_ = true;
  }
}
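
// Construction sketch (the arena size and GetModel call are illustrative
// assumptions, not requirements of this file): callers own both the arena
// and the TfLiteContext, e.g.
//
//   alignas(16) static uint8_t arena[8 * 1024];
//   TfLiteContext context;
//   const Model* model = GetModel(model_buffer);
//   MicroAllocator allocator(&context, model, arena, sizeof(arena), reporter);
//
// Note that AlignPointerUp may skip the first few bytes of the arena, so the
// usable size can be slightly smaller than the size passed in.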

TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
    const OpResolver& op_resolver,
    NodeAndRegistration** node_and_registrations) {
  if (!active_) {
    return kTfLiteError;
  }

  auto* output = reinterpret_cast<NodeAndRegistration*>(
      memory_allocator_->AllocateFromTail(
          sizeof(NodeAndRegistration) * operators_->size(),
          alignof(NodeAndRegistration)));
  if (output == nullptr) {
    error_reporter_->Report(
        "Failed to allocate memory for node_and_registrations.");
    return kTfLiteError;
  }
  TfLiteStatus status = kTfLiteOk;
  auto* opcodes = model_->operator_codes();
  MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
  for (size_t i = 0; i < operators_->size(); ++i) {
    const auto* op = operators_->Get(i);
    size_t index = op->opcode_index();
    if (index >= opcodes->size()) {
      error_reporter_->Report("Missing registration for opcode_index %d\n",
                              index);
      return kTfLiteError;
    }
    auto* opcode = (*opcodes)[index];
    status = GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
                                       &(output[i].registration));
    if (status != kTfLiteOk) {
      error_reporter_->Report("Failed to get registration from op code %d\n",
                              index);
      return status;
    }
    const auto* registration = output[i].registration;
    if (registration == nullptr) {
      error_reporter_->Report("Skipping op for opcode_index %d\n", index);
      return kTfLiteError;
    }
    BuiltinOperator op_type =
        static_cast<BuiltinOperator>(registration->builtin_code);

    if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
      error_reporter_->Report(
          "Unsupported behavior: found builtin operator %s with custom "
          "options.\n",
          EnumNameBuiltinOperator(op_type));
      return kTfLiteError;
    }

    const char* custom_data = nullptr;
    size_t custom_data_size = 0;
    unsigned char* builtin_data = nullptr;
    if (op->custom_options()) {
      custom_data = reinterpret_cast<const char*>(op->custom_options()->data());
      custom_data_size = op->custom_options()->size();
    } else {
      TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
                                        &builtin_data_allocator,
                                        (void**)(&builtin_data)));
    }

    // Disregard the const qualifier to work around the existing API.
    TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
    TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->outputs()));

    TfLiteNode* node = &(output[i].node);
    *node = {};
    node->inputs = inputs_array;
    node->outputs = outputs_array;
    // This is OK for now as the temporaries array is not in use.
    node->temporaries = nullptr;
    node->user_data = nullptr;  // Will be filled in after `init`
    node->builtin_data = reinterpret_cast<void*>(builtin_data);
    node->custom_initial_data = custom_data;
    node->custom_initial_data_size = custom_data_size;
    node->delegate = nullptr;
  }
  *node_and_registrations = output;
  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::FinishTensorAllocation() {
  if (!active_) {
    return kTfLiteError;
  }

  // Create static memory plan. AllocationInfo is needed for creating the plan
  // but is thrown away afterwards.
  {
    SimpleMemoryAllocator tmp_allocator =
        memory_allocator_->CreateChildAllocator();
    size_t allocation_info_size = tensors_->size();
    AllocationInfo* allocation_info = AllocateAndCalculateAllocationInfo(
        error_reporter_, allocation_info_size, subgraph_, context_->tensors,
        &tmp_allocator);
    if (allocation_info == nullptr) {
      return kTfLiteError;
    }

    uint8_t* aligned_arena = memory_allocator_->GetBuffer();
    size_t arena_size = memory_allocator_->GetMaxBufferSize();

    // Remaining arena size that the memory planner can use for calculating
    // offsets. The remaining size should always be a positive number since
    // the parent allocator is always bigger than the child allocator.
    size_t remaining_arena_size = arena_size - tmp_allocator.GetDataSize();
    GreedyMemoryPlanner planner(aligned_arena, remaining_arena_size);
    TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, &planner,
                                     allocation_info, allocation_info_size));

    // Actual size available for placing tensors. This includes memory held by
    // the tensor info array, which will be released.
    size_t actual_available_arena_size =
        arena_size - memory_allocator_->GetDataSize();
    // Make sure we have enough arena size.
    if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
      error_reporter_->Report(
          "Arena size is too small for activation buffers. Needed %d but only "
          "%d was available.",
          planner.GetMaximumMemorySize(), actual_available_arena_size);
      return kTfLiteError;
    }

    TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner, aligned_arena,
                                     allocation_info, allocation_info_size));
  }

  // Data in variable tensors needs to be kept for the next invocation, so
  // they are allocated from the tail (persistent area).
  if (AllocateVariables(tensors_, context_->tensors, memory_allocator_) !=
      kTfLiteOk) {
    error_reporter_->Report(
        "Failed to allocate variables. Please increase arena size.");
    return kTfLiteError;
  }

  active_ = false;
  return kTfLiteOk;
}
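
// End-to-end sketch of how this class is typically driven (illustrative;
// op_resolver setup is assumed to happen elsewhere):
//
//   MicroAllocator allocator(&context, model, arena, arena_size, reporter);
//   NodeAndRegistration* nodes = nullptr;
//   allocator.AllocateNodeAndRegistrations(op_resolver, &nodes);
//   allocator.FinishTensorAllocation();
//
// After FinishTensorAllocation() the allocator deactivates itself: the plan
// has been committed into the arena and no further allocation is allowed.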

}  // namespace tflite