/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/micro/micro_allocator.h"

#include <cstddef>

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/core/api/flatbuffer_conversions.h"
#include "tensorflow/lite/core/api/op_resolver.h"
#include "tensorflow/lite/core/api/tensor_utils.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/memory_helpers.h"
#include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"

namespace tflite {

namespace {
// Holds the information needed during memory-allocation calculations.
struct AllocationInfo {
  size_t bytes;
  int first_created;
  int last_used;
  bool needs_allocating;
  void** output_ptr;
};

// We align tensor buffers to 16-byte boundaries, since this is a common
// requirement for SIMD extensions.
constexpr int kBufferAlignment = 16;

// If building with the GNU C library from GCC 4.8.x or lower, `max_align_t`
// is not a member of `std`. With a newer C library, we import `max_align_t`
// into the local anonymous namespace so it can be used like the global
// `max_align_t` from the older library.
#if defined(__GNUC__) && defined(__GNUC_PREREQ)
#if __GNUC_PREREQ(4, 9)
using std::max_align_t;
#endif
#else
// We assume other compiler/C library configurations don't have this issue.
using std::max_align_t;
#endif

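// Adapter that satisfies the BuiltinDataAllocator interface by carving
// persistent allocations out of the tail of the arena. Deallocate() is a
// no-op because parsed builtin data has to stay valid for as long as the
// model is in use.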
class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
 public:
  explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
      : memory_allocator_(memory_allocator) {}

  void* Allocate(size_t size) override {
    // Align to an address that is proper for all primitive types, but no more
    // than the size.
    return memory_allocator_->AllocateFromTail(
        size, std::min(size, alignof(max_align_t)));
  }
  void Deallocate(void* data) override {
    // Do not deallocate; builtin data needs to be available for the lifetime
    // of the model.
  }

 private:
  SimpleMemoryAllocator* memory_allocator_;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

TfLiteStatus AllocateVariables(
    const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* flatbuffer_tensors,
    TfLiteTensor* runtime_tensors, SimpleMemoryAllocator* allocator) {
  for (size_t i = 0; i < flatbuffer_tensors->size(); ++i) {
    if (flatbuffer_tensors->Get(i)->is_variable()) {
      runtime_tensors[i].data.uint8 = allocator->AllocateFromTail(
          runtime_tensors[i].bytes, kBufferAlignment);
      // Allocation failure.
      if (runtime_tensors[i].data.uint8 == nullptr) {
        return kTfLiteError;
      }
    }
    tflite::ResetVariableTensor(&(runtime_tensors[i]));
  }
  return kTfLiteOk;
}

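// Builds an AllocationInfo entry for every tensor in the subgraph. Lifetimes
// are expressed as operator indices: first_created is the index of the first
// operator that produces the tensor (0 for graph inputs) and last_used is the
// index of the last operator that consumes it (the final operator for graph
// outputs). A value of -1 means "never created" or "never used". Tensors that
// already point at serialized data, and variable tensors, are excluded from
// planning via needs_allocating.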
AllocationInfo* AllocateAndCalculateAllocationInfo(
    ErrorReporter* error_reporter, size_t allocation_info_size,
    const SubGraph* subgraph, TfLiteTensor* runtime_tensors,
    SimpleMemoryAllocator* allocator) {
  AllocationInfo* allocation_info = reinterpret_cast<AllocationInfo*>(
      allocator->AllocateFromTail(sizeof(AllocationInfo) * allocation_info_size,
                                  alignof(AllocationInfo)));
  if (allocation_info == nullptr) {
    error_reporter->Report(
        "Failed to allocate memory for allocation_info, %d bytes required",
        sizeof(AllocationInfo) * allocation_info_size);
    return nullptr;
  }

  // Set up an AllocationInfo entry for each tensor.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    AllocationInfo* current = &allocation_info[i];
    // The TfLiteTensor.data.uint8 accessor is deprecated, so use the
    // .data.data field instead.
    current->output_ptr = &(runtime_tensors[i].data.data);
    current->bytes = runtime_tensors[i].bytes;
    current->first_created = -1;
    current->last_used = -1;
    current->needs_allocating = (runtime_tensors[i].data.raw == nullptr) &&
                                (!subgraph->tensors()->Get(i)->is_variable());
  }

  // Mark all graph inputs as created at the start of the invocation.
  for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
    const int tensor_index = subgraph->inputs()->Get(i);
    AllocationInfo* current = &allocation_info[tensor_index];
    current->first_created = 0;
  }

  // Mark all outputs as persistent to the end of the invocation.
  for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
    const int tensor_index = subgraph->outputs()->Get(i);
    AllocationInfo* current = &allocation_info[tensor_index];
    current->last_used = subgraph->operators()->size() - 1;
  }

  // Figure out when each tensor is first created and last used.
  for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
    const auto* op = subgraph->operators()->Get(i);
    for (size_t n = 0; n < op->inputs()->size(); ++n) {
      const int tensor_index = op->inputs()->Get(n);
      AllocationInfo* current = &allocation_info[tensor_index];
      if ((current->last_used == -1) || (current->last_used < i)) {
        current->last_used = i;
      }
    }
    for (size_t n = 0; n < op->outputs()->size(); ++n) {
      const int tensor_index = op->outputs()->Get(n);
      AllocationInfo* current = &allocation_info[tensor_index];
      if ((current->first_created == -1) || (current->first_created > i)) {
        current->first_created = i;
      }
    }
  }
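
  // At this point first_created/last_used bracket each tensor's lifetime in
  // operator-index space; a -1 in either field means the tensor is never
  // produced or never consumed by this subgraph, respectively.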

  // Work out which tensors need to be allocated.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    AllocationInfo* current = &allocation_info[i];
    const bool is_read_only =
        (current->first_created == -1) && (current->last_used != -1);
    if (is_read_only) {
      current->needs_allocating = false;
    }
    const bool has_partial_lifetime =
        !is_read_only &&
        ((current->first_created == -1) || (current->last_used == -1));
    if (has_partial_lifetime && current->needs_allocating) {
      error_reporter->Report(
          "Logic error in memory planner, tensor %d has an invalid lifetime: "
          "first_created: %d, last_used: %d",
          i, current->first_created, current->last_used);
      return nullptr;
    }
  }

  return allocation_info;
}

TfLiteStatus CreatePlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
                        const AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Add the tensors to our allocation plan.
  for (size_t i = 0; i < allocation_info_size; ++i) {
    const AllocationInfo* current = &allocation_info[i];
    if (current->needs_allocating) {
      size_t aligned_bytes_required =
          AlignSizeUp(current->bytes, kBufferAlignment);
      TF_LITE_ENSURE_STATUS(
          planner->AddBuffer(error_reporter, aligned_bytes_required,
                             current->first_created, current->last_used));
    }
  }
  return kTfLiteOk;
}

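// Assigns the planner's offsets back to the tensors. Buffers are identified
// by the order in which they were added in CreatePlan(), so this walk must
// visit tensors in the same order and track its position with a separate
// planner_index.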
TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
                        uint8_t* starting_point,
                        AllocationInfo* allocation_info,
                        size_t allocation_info_size) {
  // Figure out the actual memory addresses for each buffer, based on the plan.
  int planner_index = 0;
  for (size_t i = 0; i < allocation_info_size; ++i) {
    AllocationInfo* current = &allocation_info[i];
    if (current->needs_allocating) {
      int offset = -1;
      TF_LITE_ENSURE_STATUS(
          planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
      *current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
      ++planner_index;
    }
  }
  return kTfLiteOk;
}
}  // namespace

namespace internal {

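// Fills in a TfLiteTensor from its flatbuffer description. Tensors backed by
// a serialized buffer point directly at the flatbuffer data and are marked
// kTfLiteMmapRo; all other tensors are marked kTfLiteArenaRw and have their
// buffers assigned later by the memory planner. Per-channel quantization
// arrays are allocated from the tail of the arena.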
TfLiteStatus InitializeRuntimeTensor(
    SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result) {
  *result = {};
  // Make sure the serialized type is one we know how to deal with, and convert
  // it from a flatbuffer enum into a constant used by the kernel C API.
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &result->type, error_reporter));
  // Make sure we remember if the serialized tensor is designated as a variable.
  result->is_variable = flatbuffer_tensor.is_variable();

  // We need to figure out where the actual contents of this tensor are stored
  // in memory. We'll check to see if there's a serialized buffer (pretty much
  // the same as a constant op in TensorFlow) associated with this tensor
  // first, and if there is, update the runtime structure to point to its
  // location in memory.
  // First see if there's any buffer information in the serialized tensor.
  if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
    // If we've found a buffer, does it have any data?
    if (auto* array = buffer->data()) {
      // If it has any data, is the data size larger than zero?
      if (array->size()) {
        // We've found a buffer with valid data, so update the runtime tensor
        // data structure to point to it.
        result->data.raw =
            const_cast<char*>(reinterpret_cast<const char*>(array->data()));
        // We set the data from a serialized buffer, so record that.
        result->allocation_type = kTfLiteMmapRo;
      }
    }
    // TODO(petewarden): It's not clear in what circumstances we could have a
    // buffer in the serialized tensor that doesn't have any data in it. Is
    // that a validly-generated file, and if so what does it mean, or is it an
    // error condition? It would be good to tighten up the specification to
    // make it less ambiguous.
  }

  // TODO(petewarden): Some of these paths aren't getting enough testing
  // coverage, so we should figure out some tests that exercise them.
  if (!result->data.raw) {
    // The tensor contents haven't been set from a serialized buffer, so
    // make a note that they will be allocated from memory. The actual
    // allocation won't happen until later.
    result->allocation_type = kTfLiteArenaRw;
  }

  // Figure out the size in bytes of the buffer and store it.
  size_t type_size;
  TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
      flatbuffer_tensor, &result->bytes, &type_size, error_reporter));

  // TFLM doesn't allow reshaping a tensor, since that would require dynamic
  // memory allocation, so it is safe to drop the const qualifier. In the
  // future, if we really want to update the tensor shape, we can always pass
  // in a new TfLiteIntArray - in fact, we have to do so if the number of
  // dimensions changes.
  result->dims = const_cast<TfLiteIntArray*>(
      reinterpret_cast<const TfLiteIntArray*>(flatbuffer_tensor.shape()));

  // Copy the quantization information from the serialized data.
  const auto* src_quantization = flatbuffer_tensor.quantization();
  if (src_quantization && src_quantization->scale() &&
      (src_quantization->scale()->size() > 0) &&
      src_quantization->zero_point() &&
      (src_quantization->zero_point()->size() > 0)) {
    // Always populate the TfLiteTensor.params field, even if there are
    // per-channel quantization parameters.
    result->params.scale = src_quantization->scale()->Get(0);
    // Note that the zero_point field in the FlatBuffers schema is a 64-bit
    // integer, but the zero_point field in the TfLiteQuantizationParams struct
    // is a 32-bit integer.
    result->params.zero_point =
        static_cast<int32_t>(src_quantization->zero_point()->Get(0));

    // Populate per-channel quantization params.
    int channels = src_quantization->scale()->size();
    TfLiteAffineQuantization* quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            allocator->AllocateFromTail(sizeof(TfLiteAffineQuantization),
                                        alignof(TfLiteAffineQuantization)));
    quantization->zero_point =
        reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
            TfLiteIntArrayGetSizeInBytes(channels), alignof(TfLiteIntArray)));
    quantization->scale = reinterpret_cast<TfLiteFloatArray*>(
        allocator->AllocateFromTail(TfLiteFloatArrayGetSizeInBytes(channels),
                                    alignof(TfLiteFloatArray)));
    quantization->zero_point->size = channels;
    quantization->scale->size = channels;
    int* zero_point_data = quantization->zero_point->data;
    float* scale_data = quantization->scale->data;
    for (int i = 0; i < channels; i++) {
      zero_point_data[i] = src_quantization->zero_point()->Get(i);
      scale_data[i] = src_quantization->scale()->Get(i);
    }
    // TODO(rocky): Need to add a micro_allocator test case that fails when
    // this is not copied:
    quantization->quantized_dimension = src_quantization->quantized_dimension();

    result->quantization = {kTfLiteAffineQuantization, quantization};
  }
  // Copy the name, if there is one.
  if (flatbuffer_tensor.name()->c_str() != nullptr) {
    result->name = flatbuffer_tensor.name()->c_str();
  } else {
    result->name = "<No name>";
  }
  return kTfLiteOk;
}
}  // namespace internal

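// MicroAllocator is used in three steps: the constructor builds the root
// arena allocator and calls Init() to create the runtime TfLiteTensor array,
// AllocateNodeAndRegistrations() resolves and initializes the operators, and
// FinishTensorAllocation() plans and commits the tensor buffers. Once
// FinishTensorAllocation() succeeds, the allocator is marked inactive.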
TfLiteStatus MicroAllocator::Init() {
  auto* subgraphs = model_->subgraphs();
  if (subgraphs->size() != 1) {
    error_reporter_->Report("Only 1 subgraph is currently supported.\n");
    return kTfLiteError;
  }
  subgraph_ = (*subgraphs)[0];
  tensors_ = subgraph_->tensors();
  operators_ = subgraph_->operators();

  context_->tensors_size = tensors_->size();
  context_->tensors =
      reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
          sizeof(TfLiteTensor) * context_->tensors_size,
          alignof(TfLiteTensor)));
  if (context_->tensors == nullptr) {
    error_reporter_->Report(
        "Failed to allocate memory for context->tensors, %d bytes required",
        sizeof(TfLiteTensor) * context_->tensors_size);
    return kTfLiteError;
  }

  // Initialize runtime tensors in context_ using the flatbuffer.
  for (size_t i = 0; i < tensors_->size(); ++i) {
    TfLiteStatus status = internal::InitializeRuntimeTensor(
        memory_allocator_, *tensors_->Get(i), model_->buffers(),
        error_reporter_, &context_->tensors[i]);
    if (status == kTfLiteError) {
      error_reporter_->Report("Failed to initialize tensor %d", i);
      return kTfLiteError;
    }
  }

  return kTfLiteOk;
}

MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
                               uint8_t* tensor_arena, size_t arena_size,
                               ErrorReporter* error_reporter)
    : model_(model), error_reporter_(error_reporter), context_(context) {
  uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
  size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
  // Creates a root memory allocator managing the arena. The allocator itself
  // also lives in the arena buffer. This allocator doesn't need to be
  // destructed, as it's the root allocator.
  SimpleMemoryAllocator* aligned_allocator =
      CreateInPlaceSimpleMemoryAllocator(aligned_arena, aligned_arena_size);
  memory_allocator_ = aligned_allocator;
  TfLiteStatus status = Init();
  // TODO(b/147871299): Consider improving this code. A better way of handling
  // failures in the constructor is to have a static function that returns a
  // pointer to the class. If allocation failed, a nullptr will be returned.
  if (status != kTfLiteOk) {
    error_reporter_->Report("MicroAllocator: Failed to initialize.");
    active_ = false;
  } else {
    active_ = true;
  }
}

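// Allocates the NodeAndRegistration array from the tail of the arena and
// fills it in: each operator's registration is looked up through the
// OpResolver, custom options are pointed at the serialized flatbuffer data,
// and builtin options are parsed into arena-allocated builtin_data.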
TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
    const OpResolver& op_resolver,
    NodeAndRegistration** node_and_registrations) {
  if (!active_) {
    return kTfLiteError;
  }

  auto* output = reinterpret_cast<NodeAndRegistration*>(
      memory_allocator_->AllocateFromTail(
          sizeof(NodeAndRegistration) * operators_->size(),
          alignof(NodeAndRegistration)));
  if (output == nullptr) {
    error_reporter_->Report(
        "Failed to allocate memory for node_and_registrations.");
    return kTfLiteError;
  }
  TfLiteStatus status = kTfLiteOk;
  auto* opcodes = model_->operator_codes();
  MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
  for (size_t i = 0; i < operators_->size(); ++i) {
    const auto* op = operators_->Get(i);
    size_t index = op->opcode_index();
    if (index >= opcodes->size()) {
      error_reporter_->Report("Missing registration for opcode_index %d\n",
                              index);
      return kTfLiteError;
    }
    auto* opcode = (*opcodes)[index];
    status = GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
                                       &(output[i].registration));
    if (status != kTfLiteOk) {
      error_reporter_->Report("Failed to get registration from op code %d\n",
                              index);
      return status;
    }
    const auto* registration = output[i].registration;
    if (registration == nullptr) {
      error_reporter_->Report("Skipping op for opcode_index %d\n", index);
      return kTfLiteError;
    }
    BuiltinOperator op_type =
        static_cast<BuiltinOperator>(registration->builtin_code);

    if (op_type != BuiltinOperator_CUSTOM && op->custom_options()) {
      error_reporter_->Report(
          "Unsupported behavior: found builtin operator %s with custom "
          "options.\n",
          EnumNameBuiltinOperator(op_type));
      return kTfLiteError;
    }

    const char* custom_data = nullptr;
    size_t custom_data_size = 0;
    unsigned char* builtin_data = nullptr;
    if (op->custom_options()) {
      custom_data = reinterpret_cast<const char*>(op->custom_options()->data());
      custom_data_size = op->custom_options()->size();
    } else {
      TF_LITE_ENSURE_STATUS(ParseOpData(op, op_type, error_reporter_,
                                        &builtin_data_allocator,
                                        (void**)(&builtin_data)));
    }

    // Disregard the const qualifier to work around the existing API.
    TfLiteIntArray* inputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->inputs()));
    TfLiteIntArray* outputs_array = const_cast<TfLiteIntArray*>(
        reinterpret_cast<const TfLiteIntArray*>(op->outputs()));

    TfLiteNode* node = &(output[i].node);
    *node = {};
    node->inputs = inputs_array;
    node->outputs = outputs_array;
    // This is OK for now, as the temporaries array is not in use.
    node->temporaries = nullptr;
    node->user_data = nullptr;  // Will be filled in after `init`
    node->builtin_data = reinterpret_cast<void*>(builtin_data);
    node->custom_initial_data = custom_data;
    node->custom_initial_data_size = custom_data_size;
    node->delegate = nullptr;
  }
  *node_and_registrations = output;
  return kTfLiteOk;
}

TfLiteStatus MicroAllocator::FinishTensorAllocation() {
  if (!active_) {
    return kTfLiteError;
  }

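  // Persistent data (the TfLiteTensor array, builtin data, quantization
  // arrays and variable tensors) is taken from the tail of the arena via
  // AllocateFromTail(). The plan below lays out the remaining, non-persistent
  // activation buffers starting from the beginning of the arena; the
  // AllocationInfo scratch array only lives in a temporary child allocator
  // for the duration of the block that follows.
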
  // Create static memory plan. AllocationInfo is needed for creating the plan
  // but is thrown away afterwards.
  {
    SimpleMemoryAllocator tmp_allocator =
        memory_allocator_->CreateChildAllocator();
    size_t allocation_info_size = tensors_->size();
    AllocationInfo* allocation_info = AllocateAndCalculateAllocationInfo(
        error_reporter_, allocation_info_size, subgraph_, context_->tensors,
        &tmp_allocator);
    if (allocation_info == nullptr) {
      return kTfLiteError;
    }

    uint8_t* aligned_arena = memory_allocator_->GetBuffer();
    size_t arena_size = memory_allocator_->GetMaxBufferSize();

    // Remaining arena size that the memory planner can use for calculating
    // offsets. The remaining size should always be positive, since the parent
    // allocator is always bigger than the child allocator.
    size_t remaining_arena_size = arena_size - tmp_allocator.GetDataSize();
    GreedyMemoryPlanner planner(aligned_arena, remaining_arena_size);
    TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, &planner, allocation_info,
                                     allocation_info_size));

    // Actual size available for placing tensors. This includes memory held by
    // the tensor info array, which will be released.
    size_t actual_available_arena_size =
        arena_size - memory_allocator_->GetDataSize();
    // Make sure we have enough arena size.
    if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
      error_reporter_->Report(
          "Arena size is too small for activation buffers. Needed %d but only "
          "%d was available.",
          planner.GetMaximumMemorySize(), actual_available_arena_size);
      return kTfLiteError;
    }

    TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner, aligned_arena,
                                     allocation_info, allocation_info_size));
  }

  // Data in variable tensors needs to be kept for the next invocation, so
  // allocate it from the tail (persistent area).
  if (AllocateVariables(tensors_, context_->tensors, memory_allocator_) !=
      kTfLiteOk) {
    error_reporter_->Report(
        "Failed to allocate variables. Please increase arena size.");
    return kTfLiteError;
  }

  active_ = false;
  return kTfLiteOk;
}

}  // namespace tflite