1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/micro/micro_allocator.h"
17 
18 #include <cstddef>
19 #include <cstdint>
20 
21 #include "flatbuffers/flatbuffers.h"  // from @flatbuffers
22 #include "tensorflow/lite/c/common.h"
23 #include "tensorflow/lite/core/api/error_reporter.h"
24 #include "tensorflow/lite/core/api/flatbuffer_conversions.h"
25 #include "tensorflow/lite/core/api/op_resolver.h"
26 #include "tensorflow/lite/core/api/tensor_utils.h"
27 #include "tensorflow/lite/kernels/internal/compatibility.h"
28 #include "tensorflow/lite/micro/compatibility.h"
29 #include "tensorflow/lite/micro/memory_helpers.h"
30 #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
31 #include "tensorflow/lite/micro/memory_planner/memory_planner.h"
32 #include "tensorflow/lite/micro/micro_op_resolver.h"
33 #include "tensorflow/lite/micro/simple_memory_allocator.h"
34 #include "tensorflow/lite/schema/schema_generated.h"
35 #include "tensorflow/lite/schema/schema_utils.h"
36 
37 namespace tflite {
38 
39 namespace {
40 
41 // Maximum number of scratch buffer requests per operator. Operator kernels that
42 // request more than this value will receive an error.
43 constexpr size_t kMaxScratchBuffersPerOp = 12;
44 
45 // Sentinel value used as a placeholder to mark that a ScratchBufferRequest
46 // still needs a node id assignment.
47 constexpr int kUnassignedScratchBufferRequestIndex = -1;
48 
49 // Holds information used during allocation calculations.
50 struct AllocationInfo {
51   size_t bytes;            // Size of the buffer in bytes.
52   void** output_ptr;       // Where the planned buffer address is written.
53   int first_created;       // First node that needs this buffer (-1 if unset).
54   int last_used;           // Last node that needs this buffer (-1 if unset).
55   int32_t offline_offset;  // Offline plan offset, or kOnlinePlannedBuffer.
56   bool needs_allocating;   // True if the planner must place this buffer.
57 };
58 
59 // We align tensor buffers to 16-byte boundaries, since this is a common
60 // requirement for SIMD extensions.
61 constexpr int kBufferAlignment = 16;
62 constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
63 const TfLiteIntArray kZeroLengthIntArray = {};
64 
65 class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
66  public:
67   explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
68       : memory_allocator_(memory_allocator) {}
69 
70   void* Allocate(size_t size, size_t alignment_hint) override {
71     return memory_allocator_->AllocateFromTail(size, alignment_hint);
72   }
73   void Deallocate(void* data) override {
74     // Do not deallocate, builtin data needs to be available for the life time
75     // of the model.
76   }
77 
78  private:
79   SimpleMemoryAllocator* memory_allocator_;
80 
81   TF_LITE_REMOVE_VIRTUAL_DELETE
82 };
83 
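// Note: an instance of this allocator is created in
// PrepareNodeAndRegistrationDataFromFlatbuffer() below and handed to the
// builtin operator parsers, so parsed builtin option structs end up in the
// persistent (tail) section of the arena.
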
84 #if !defined(__clang__)
85 // Helper function to check flatbuffer metadata correctness. This function is
86 // not called by default, and is therefore not linked into the final binary.
87 TfLiteStatus CheckOfflinePlannedOffsets(const Model* model,
88                                         ErrorReporter* error_reporter) {
89   // Suppress compile warning for unused function
90   (void)CheckOfflinePlannedOffsets;
91 
92   if (model->metadata()) {
93     for (size_t i = 0; i < model->metadata()->size(); ++i) {
94       auto metadata = model->metadata()->Get(i);
95       if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
96                   strlen(kOfflineMemAllocMetadata)) == 0) {
97         auto* subgraphs = model->subgraphs();
98         const SubGraph* subgraph = (*subgraphs)[0];
99         const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
100             subgraph->tensors();
101         const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
102             model->buffers();
103         int nbr_tflite_tensors = tensors->size();
104         auto* buffer = (*buffers)[metadata->buffer()];
105         auto* array = buffer->data();
106         const uint32_t* metadata_buffer = (uint32_t*)array->data();
107         int version = metadata_buffer[0];
108         int subgraph_idx = metadata_buffer[1];
109         const int nbr_offline_offsets = metadata_buffer[2];
110 #ifndef TF_LITE_STRIP_ERROR_STRINGS
111         int* offline_planner_offsets = (int*)&metadata_buffer[3];
112 #endif
113 
114         TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: =====");
115         TF_LITE_REPORT_ERROR(error_reporter,
116                              "Offline planner metadata found, version %d, "
117                              "subgraph %d, nbr offline offsets %d",
118                              version, subgraph_idx, nbr_offline_offsets);
119         for (int j = 0; j < nbr_offline_offsets; ++j) {
120           TF_LITE_REPORT_ERROR(
121               error_reporter,
122               "Offline planner tensor index %d, offline offset: %d", j,
123               offline_planner_offsets[j]);
124         }
125 
126         if (version != 1) {
127           TF_LITE_REPORT_ERROR(error_reporter, "Version not supported! (%d)\n",
128                                version);
129           return kTfLiteError;
130         }
131         if (subgraph_idx != 0) {
132           TF_LITE_REPORT_ERROR(error_reporter,
133                                "Only 1 subgraph supported! Subgraph idx (%d)\n",
134                                subgraph_idx);
135           return kTfLiteError;
136         }
137         if (nbr_tflite_tensors != nbr_offline_offsets) {
138           TF_LITE_REPORT_ERROR(error_reporter,
139                                "Nbr of offline buffer offsets (%d) in metadata "
140                                "not equal nbr tensors (%d)\n",
141                                nbr_offline_offsets, nbr_tflite_tensors);
142           return kTfLiteError;
143         }
144       }
145     }
146   }
147   return kTfLiteOk;
148 }
149 #endif
150 
151 // A helper class to construct the AllocationInfo array. This array contains the
152 // lifetimes of tensors / scratch buffers and is used to calculate the memory
153 // plan. Methods need to be called in order: `GetOfflinePlannedOffsets`, `Add*`, `Finish`.
154 class AllocationInfoBuilder {
155  public:
156   AllocationInfoBuilder(AllocationInfo* info, size_t tensor_count,
157                         size_t scratch_buffer_count, ErrorReporter* reporter)
158       : info_(info),
159         tensor_count_(tensor_count),
160         buffer_count_(scratch_buffer_count),
161         reporter_(reporter) {}
162 
163   // Check if model contains offline planned buffer offsets.
164   //  - If there's no metadata available, offline_planner_offsets is not set
165   //  - If there's metadata available, offline_planner_offsets will point to the
166   //    first offset in the metadata buffer list.
167   TfLiteStatus GetOfflinePlannedOffsets(
168       const Model* model, const int32_t** offline_planner_offsets);
169 
170   // Add allocation information for the tensors.
171   TfLiteStatus AddTensors(const SubGraph* subgraph,
172                           const int32_t* offline_offsets,
173                           TfLiteEvalTensor* eval_tensors);
174 
175   // Add allocation information for the scratch buffers.
176   TfLiteStatus AddScratchBuffers(
177       internal::ScratchBufferRequest* scratch_buffer_requests,
178       ScratchBufferHandle* scratch_buffer_handles);
179 
180   // Returns a pointer to the built AllocationInfo array.
181   const AllocationInfo* Finish() const { return info_; }
182 
183  private:
184   AllocationInfo* info_ = nullptr;
185   size_t tensor_count_ = 0;
186   size_t buffer_count_ = 0;
187   ErrorReporter* reporter_ = nullptr;
188 };
189 
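// Sketch of the intended call sequence; this mirrors the actual usage in
// MicroAllocator::CommitStaticMemoryPlan() later in this file:
//
//   AllocationInfoBuilder builder(allocation_info, tensor_count,
//                                 scratch_buffer_request_count, reporter);
//   const int32_t* offline_offsets = nullptr;
//   TF_LITE_ENSURE_STATUS(
//       builder.GetOfflinePlannedOffsets(model, &offline_offsets));
//   TF_LITE_ENSURE_STATUS(
//       builder.AddTensors(subgraph, offline_offsets, eval_tensors));
//   TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(requests, handles));
//   const AllocationInfo* info = builder.Finish();
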
190 TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
191                                                const int32_t* offline_offsets,
192                                                TfLiteEvalTensor* eval_tensors) {
193   TFLITE_DCHECK(eval_tensors != nullptr);
194 
195   // Set up allocation info for all tensors.
196   for (size_t i = 0; i < tensor_count_; ++i) {
197     AllocationInfo* current = &info_[i];
198     current->output_ptr = &(eval_tensors[i].data.data);
199 
200     TF_LITE_ENSURE_STATUS(
201         TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));
202 
203     current->first_created = -1;
204     current->last_used = -1;
205     current->needs_allocating = (eval_tensors[i].data.data == nullptr) &&
206                                 (!subgraph->tensors()->Get(i)->is_variable());
207     if (offline_offsets) {
208       current->offline_offset = offline_offsets[i];
209     } else {
210       current->offline_offset = kOnlinePlannedBuffer;
211     }
212   }
213 
214   for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
215     const int tensor_index = subgraph->inputs()->Get(i);
216     AllocationInfo* current = &info_[tensor_index];
217     current->first_created = 0;
218   }
219 
220   // Mark all outputs as persistent to the end of the invocation.
221   for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
222     const int tensor_index = subgraph->outputs()->Get(i);
223     AllocationInfo* current = &info_[tensor_index];
224     current->last_used = subgraph->operators()->size() - 1;
225   }
226 
227   // Figure out when the first and last use of each tensor is.
228   for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
229     const auto* op = subgraph->operators()->Get(i);
230     for (size_t n = 0; n < op->inputs()->size(); ++n) {
231       const int tensor_index = op->inputs()->Get(n);
232       AllocationInfo* current = &info_[tensor_index];
233       if (((current->last_used == -1) || (current->last_used < i))) {
234         current->last_used = i;
235       }
236     }
237     for (size_t n = 0; n < op->outputs()->size(); ++n) {
238       const int tensor_index = op->outputs()->Get(n);
239       AllocationInfo* current = &info_[tensor_index];
240       if ((current->first_created == -1) || (current->first_created > i)) {
241         current->first_created = i;
242       }
243     }
244   }
245 
246   // Sanity check for valid tensor lifetime.
247   for (size_t i = 0; i < tensor_count_; ++i) {
248     AllocationInfo* current = &info_[i];
249     // Even though a tensor appears to be read-only, it may still need to be
250     // allocated.
251     const bool appears_read_only =
252         (current->first_created == -1) && (current->last_used != -1);
253     const bool has_partial_lifetime =
254         !appears_read_only &&
255         ((current->first_created == -1) || (current->last_used == -1));
256     if (has_partial_lifetime && current->needs_allocating) {
257       TF_LITE_REPORT_ERROR(
258           reporter_,
259           "Logic error in memory planner, tensor %d has an invalid lifetime: "
260           "first_created: %d, last_used: %d",
261           i, current->first_created, current->last_used);
262       return kTfLiteError;
263     }
264   }
265   return kTfLiteOk;
266 }
267 
268 // The tensor offsets will be encoded in the metadata:[Metadata] field of the
269 // Model. The following encoding applies:
270 //
271 // | Metadata component |                 Value                                |
272 // |    name:string     | “OfflineMemoryAllocation”                            |
273 // |    buffer:unit     | Index of buffer containing memory allocation data    |
274 //
275 // The buffer contents for the memory allocation are a list of 32-bit integers.
276 // The number of offsets, n, must be equal to the number of tensors defined in
277 // the model. The following encoding applies:
278 //
279 // |  Offset |                            Value                                |
280 // |    0    | Offline allocation format version – set to 0                    |
281 // |    1    | Subgraph index to which this allocation applies                 |
282 // |    2    | Number offsets following: n                                     |
283 // |    3    | Arena byte offset of tensor #0 or -1 to allocate at runtime     |
284 // |    4    | Arena byte offset of tensor #1 or -1 to allocate at runtime     |
285 // | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
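// Illustrative example (hypothetical values): for a single-subgraph model with
// three tensors, where tensor 1 is pinned at arena byte offset 64 and tensors
// 0 and 2 are left to the runtime planner, the metadata buffer would contain:
//
//   { version, 0 /* subgraph index */, 3 /* n */, -1, 64, -1 }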
286 TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
287     const Model* model, const int32_t** offline_planner_offsets) {
288   if (model->metadata()) {
289     for (size_t i = 0; i < model->metadata()->size(); ++i) {
290       auto metadata = model->metadata()->Get(i);
291       if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
292                   strlen(kOfflineMemAllocMetadata)) == 0) {
293         const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
294             model->buffers();
295         auto* buffer = (*buffers)[metadata->buffer()];
296         auto* array = buffer->data();
297         const uint32_t* metadata_buffer =
298             reinterpret_cast<const uint32_t*>(array->data());
299         const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);
300         *offline_planner_offsets =
301             reinterpret_cast<const int32_t*>(&metadata_buffer[3]);
302 
303         if (tensor_count_ != nbr_tensors) {
304           TF_LITE_REPORT_ERROR(reporter_,
305                                "Nbr of offline buffer offsets (%d) in metadata "
306                                "not equal nbr tensors (%d)\n",
307                                nbr_tensors, tensor_count_);
308           return kTfLiteError;
309         }
310       }
311     }
312   }
313   return kTfLiteOk;
314 }
315 
316 TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
317     internal::ScratchBufferRequest* scratch_buffer_requests,
318     ScratchBufferHandle* scratch_buffer_handles) {
319   // Set up allocation info for buffers.
320   for (size_t i = tensor_count_; i < tensor_count_ + buffer_count_; ++i) {
321     internal::ScratchBufferRequest* current_request =
322         &(scratch_buffer_requests[i - tensor_count_]);
323     ScratchBufferHandle* current_handle =
324         &(scratch_buffer_handles[i - tensor_count_]);
325 
326     AllocationInfo* current = &info_[i];
327     current->output_ptr = reinterpret_cast<void**>(&current_handle->data);
328     current->bytes = current_request->bytes;
329     current->first_created = current_request->node_idx;
330     current->last_used = current_request->node_idx;
331     current->offline_offset = kOnlinePlannedBuffer;
332     current->needs_allocating = true;
333   }
334   return kTfLiteOk;
335 }
336 
337 TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
338                         GreedyMemoryPlanner* planner,
339                         const AllocationInfo* allocation_info,
340                         size_t allocation_info_size) {
341   // Add the tensors to our allocation plan.
342   for (size_t i = 0; i < allocation_info_size; ++i) {
343     const AllocationInfo* current = &allocation_info[i];
344     if (current->needs_allocating) {
345       size_t aligned_bytes_required =
346           AlignSizeUp(current->bytes, kBufferAlignment);
347       if (current->offline_offset == kOnlinePlannedBuffer) {
348         TF_LITE_ENSURE_STATUS(
349             planner->AddBuffer(error_reporter, aligned_bytes_required,
350                                current->first_created, current->last_used));
351       } else {
352         TF_LITE_ENSURE_STATUS(planner->AddBuffer(
353             error_reporter, aligned_bytes_required, current->first_created,
354             current->last_used, current->offline_offset));
355       }
356     }
357   }
358   return kTfLiteOk;
359 }
360 
361 TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
362                         uint8_t* starting_point,
363                         const AllocationInfo* allocation_info,
364                         size_t allocation_info_size) {
365   // Figure out the actual memory addresses for each buffer, based on the plan.
366   int planner_index = 0;
367   for (size_t i = 0; i < allocation_info_size; ++i) {
368     const AllocationInfo* current = &allocation_info[i];
369     if (current->needs_allocating) {
370       int offset = -1;
371       TF_LITE_ENSURE_STATUS(
372           planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
373       *current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
374       ++planner_index;
375     }
376   }
377   return kTfLiteOk;
378 }
379 }  // namespace
380 
381 namespace internal {
382 
383 // Handles architecture safe mapping of flatbuffer vectors to a TfLite*Array
384 // struct. Matching types are required (e.g. float and TfLiteFloatArray).
385 // Big-endian systems will always allocate dimension array data in the tail
386 // (persistent) section.
387 template <typename kFlatBufferVectorType, typename kTfLiteArrayType>
388 TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
389     SimpleMemoryAllocator* allocator, ErrorReporter* error_reporter,
390     const flatbuffers::Vector<kFlatBufferVectorType>* flatbuffer_array,
391     kTfLiteArrayType** result) {
392   TFLITE_DCHECK(error_reporter != nullptr);
393   TFLITE_DCHECK(flatbuffer_array != nullptr);
394   // TODO(b/159668691): Consider adding type assertion or breaking this function
395   // into multiple functions for each type. std::is_same is c++11 and has a
396   // special updated constructor in c++17 that requires a string argument.
397   if (FLATBUFFERS_LITTLEENDIAN) {
398     // On little-endian machines, TfLite*Array happens to have the same memory
399     // layout as flatbuffers::Vector<kFlatBufferVectorType>, so we can
400     // reinterpret_cast the flatbuffer vector and avoid a copy and malloc.
401     *result = const_cast<kTfLiteArrayType*>(
402         reinterpret_cast<const kTfLiteArrayType*>(flatbuffer_array));
403   } else {
404     // Big-endian architecture can not use the same memory layout as
405     // flatbuffers::Vector<kFlatBufferVectorType>. Allocate from the tail and
406     // copy values from the flatbuffer into the newly allocated chunk.
407     kTfLiteArrayType* array =
408         reinterpret_cast<kTfLiteArrayType*>(allocator->AllocateFromTail(
409             TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()),
410             alignof(kTfLiteArrayType)));
411     if (array == nullptr) {
412       TF_LITE_REPORT_ERROR(
413           error_reporter,
414           "Failed to allocate %d bytes of memory to copy an array.",
415           TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()));
416       return kTfLiteError;
417     }
418     array->size = flatbuffer_array->Length();
419     for (int i = 0; i < array->size; ++i) {
420       array->data[i] = flatbuffer_array->Get(i);
421     }
422     *result = array;
423   }
424   return kTfLiteOk;
425 }
426 
427 // Returns a pointer to any buffer associated with the flatbuffer tensor. Can
428 // return nullptr if no buffer is found.
429 void* GetFlatbufferTensorBuffer(
430     const tflite::Tensor& flatbuffer_tensor,
431     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers) {
432   // We need to figure out where the actual contents of this tensor are stored
433   // in memory. We'll check to see if there's a serialized buffer (pretty much
434   // the same as a constant op in TensorFlow) associated with this tensor first,
435   // and if there is, update the runtime structure to point to its location in
436   // memory.
437   // First see if there's any buffer information in the serialized tensor.
438   // TODO(b/170379532): Add better unit tests to validate flatbuffer values.
439   void* out_buffer = nullptr;
440   if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
441     // If we've found a buffer, does it have any data?
442     if (auto* array = buffer->data()) {
443       // If it has any data, is the data size larger than zero?
444       if (array->size()) {
445         // We've found a buffer with valid data, so update the runtime tensor
446         // data structure to point to it.
447         out_buffer = const_cast<void*>(static_cast<const void*>(array->data()));
448       }
449     }
450     // TODO(petewarden): It's not clear in what circumstances we could have a
451     // buffer in the serialized tensor, but it doesn't have any data in it. Is
452     // that a validly-generated file, and if so what does it mean, or is it an
453     // error condition? It would be good to tighten up the specification to make
454     // it less ambiguous.
455   }
456   return out_buffer;
457 }
458 
459 TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
460     SimpleMemoryAllocator* allocator, bool allocate_temp,
461     const tflite::Tensor& flatbuffer_tensor,
462     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
463     ErrorReporter* error_reporter, TfLiteTensor* result) {
464   TFLITE_DCHECK(result != nullptr);
465 
466   *result = {};
467   // Make sure the serialized type is one we know how to deal with, and convert
468   // it from a flatbuffer enum into a constant used by the kernel C API.
469   TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
470                                           &result->type, error_reporter));
471   // Make sure we remember if the serialized tensor is designated as a variable.
472   result->is_variable = flatbuffer_tensor.is_variable();
473 
474   result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);
475 
476   // TODO(petewarden): Some of these paths aren't getting enough testing
477   // coverage, so we should figure out some tests that exercise them.
478   if (result->data.data == nullptr) {
479     // The tensor contents haven't been set from a serialized buffer, so
480     // make a note that they will be allocated from memory. The actual
481     // allocation won't happen until later.
482     result->allocation_type = kTfLiteArenaRw;
483   } else {
484     // We set the data from a serialized buffer, so record that.
485     result->allocation_type = kTfLiteMmapRo;
486   }
487 
488   // Figure out what the size in bytes of the buffer is and store it.
489   size_t type_size;
490   TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
491       flatbuffer_tensor, &result->bytes, &type_size, error_reporter));
492 
493   if (flatbuffer_tensor.shape() == nullptr) {
494     // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
495     // tensor.
496     result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
497   } else {
498     // TFLM doesn't allow reshaping the tensor, which would require dynamic
499     // memory allocation, so it is safe to drop the const qualifier. In the
500     // future, if we really want to update the tensor shape, we can always pass
501     // in a new TfLiteIntArray - and we would have to do so if the dimensions changed.
502     TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
503         allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
504   }
505 
506   // Copy the quantization information from the serialized data.
507   const auto* src_quantization = flatbuffer_tensor.quantization();
508   if (src_quantization && src_quantization->scale() &&
509       (src_quantization->scale()->size() > 0) &&
510       src_quantization->zero_point() &&
511       (src_quantization->zero_point()->size() > 0)) {
512     // Always populate the TfLiteTensor.params field, even if there are
513     // per-channel quantization parameters.
514     result->params.scale = src_quantization->scale()->Get(0);
515     // Note that the zero_point field in the FlatBuffers schema is a 64-bit
516     // integer, but the zero_point field in the TfLiteQuantizationParams struct
517     // is a 32-bit integer.
518     result->params.zero_point =
519         static_cast<int32_t>(src_quantization->zero_point()->Get(0));
520 
521     // Populate per-channel quantization params.
522     int channels = src_quantization->scale()->size();
523     TfLiteAffineQuantization* quantization =
524         allocate_temp
525             ? reinterpret_cast<TfLiteAffineQuantization*>(
526                   allocator->AllocateTemp(sizeof(TfLiteAffineQuantization),
527                                           alignof(TfLiteAffineQuantization)))
528             : reinterpret_cast<TfLiteAffineQuantization*>(
529                   allocator->AllocateFromTail(
530                       sizeof(TfLiteAffineQuantization),
531                       alignof(TfLiteAffineQuantization)));
532     if (quantization == nullptr) {
533       TF_LITE_REPORT_ERROR(error_reporter,
534                            "Unable to allocate TfLiteAffineQuantization.\n");
535       return kTfLiteError;
536     }
537 
538     // TODO(b/153688719): Reduce tail allocation by using a global zero-point
539     // buffer. This value can not be reused from the flatbuffer since the
540     // zero_point is stored as a int64_t.
541     quantization->zero_point =
542         allocate_temp
543             ? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateTemp(
544                   TfLiteIntArrayGetSizeInBytes(channels),
545                   alignof(TfLiteIntArray)))
546             : reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
547                   TfLiteIntArrayGetSizeInBytes(channels),
548                   alignof(TfLiteIntArray)));
549     if (quantization->zero_point == nullptr) {
550       TF_LITE_REPORT_ERROR(error_reporter,
551                            "Unable to allocate quantization->zero_point.\n");
552       return kTfLiteError;
553     }
554 
555     TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
556         allocator, error_reporter, src_quantization->scale(),
557         &quantization->scale));
558 
559     quantization->zero_point->size = channels;
560     int* zero_point_data = quantization->zero_point->data;
561     for (int i = 0; i < channels; i++) {
562       zero_point_data[i] = src_quantization->zero_point()->Get(i);
563     }
564     // TODO(rocky): Need to add a micro_allocator test case that fails when
565     // this is not copied:
566     quantization->quantized_dimension = src_quantization->quantized_dimension();
567 
568     result->quantization = {kTfLiteAffineQuantization, quantization};
569   }
570   return kTfLiteOk;
571 }
572 
573 TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
574     SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
575     const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
576     ErrorReporter* error_reporter, TfLiteEvalTensor* result) {
577   *result = {};
578   // Make sure the serialized type is one we know how to deal with, and convert
579   // it from a flatbuffer enum into a constant used by the kernel C API.
580   TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
581                                           &result->type, error_reporter));
582 
583   result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);
584 
585   if (flatbuffer_tensor.shape() == nullptr) {
586     // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
587     // tensor.
588     result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
589   } else {
590     TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
591         allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
592   }
593   return kTfLiteOk;
594 }
595 
596 }  // namespace internal
597 
598 MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator,
599                                ErrorReporter* error_reporter)
600     : memory_allocator_(memory_allocator),
601       error_reporter_(error_reporter),
602       model_is_allocating_(false) {}
603 
604 MicroAllocator::~MicroAllocator() {}
605 
606 MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
607                                        ErrorReporter* error_reporter) {
608   uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
609   size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
610   return Create(SimpleMemoryAllocator::Create(error_reporter, aligned_arena,
611                                               aligned_arena_size),
612                 error_reporter);
613 }
614 
615 MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
616                                        ErrorReporter* error_reporter) {
617   TFLITE_DCHECK(memory_allocator != nullptr);
618   TFLITE_DCHECK(error_reporter != nullptr);
619 
620   uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
621       sizeof(MicroAllocator), alignof(MicroAllocator));
622   MicroAllocator* allocator =
623       new (allocator_buffer) MicroAllocator(memory_allocator, error_reporter);
624   return allocator;
625 }
626 
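// Example (sketch) of creating an allocator from a caller-provided arena; the
// arena size and the 16-byte alignment shown here are illustrative only:
//
//   constexpr size_t kArenaSize = 16 * 1024;
//   alignas(16) static uint8_t tensor_arena[kArenaSize];
//   MicroAllocator* allocator =
//       MicroAllocator::Create(tensor_arena, kArenaSize, error_reporter);
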
627 TfLiteStatus MicroAllocator::StartModelAllocation(
628     const Model* model, const MicroOpResolver& op_resolver,
629     NodeAndRegistration** node_and_registrations,
630     TfLiteEvalTensor** eval_tensors) {
631   TFLITE_DCHECK(model != nullptr);
632 
633   if (model_is_allocating_) {
634     TF_LITE_REPORT_ERROR(error_reporter_,
635                          "MicroAllocator: Model allocation started before "
636                          "the previous model allocation was finished");
637     return kTfLiteError;
638   }
639 
640   model_is_allocating_ = true;
641 
642   TF_LITE_ENSURE_STATUS(InitScratchBufferData());
643   TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors));
644   TF_LITE_ENSURE_STATUS(
645       AllocateNodeAndRegistrations(model, node_and_registrations));
646   TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
647       model, op_resolver, *node_and_registrations));
648 
649   return kTfLiteOk;
650 }
651 
652 TfLiteStatus MicroAllocator::FinishModelAllocation(
653     const Model* model, TfLiteEvalTensor* eval_tensors,
654     ScratchBufferHandle** scratch_buffer_handles) {
655   if (!model_is_allocating_) {
656     TF_LITE_REPORT_ERROR(error_reporter_,
657                          "MicroAllocator: Model allocation finished before "
658                          "it was started");
659     return kTfLiteError;
660   }
661 
662   const SubGraph* subgraph = GetSubGraphFromModel(model);
663   TFLITE_DCHECK(subgraph != nullptr);
664 
665   TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
666       scratch_buffer_handles, scratch_buffer_request_count_));
667   TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors,
668                                                *scratch_buffer_handles));
669   TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors));
670 
671   model_is_allocating_ = false;
672   return kTfLiteOk;
673 }
674 
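// Sketch of the allocation lifecycle as driven by the interpreter (local
// variable names are illustrative):
//
//   NodeAndRegistration* node_and_registrations = nullptr;
//   TfLiteEvalTensor* eval_tensors = nullptr;
//   TF_LITE_ENSURE_STATUS(allocator->StartModelAllocation(
//       model, op_resolver, &node_and_registrations, &eval_tensors));
//   // ... kernels run Init/Prepare and may request scratch buffers ...
//   ScratchBufferHandle* scratch_buffer_handles = nullptr;
//   TF_LITE_ENSURE_STATUS(allocator->FinishModelAllocation(
//       model, eval_tensors, &scratch_buffer_handles));
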
675 void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
676   return memory_allocator_->AllocateFromTail(bytes, kBufferAlignment);
677 }
678 
679 TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
680                                                          int* buffer_idx) {
681   // All scratch buffer requests are stored in the head section of the arena
682   // when a model is in the prepare phase. First align a scratch buffer request
683   // pointer to the start of the head:
684   internal::ScratchBufferRequest* requests = GetScratchBufferRequests();
685 
686   // Count the number of requested scratch buffers for the current node:
687   size_t current_node_request_count = 0;
688   for (size_t i = 0; i < scratch_buffer_request_count_; ++i) {
689     if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) {
690       ++current_node_request_count;
691     }
692   }
693 
694   // First, ensure that the per-kernel request has not exceeded the limit:
695   if (current_node_request_count >= kMaxScratchBuffersPerOp) {
696     TF_LITE_REPORT_ERROR(
697         error_reporter_,
698         "Scratch buffer request exceeds limit per operator (%d)",
699         kMaxScratchBuffersPerOp);
700     return kTfLiteError;
701   }
702 
703   // Initialize and assign values for the request at the current index:
704   internal::ScratchBufferRequest* current_request =
705       &requests[scratch_buffer_request_count_];
706   *current_request = {};
707   // Assign -1 as a sentinel value that will be updated when the node finishes
708   // allocating:
709   current_request->bytes = bytes;
710   current_request->node_idx = kUnassignedScratchBufferRequestIndex;
711 
712   // Assign the current request index to the out-param:
713   *buffer_idx = scratch_buffer_request_count_;
714 
715   // Bump the request count to prepare for the next request:
716   ++scratch_buffer_request_count_;
717   return kTfLiteOk;
718 }
719 
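// Sketch of how an operator kernel typically exercises
// RequestScratchBufferInArena() above. Kernels do not call the allocator
// directly; they go through the TfLiteContext callbacks that the interpreter
// wires up to this allocator:
//
//   // In Prepare():
//   int scratch_idx = -1;
//   TF_LITE_ENSURE_STATUS(
//       context->RequestScratchBufferInArena(context, bytes, &scratch_idx));
//   // In Eval():
//   void* scratch = context->GetScratchBuffer(context, scratch_idx);
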
720 TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
721   // When a node has finished preparing, all temp allocations performed by the
722   // kernel should be cleaned up:
723   ResetTempAllocations();
724 
725   // Find and update any new scratch buffer requests for the current node:
726   internal::ScratchBufferRequest* requests = GetScratchBufferRequests();
727 
728   for (size_t i = 0; i < scratch_buffer_request_count_; ++i) {
729     // A request with a node_idx of -1 is a sentinel value used to indicate this
730     // was a new request for the current node. The allocator finally knows the
731     // node index at this point. Assign the value and update the list of new
732     // requests so the head section can be adjusted to allow for the next kernel
733     // to allocate at most kMaxScratchBuffersPerOp requests:
734     if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) {
735       requests[i].node_idx = node_id;
736     }
737   }
738 
739   // Ensure that the head is re-adjusted to allow for another at-most
740   // kMaxScratchBuffersPerOp scratch buffer requests in the next operator:
741   TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
742       sizeof(internal::ScratchBufferRequest) *
743           (scratch_buffer_request_count_ + kMaxScratchBuffersPerOp),
744       alignof(internal::ScratchBufferRequest)));
745 
746   return kTfLiteOk;
747 }
748 
749 size_t MicroAllocator::used_bytes() const {
750   return memory_allocator_->GetUsedBytes();
751 }
752 
753 TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
754     const Model* model, NodeAndRegistration** node_and_registrations) {
755   TFLITE_DCHECK(node_and_registrations);
756 
757   const SubGraph* subgraph = GetSubGraphFromModel(model);
758   TFLITE_DCHECK(subgraph != nullptr);
759 
760   NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
761       memory_allocator_->AllocateFromTail(
762           sizeof(NodeAndRegistration) * subgraph->operators()->size(),
763           alignof(NodeAndRegistration)));
764   if (output == nullptr) {
765     TF_LITE_REPORT_ERROR(
766         error_reporter_,
767         "Failed to allocate memory for node_and_registrations.");
768     return kTfLiteError;
769   }
770   *node_and_registrations = output;
771   return kTfLiteOk;
772 }
773 
774 TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
775     const Model* model, const MicroOpResolver& op_resolver,
776     NodeAndRegistration* node_and_registrations) {
777   TFLITE_DCHECK(model != nullptr);
778   TFLITE_DCHECK(node_and_registrations != nullptr);
779 
780   const SubGraph* subgraph = GetSubGraphFromModel(model);
781   TFLITE_DCHECK(subgraph != nullptr);
782 
783   TfLiteStatus status = kTfLiteOk;
784   auto* opcodes = model->operator_codes();
785   MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
786   for (size_t i = 0; i < subgraph->operators()->size(); ++i) {
787     const auto* op = subgraph->operators()->Get(i);
788     const size_t index = op->opcode_index();
789     if (index >= opcodes->size()) {
790       TF_LITE_REPORT_ERROR(error_reporter_,
791                            "Missing registration for opcode_index %d\n", index);
792       return kTfLiteError;
793     }
794     auto* opcode = (*opcodes)[index];
795     status =
796         GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
797                                   &(node_and_registrations[i].registration));
798     if (status != kTfLiteOk) {
799       TF_LITE_REPORT_ERROR(error_reporter_,
800                            "Failed to get registration from op code %s\n ",
801                            EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
802       return status;
803     }
804     const auto* registration = node_and_registrations[i].registration;
805     if (registration == nullptr) {
806       TF_LITE_REPORT_ERROR(error_reporter_, "Skipping op for opcode_index %d\n",
807                            index);
808       return kTfLiteError;
809     }
810     BuiltinOperator op_type =
811         static_cast<BuiltinOperator>(registration->builtin_code);
812 
813     const char* custom_data = nullptr;
814     size_t custom_data_size = 0;
815     unsigned char* builtin_data = nullptr;
816 
817     if (op_type == BuiltinOperator_CUSTOM) {
818       // Custom Ops may or may not have a non-null custom_options field.
819       if (op->custom_options() != nullptr) {
820         custom_data =
821             reinterpret_cast<const char*>(op->custom_options()->data());
822         custom_data_size = op->custom_options()->size();
823       }
824     } else {
825       if (op->custom_options() != nullptr) {
826         TF_LITE_REPORT_ERROR(
827             error_reporter_,
828             "Unsupported behavior: found builtin operator %s with custom "
829             "options.\n",
830             EnumNameBuiltinOperator(op_type));
831         return kTfLiteError;
832       }
833 
834       MicroOpResolver::BuiltinParseFunction parser =
835           op_resolver.GetOpDataParser(op_type);
836       if (parser == nullptr) {
837         TF_LITE_REPORT_ERROR(error_reporter_, "Did not find a parser for %s",
838                              EnumNameBuiltinOperator(op_type));
839 
840         return kTfLiteError;
841       }
842       TF_LITE_ENSURE_STATUS(parser(op, error_reporter_, &builtin_data_allocator,
843                                    (void**)(&builtin_data)));
844     }
845 
846     TfLiteIntArray* inputs_array;
847     TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
848         memory_allocator_, error_reporter_, op->inputs(), &inputs_array));
849 
850     TfLiteIntArray* outputs_array;
851     TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
852         memory_allocator_, error_reporter_, op->outputs(), &outputs_array));
853 
854     TfLiteNode* node = &(node_and_registrations[i].node);
855     *node = {};
856     node->inputs = inputs_array;
857     node->outputs = outputs_array;
858     node->builtin_data = reinterpret_cast<void*>(builtin_data);
859     node->custom_initial_data = custom_data;
860     node->custom_initial_data_size = custom_data_size;
861   }
862 
863   return kTfLiteOk;
864 }
865 
866 TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
867     const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
868   const SubGraph* subgraph = GetSubGraphFromModel(model);
869   TFLITE_DCHECK(subgraph != nullptr);
870 
871   // This value is allocated from persistent arena space. It is guaranteed to be
872   // around for the lifetime of the application.
873   TfLiteTensor* tensor =
874       AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index);
875 
876   // Populate any fields from the flatbuffer. Since this TfLiteTensor struct is
877   // allocated in the persistent section of the arena, ensure that additional
878   // allocations also take place in that section of the arena.
879   if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
880                                          /*allocate_temp=*/false) !=
881       kTfLiteOk) {
882     TF_LITE_REPORT_ERROR(error_reporter_,
883                          "Failed to populate a persistent TfLiteTensor struct "
884                          "from flatbuffer data!");
885     return nullptr;
886   }
887 
888   if (eval_tensors != nullptr) {
889     // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
890     // and not located in the flatbuffer are stored on the pre-allocated list of
891     // TfLiteEvalTensors structs. These structs are the source of truth, so simply
892     // point the new TfLiteTensor's data at the corresponding buffer.
893     tensor->data.data = eval_tensors[tensor_index].data.data;
894   }
895   return tensor;
896 }
897 
898 TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
899     const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
900   const SubGraph* subgraph = GetSubGraphFromModel(model);
901   TFLITE_DCHECK(subgraph != nullptr);
902 
903   // This value is allocated from temporary arena space. It is guaranteed to be
904   // around for at least the scope of the calling function. Since this struct
905   // allocation takes place in temp space, no need to own or cleanup.
906   TfLiteTensor* tensor =
907       reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateTemp(
908           sizeof(TfLiteTensor), alignof(TfLiteTensor)));
909 
910   // Populate any fields from the flatbuffer. Since this TfLiteTensor struct is
911   // allocated in the temp section of the arena, ensure that additional
912   // allocations also take place in that section of the arena.
913   if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
914                                          /*allocate_temp=*/true) != kTfLiteOk) {
915     TF_LITE_REPORT_ERROR(
916         error_reporter_,
917         "Failed to populate a temp TfLiteTensor struct from flatbuffer data!");
918     return nullptr;
919   }
920 
921   if (eval_tensors != nullptr) {
922     // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
923     // and not located in the flatbuffer are stored on the pre-allocated list of
924     // TfLiteEvalTensors structs. These structs are the source of truth, so simply
925     // point the new TfLiteTensor's data at the corresponding buffer.
926     tensor->data.data = eval_tensors[tensor_index].data.data;
927   }
928   return tensor;
929 }
930 
931 void MicroAllocator::ResetTempAllocations() {
932   memory_allocator_->ResetTempAllocations();
933 }
934 
935 TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
936     const Model* model, TfLiteEvalTensor** eval_tensors) {
937   TFLITE_DCHECK(eval_tensors != nullptr);
938 
939   const SubGraph* subgraph = GetSubGraphFromModel(model);
940   TFLITE_DCHECK(subgraph != nullptr);
941 
942   size_t alloc_count = subgraph->tensors()->size();
943   TfLiteEvalTensor* tensors =
944       reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
945           sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
946   if (tensors == nullptr) {
947     TF_LITE_REPORT_ERROR(error_reporter_,
948                          "Failed to allocate memory for context->eval_tensors, "
949                          "%d bytes required",
950                          sizeof(TfLiteEvalTensor) * alloc_count);
951     return kTfLiteError;
952   }
953 
954   for (size_t i = 0; i < alloc_count; ++i) {
955     TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
956         memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
957         error_reporter_, &tensors[i]);
958     if (status != kTfLiteOk) {
959       TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
960                            i);
961       return kTfLiteError;
962     }
963   }
964   *eval_tensors = tensors;
965   return kTfLiteOk;
966 }
967 
968 TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
969                                                TfLiteEvalTensor* eval_tensors) {
970   for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
971     auto* tensor = subgraph->tensors()->Get(i);
972     if (tensor->is_variable()) {
973       size_t buffer_size;
974       TF_LITE_ENSURE_STATUS(
975           TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));
976 
977       eval_tensors[i].data.data =
978           memory_allocator_->AllocateFromTail(buffer_size, kBufferAlignment);
979 
980       if (eval_tensors[i].data.data == nullptr) {
981         TF_LITE_REPORT_ERROR(error_reporter_,
982                              "Failed to allocate variable tensor of size %d",
983                              buffer_size);
984         return kTfLiteError;
985       }
986     }
987   }
988   return kTfLiteOk;
989 }
990 
991 TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal(
992     const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
993   return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
994       sizeof(TfLiteTensor), alignof(TfLiteTensor)));
995 }
996 
997 TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
998     const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
999     int tensor_index, bool allocate_temp) {
1000   // TODO(b/162311891): This method serves as a stub to ensure quantized
1001   // allocations in the tail can be recorded. Once the interpreter has APIs for
1002   // accessing buffers on TfLiteEvalTensor this method can be dropped.
1003   return internal::InitializeTfLiteTensorFromFlatbuffer(
1004       memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index),
1005       model->buffers(), error_reporter_, tensor);
1006 }
1007 
1008 ErrorReporter* MicroAllocator::error_reporter() const {
1009   return error_reporter_;
1010 }
1011 
1012 const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
1013   auto* subgraphs = model->subgraphs();
1014   if (subgraphs->size() != 1) {
1015     TF_LITE_REPORT_ERROR(error_reporter_,
1016                          "Only 1 subgraph is currently supported.\n");
1017     return nullptr;
1018   }
1019   return (*subgraphs)[0];
1020 }
1021 
1022 TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
1023     const Model* model, const SubGraph* subgraph,
1024     TfLiteEvalTensor* eval_tensors,
1025     ScratchBufferHandle* scratch_buffer_handles) {
1026   size_t head_usage = 0;
1027   // Create static memory plan
1028   // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
1029   // 2. Add them into the planner (such as the GreedyMemoryPlanner).
1030   // 3. Static memory planning using the planner.
1031   // 4. Set tensor/buffer pointers based on the offsets from the previous step.
1032   //
1033   // Note that AllocationInfo is only needed for creating the plan. It will be
1034   // allocated from the temp section and cleaned up at the bottom of this
1035   // function.
1036 
1037   size_t allocation_info_count =
1038       subgraph->tensors()->size() + scratch_buffer_request_count_;
1039   size_t bytes = sizeof(AllocationInfo) * allocation_info_count;
1040 
1041   // Allocate an array of AllocationInfo structs from the temp section. This
1042   // struct will be used by AllocationInfoBuilder to find buffer usage.
1043   AllocationInfo* allocation_info = reinterpret_cast<AllocationInfo*>(
1044       memory_allocator_->AllocateTemp(bytes, alignof(AllocationInfo)));
1045   if (allocation_info == nullptr) {
1046     TF_LITE_REPORT_ERROR(
1047         error_reporter_,
1048         "Failed to allocate memory for allocation_info, %d bytes required",
1049         bytes);
1050     return kTfLiteError;
1051   }
1052 
1053   // Use the AllocationInfoBuilder class to help determine where buffers are
1054   // used in the subgraph.
1055   AllocationInfoBuilder builder(allocation_info, subgraph->tensors()->size(),
1056                                 scratch_buffer_request_count_, error_reporter_);
1057 
1058   const int32_t* offline_planner_offsets = nullptr;
1059   TF_LITE_ENSURE_STATUS(
1060       builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
1061   TF_LITE_ENSURE_STATUS(
1062       builder.AddTensors(subgraph, offline_planner_offsets, eval_tensors));
1063 
1064   internal::ScratchBufferRequest* scratch_buffer_requests =
1065       GetScratchBufferRequests();
1066 
1067   TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_requests,
1068                                                   scratch_buffer_handles));
1069 
1070   // Remaining arena size that memory planner can use for calculating offsets.
1071   size_t remaining_arena_size =
1072       memory_allocator_->GetAvailableMemory(kBufferAlignment);
1073   uint8_t* planner_arena =
1074       memory_allocator_->AllocateTemp(remaining_arena_size, kBufferAlignment);
1075   TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
1076   GreedyMemoryPlanner planner(planner_arena, remaining_arena_size);
1077   TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, &planner, allocation_info,
1078                                    allocation_info_count));
1079 
1080   // Reset all temp allocations used above:
1081   memory_allocator_->ResetTempAllocations();
1082 
1083   size_t actual_available_arena_size =
1084       memory_allocator_->GetAvailableMemory(kBufferAlignment);
1085 
1086   // Make sure we have enough arena size.
1087   if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
1088     TF_LITE_REPORT_ERROR(
1089         error_reporter_,
1090         "Arena size is too small for all buffers. Needed %u but only "
1091         "%u was available.",
1092         planner.GetMaximumMemorySize(), actual_available_arena_size);
1093     return kTfLiteError;
1094   }
1095   // Commit the plan.
1096   TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
1097                                    memory_allocator_->GetHeadBuffer(),
1098                                    allocation_info, allocation_info_count));
1099   head_usage = planner.GetMaximumMemorySize();
1100 
1101   // The head is used to store memory plans for one model at a time during the
1102   // model preparation stage, and is re-purposed to store scratch buffer handles
1103   // during model invocation. The head must be as large as the greater of the
1104   // largest model memory plan's size and the total space required for all
1105   // scratch buffer handles.
1106   if (max_head_buffer_usage_ < head_usage) {
1107     max_head_buffer_usage_ = head_usage;
1108   }
1109 
1110   // The head is used for storing scratch buffer allocations before finalizing a
1111   // memory plan in this function. Ensure that the head is set to the largest
1112   // memory plan sent through the allocator:
1113   TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
1114       max_head_buffer_usage_, kBufferAlignment));
1115   return kTfLiteOk;
1116 }
1117 
1118 TfLiteStatus MicroAllocator::AllocateScratchBufferHandles(
1119     ScratchBufferHandle** scratch_buffer_handles, size_t handle_count) {
1120   TFLITE_DCHECK(scratch_buffer_handles != nullptr);
1121 
1122   if (scratch_buffer_request_count_ == 0) {
1123     // No scratch buffers were requested during model allocation.
1124     return kTfLiteOk;
1125   }
1126 
1127   // Allocate a consecutive block of memory to store the scratch buffer handles.
1128   // This alignment ensures quick lookup during inference time for the model:
1129   *scratch_buffer_handles = reinterpret_cast<ScratchBufferHandle*>(
1130       memory_allocator_->AllocateFromTail(
1131           sizeof(ScratchBufferHandle) * handle_count,
1132           alignof(ScratchBufferHandle)));
1133 
1134   return kTfLiteOk;
1135 }
1136 
1137 TfLiteStatus MicroAllocator::InitScratchBufferData() {
1138   // A model is preparing to allocate resources, ensure that scratch buffer
1139   // request counter is cleared:
1140   scratch_buffer_request_count_ = 0;
1141 
1142   // All requests will be stored in the head section. Each kernel is allowed at
1143   // most kMaxScratchBuffersPerOp requests. Adjust the head to reserve at most
1144   // that many requests to begin:
1145   TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
1146       sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
1147       alignof(internal::ScratchBufferRequest)));
1148 
1149   return kTfLiteOk;
1150 }
1151 
1152 internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
1153   return reinterpret_cast<internal::ScratchBufferRequest*>(
1154       AlignPointerUp(memory_allocator_->GetHeadBuffer(),
1155                      alignof(internal::ScratchBufferRequest)));
1156 }
1157 
1158 }  // namespace tflite
1159