1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/micro/micro_allocator.h"
17
18 #include <cstddef>
19 #include <cstdint>
20
21 #include "flatbuffers/flatbuffers.h" // from @flatbuffers
22 #include "tensorflow/lite/c/common.h"
23 #include "tensorflow/lite/core/api/error_reporter.h"
24 #include "tensorflow/lite/core/api/flatbuffer_conversions.h"
25 #include "tensorflow/lite/core/api/op_resolver.h"
26 #include "tensorflow/lite/core/api/tensor_utils.h"
27 #include "tensorflow/lite/kernels/internal/compatibility.h"
28 #include "tensorflow/lite/micro/compatibility.h"
29 #include "tensorflow/lite/micro/memory_helpers.h"
30 #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
31 #include "tensorflow/lite/micro/memory_planner/memory_planner.h"
32 #include "tensorflow/lite/micro/micro_op_resolver.h"
33 #include "tensorflow/lite/micro/simple_memory_allocator.h"
34 #include "tensorflow/lite/schema/schema_generated.h"
35 #include "tensorflow/lite/schema/schema_utils.h"
36
37 namespace tflite {
38
39 namespace {
40
// Maximum number of scratch buffer requests per operator. Operator kernels
// that request more than this value will receive an error status (TFLM does
// not use C++ exceptions).
constexpr size_t kMaxScratchBuffersPerOp = 12;

// Sentinel value used as a placeholder to mark that a ScratchBufferRequest
// still needs a node id assignment (assigned in FinishPrepareNodeAllocations).
constexpr int kUnassignedScratchBufferRequestIndex = -1;
48
// Used to hold information used during allocation calculations.
struct AllocationInfo {
  size_t bytes;            // Required buffer size in bytes.
  void** output_ptr;       // Where the planned arena address is written.
  int first_created;       // Index of the first node needing this buffer.
  int last_used;           // Index of the last node reading this buffer.
  int32_t offline_offset;  // Offset from metadata, or kOnlinePlannedBuffer.
  bool needs_allocating;   // False for preallocated or variable tensors.
};

// We align tensor buffers to 16-byte boundaries, since this is a common
// requirement for SIMD extensions.
constexpr int kBufferAlignment = 16;
// Name of the model metadata entry that carries offline-planned offsets.
constexpr char kOfflineMemAllocMetadata[] = "OfflineMemoryAllocation";
// Shared zero-length dims array used for scalar tensors (null shape()).
const TfLiteIntArray kZeroLengthIntArray = {};
64
65 class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
66 public:
MicroBuiltinDataAllocator(SimpleMemoryAllocator * memory_allocator)67 explicit MicroBuiltinDataAllocator(SimpleMemoryAllocator* memory_allocator)
68 : memory_allocator_(memory_allocator) {}
69
Allocate(size_t size,size_t alignment_hint)70 void* Allocate(size_t size, size_t alignment_hint) override {
71 return memory_allocator_->AllocateFromTail(size, alignment_hint);
72 }
Deallocate(void * data)73 void Deallocate(void* data) override {
74 // Do not deallocate, builtin data needs to be available for the life time
75 // of the model.
76 }
77
78 private:
79 SimpleMemoryAllocator* memory_allocator_;
80
81 TF_LITE_REMOVE_VIRTUAL_DELETE
82 };
83
#if !defined(__clang__)
// Helper function to check flatbuffer metadata correctness. This function is
// not called by default. Hence it's not linked in to the final binary code.
TfLiteStatus CheckOfflinePlannedOffsets(const Model* model,
                                        ErrorReporter* error_reporter) {
  // Suppress compile warning for unused function
  (void)CheckOfflinePlannedOffsets;

  if (model->metadata()) {
    // Scan all metadata entries for the offline memory plan.
    for (size_t i = 0; i < model->metadata()->size(); ++i) {
      auto metadata = model->metadata()->Get(i);
      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
                  strlen(kOfflineMemAllocMetadata)) == 0) {
        // NOTE: only subgraph 0 is inspected; multi-subgraph plans are
        // rejected below.
        auto* subgraphs = model->subgraphs();
        const SubGraph* subgraph = (*subgraphs)[0];
        const flatbuffers::Vector<flatbuffers::Offset<Tensor>>* tensors =
            subgraph->tensors();
        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
            model->buffers();
        int nbr_tflite_tensors = tensors->size();
        auto* buffer = (*buffers)[metadata->buffer()];
        auto* array = buffer->data();
        // Metadata layout: [0] version, [1] subgraph index, [2] offset count,
        // [3..] per-tensor arena offsets (see the table above
        // GetOfflinePlannedOffsets).
        const uint32_t* metadata_buffer = (uint32_t*)array->data();
        int version = metadata_buffer[0];
        int subgraph_idx = metadata_buffer[1];
        const int nbr_offline_offsets = metadata_buffer[2];
#ifndef TF_LITE_STRIP_ERROR_STRINGS
        // Only used by the diagnostic prints below, which compile away when
        // error strings are stripped.
        int* offline_planner_offsets = (int*)&metadata_buffer[3];
#endif

        TF_LITE_REPORT_ERROR(error_reporter, "==== Model metadata info: =====");
        TF_LITE_REPORT_ERROR(error_reporter,
                             "Offline planner metadata found, version %d, "
                             "subgraph %d, nbr offline offsets %d",
                             version, subgraph_idx, nbr_offline_offsets);
        for (int j = 0; j < nbr_offline_offsets; ++j) {
          TF_LITE_REPORT_ERROR(
              error_reporter,
              "Offline planner tensor index %d, offline offset: %d", j,
              offline_planner_offsets[j]);
        }

        // Only format version 1 is understood.
        if (version != 1) {
          TF_LITE_REPORT_ERROR(error_reporter, "Version not supported! (%d)\n",
                               version);
          return kTfLiteError;
        }
        if (subgraph_idx != 0) {
          TF_LITE_REPORT_ERROR(error_reporter,
                               "Only 1 subgraph supported! Subgraph idx (%d)\n",
                               subgraph_idx);
          return kTfLiteError;
        }
        // The offline plan must cover every tensor in the subgraph.
        if (nbr_tflite_tensors != nbr_offline_offsets) {
          TF_LITE_REPORT_ERROR(error_reporter,
                               "Nbr of offline buffer offsets (%d) in metadata "
                               "not equal nbr tensors (%d)\n",
                               nbr_offline_offsets, nbr_tflite_tensors);
          return kTfLiteError;
        }
      }
    }
  }
  return kTfLiteOk;
}
#endif
150
// A helper class to construct AllocationInfo array. This array contains the
// lifetime of tensors / scratch_buffer and will be used to calculate the
// memory plan. Methods need to be called in order from `Init`, `Add*`, to
// `Finish`.
class AllocationInfoBuilder {
 public:
  AllocationInfoBuilder(AllocationInfo* info, size_t tensor_count,
                        size_t scratch_buffer_count, ErrorReporter* reporter)
      : info_(info),
        tensor_count_(tensor_count),
        buffer_count_(scratch_buffer_count),
        reporter_(reporter) {}

  // Check if model contains offline planned buffer offsets.
  //  - If there's no metadata available, offline_planner_offsets is not set
  //  - If there's metadata available, offline_planner_offsets will point to
  //    the first offset in the metadata buffer list.
  TfLiteStatus GetOfflinePlannedOffsets(
      const Model* model, const int32_t** offline_planner_offsets);

  // Add allocation information for the tensors.
  TfLiteStatus AddTensors(const SubGraph* subgraph,
                          const int32_t* offline_offsets,
                          TfLiteEvalTensor* eval_tensors);

  // Add allocation information for the scratch buffers.
  TfLiteStatus AddScratchBuffers(
      internal::ScratchBufferRequest* scratch_buffer_requests,
      ScratchBufferHandle* scratch_buffer_handles);

  // Returns a pointer to the built AllocationInfo array.
  const AllocationInfo* Finish() const { return info_; }

 private:
  AllocationInfo* info_ = nullptr;  // Caller-owned output array.
  size_t tensor_count_ = 0;         // Number of tensor entries in info_.
  size_t buffer_count_ = 0;         // Number of scratch buffer entries.
  ErrorReporter* reporter_ = nullptr;
};
189
// Builds a lifetime entry (first/last node use, size, allocation flag) for
// every tensor in the subgraph.
TfLiteStatus AllocationInfoBuilder::AddTensors(const SubGraph* subgraph,
                                               const int32_t* offline_offsets,
                                               TfLiteEvalTensor* eval_tensors) {
  TFLITE_DCHECK(eval_tensors != nullptr);

  // Set up allocation info for all tensors.
  for (size_t i = 0; i < tensor_count_; ++i) {
    AllocationInfo* current = &info_[i];
    // The memory planner writes the final arena address through output_ptr.
    current->output_ptr = &(eval_tensors[i].data.data);

    TF_LITE_ENSURE_STATUS(
        TfLiteEvalTensorByteLength(&eval_tensors[i], &current->bytes));

    current->first_created = -1;
    current->last_used = -1;
    // Tensors already backed by serialized data (constants) and variable
    // tensors are not planned into the non-persistent arena section.
    current->needs_allocating = (eval_tensors[i].data.data == nullptr) &&
                                (!subgraph->tensors()->Get(i)->is_variable());
    if (offline_offsets) {
      current->offline_offset = offline_offsets[i];
    } else {
      current->offline_offset = kOnlinePlannedBuffer;
    }
  }

  // Graph inputs are alive from the very first node.
  for (size_t i = 0; i < subgraph->inputs()->size(); ++i) {
    const int tensor_index = subgraph->inputs()->Get(i);
    AllocationInfo* current = &info_[tensor_index];
    current->first_created = 0;
  }

  // Mark all outputs as persistent to the end of the invocation.
  for (size_t i = 0; i < subgraph->outputs()->size(); ++i) {
    const int tensor_index = subgraph->outputs()->Get(i);
    AllocationInfo* current = &info_[tensor_index];
    current->last_used = subgraph->operators()->size() - 1;
  }

  // Figure out when the first and last use of each tensor is.
  // Walk operators in reverse so last_used is found first.
  for (int i = (subgraph->operators()->size() - 1); i >= 0; --i) {
    const auto* op = subgraph->operators()->Get(i);
    for (size_t n = 0; n < op->inputs()->size(); ++n) {
      const int tensor_index = op->inputs()->Get(n);
      AllocationInfo* current = &info_[tensor_index];
      if (((current->last_used == -1) || (current->last_used < i))) {
        current->last_used = i;
      }
    }
    for (size_t n = 0; n < op->outputs()->size(); ++n) {
      const int tensor_index = op->outputs()->Get(n);
      AllocationInfo* current = &info_[tensor_index];
      if ((current->first_created == -1) || (current->first_created > i)) {
        current->first_created = i;
      }
    }
  }

  // Sanity check for valid tensor lifetime.
  for (size_t i = 0; i < tensor_count_; ++i) {
    AllocationInfo* current = &info_[i];
    // Even though tensor appears to be read only it may still need to be
    // allocated.
    const bool appears_read_only =
        (current->first_created == -1) && (current->last_used != -1);
    const bool has_partial_lifetime =
        !appears_read_only &&
        ((current->first_created == -1) || (current->last_used == -1));
    if (has_partial_lifetime && current->needs_allocating) {
      TF_LITE_REPORT_ERROR(
          reporter_,
          "Logic error in memory planner, tensor %d has an invalid lifetime: "
          "first_created: %d, last_used: %d",
          i, current->first_created, current->last_used);
      return kTfLiteError;
    }
  }
  return kTfLiteOk;
}
267
// The tensor offsets will be encoded in the metadata:[Metadata] field of the
// Model. The following encoding applies:
//
// | Metadata component | Value |
// | name:string | "OfflineMemoryAllocation" |
// | buffer:uint | Index of buffer containing memory allocation data |
//
// The buffer contents for the memory allocation is a list of 32-bit integers.
// The number of tensors, n, must be equal to the number of tensors defined in
// the model. The following encoding applies:
//
// | Offset | Value |
// | 0 | Offline allocation format version (only version 1 is accepted by
//       CheckOfflinePlannedOffsets) |
// | 1 | Subgraph index to which this allocation applies |
// | 2 | Number offsets following: n |
// | 3 | Arena byte offset of tensor #0 or -1 to allocate at runtime |
// | 4 | Arena byte offset of tensor #1 or -1 to allocate at runtime |
// | 3+(n-1) | Arena byte offset of tensor #(n-1) or -1 to allocate at runtime |
TfLiteStatus AllocationInfoBuilder::GetOfflinePlannedOffsets(
    const Model* model, const int32_t** offline_planner_offsets) {
  if (model->metadata()) {
    for (size_t i = 0; i < model->metadata()->size(); ++i) {
      auto metadata = model->metadata()->Get(i);
      // Match on the well-known offline-plan metadata name.
      if (strncmp(metadata->name()->c_str(), kOfflineMemAllocMetadata,
                  strlen(kOfflineMemAllocMetadata)) == 0) {
        const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
            model->buffers();
        auto* buffer = (*buffers)[metadata->buffer()];
        auto* array = buffer->data();
        const uint32_t* metadata_buffer =
            reinterpret_cast<const uint32_t*>(array->data());
        // Word 2 holds the number of offsets; words 3.. hold the offsets.
        const size_t nbr_tensors = static_cast<size_t>(metadata_buffer[2]);
        *offline_planner_offsets =
            reinterpret_cast<const int32_t*>(&metadata_buffer[3]);

        if (tensor_count_ != nbr_tensors) {
          TF_LITE_REPORT_ERROR(reporter_,
                               "Nbr of offline buffer offsets (%d) in metadata "
                               "not equal nbr tensors (%d)\n",
                               nbr_tensors, tensor_count_);
          return kTfLiteError;
        }
      }
    }
  }
  return kTfLiteOk;
}
315
AddScratchBuffers(internal::ScratchBufferRequest * scratch_buffer_requests,ScratchBufferHandle * scratch_buffer_handles)316 TfLiteStatus AllocationInfoBuilder::AddScratchBuffers(
317 internal::ScratchBufferRequest* scratch_buffer_requests,
318 ScratchBufferHandle* scratch_buffer_handles) {
319 // Set up allocation info for buffers.
320 for (size_t i = tensor_count_; i < tensor_count_ + buffer_count_; ++i) {
321 internal::ScratchBufferRequest* current_request =
322 &(scratch_buffer_requests[i - tensor_count_]);
323 ScratchBufferHandle* current_handle =
324 &(scratch_buffer_handles[i - tensor_count_]);
325
326 AllocationInfo* current = &info_[i];
327 current->output_ptr = reinterpret_cast<void**>(¤t_handle->data);
328 current->bytes = current_request->bytes;
329 current->first_created = current_request->node_idx;
330 current->last_used = current_request->node_idx;
331 current->offline_offset = kOnlinePlannedBuffer;
332 current->needs_allocating = true;
333 }
334 return kTfLiteOk;
335 }
336
CreatePlan(ErrorReporter * error_reporter,GreedyMemoryPlanner * planner,const AllocationInfo * allocation_info,size_t allocation_info_size)337 TfLiteStatus CreatePlan(ErrorReporter* error_reporter,
338 GreedyMemoryPlanner* planner,
339 const AllocationInfo* allocation_info,
340 size_t allocation_info_size) {
341 // Add the tensors to our allocation plan.
342 for (size_t i = 0; i < allocation_info_size; ++i) {
343 const AllocationInfo* current = &allocation_info[i];
344 if (current->needs_allocating) {
345 size_t aligned_bytes_required =
346 AlignSizeUp(current->bytes, kBufferAlignment);
347 if (current->offline_offset == kOnlinePlannedBuffer) {
348 TF_LITE_ENSURE_STATUS(
349 planner->AddBuffer(error_reporter, aligned_bytes_required,
350 current->first_created, current->last_used));
351 } else {
352 TF_LITE_ENSURE_STATUS(planner->AddBuffer(
353 error_reporter, aligned_bytes_required, current->first_created,
354 current->last_used, current->offline_offset));
355 }
356 }
357 }
358 return kTfLiteOk;
359 }
360
CommitPlan(ErrorReporter * error_reporter,MemoryPlanner * planner,uint8_t * starting_point,const AllocationInfo * allocation_info,size_t allocation_info_size)361 TfLiteStatus CommitPlan(ErrorReporter* error_reporter, MemoryPlanner* planner,
362 uint8_t* starting_point,
363 const AllocationInfo* allocation_info,
364 size_t allocation_info_size) {
365 // Figure out the actual memory addresses for each buffer, based on the plan.
366 int planner_index = 0;
367 for (size_t i = 0; i < allocation_info_size; ++i) {
368 const AllocationInfo* current = &allocation_info[i];
369 if (current->needs_allocating) {
370 int offset = -1;
371 TF_LITE_ENSURE_STATUS(
372 planner->GetOffsetForBuffer(error_reporter, planner_index, &offset));
373 *current->output_ptr = reinterpret_cast<void*>(starting_point + offset);
374 ++planner_index;
375 }
376 }
377 return kTfLiteOk;
378 }
379 } // namespace
380
381 namespace internal {
382
// Handles architecture safe mapping of flatbuffer vectors to a TfLite*Array
// struct. Matching types are required (e.g. float and TfLiteFloatArray).
// Big-endian systems will always allocate dimension array data in the tail
// (persistent) section.
template <typename kFlatBufferVectorType, typename kTfLiteArrayType>
TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
    SimpleMemoryAllocator* allocator, ErrorReporter* error_reporter,
    const flatbuffers::Vector<kFlatBufferVectorType>* flatbuffer_array,
    kTfLiteArrayType** result) {
  TFLITE_DCHECK(error_reporter != nullptr);
  TFLITE_DCHECK(flatbuffer_array != nullptr);
  // TODO(b/159668691): Consider adding type assertion or breaking this
  // function into multiple functions for each type. std::is_same is c++11 and
  // has a special updated constructor in c++17 that requires a string
  // argument.
  if (FLATBUFFERS_LITTLEENDIAN) {
    // On little-endian machines, TfLite*Array happens to have the same memory
    // layout as flatbuffers:Vector<kFlatBufferVectorType>, so we can
    // reinterpret_cast the flatbuffer vector and avoid a copy and malloc.
    *result = const_cast<kTfLiteArrayType*>(
        reinterpret_cast<const kTfLiteArrayType*>(flatbuffer_array));
  } else {
    // Big-endian architecture can not use the same memory layout as
    // flatbuffers::Vector<kFlatBufferVectorType>. Allocate from the tail and
    // copy values from the flatbuffer into the newly allocated chunk.
    // NOTE(review): the byte count uses TfLiteIntArrayGetSizeInBytes even for
    // non-int element types - confirm this is sufficient for e.g.
    // TfLiteFloatArray (sizes coincide when sizeof(element) == sizeof(int)).
    kTfLiteArrayType* array =
        reinterpret_cast<kTfLiteArrayType*>(allocator->AllocateFromTail(
            TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()),
            alignof(kTfLiteArrayType)));
    if (array == nullptr) {
      TF_LITE_REPORT_ERROR(
          error_reporter,
          "Failed to allocate %d bytes of memory to copy an array.",
          TfLiteIntArrayGetSizeInBytes(flatbuffer_array->Length()));
      return kTfLiteError;
    }
    // Element-by-element copy performs the endian conversion.
    array->size = flatbuffer_array->Length();
    for (int i = 0; i < array->size; ++i) {
      array->data[i] = flatbuffer_array->Get(i);
    }
    *result = array;
  }
  return kTfLiteOk;
}
426
427 // Returns a pointer to any buffer associated with the flatbuffer tensor. Can
428 // return nullptr if no buffer is found.
GetFlatbufferTensorBuffer(const tflite::Tensor & flatbuffer_tensor,const flatbuffers::Vector<flatbuffers::Offset<Buffer>> * buffers)429 void* GetFlatbufferTensorBuffer(
430 const tflite::Tensor& flatbuffer_tensor,
431 const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers) {
432 // We need to figure out where the actual contents of this tensor are stored
433 // in memory. We'll check to see if there's a serialized buffer (pretty much
434 // the same as a constant op in TensorFlow) associated with this tensor first,
435 // and if there is update the runtime structure to point to its location in
436 // memory.
437 // First see if there's any buffer information in the serialized tensor.
438 // TODO(b/170379532): Add better unit tests to validate flatbuffer values.
439 void* out_buffer = nullptr;
440 if (auto* buffer = (*buffers)[flatbuffer_tensor.buffer()]) {
441 // If we've found a buffer, does it have any data?
442 if (auto* array = buffer->data()) {
443 // If it has any data, is the data size larger than zero?
444 if (array->size()) {
445 // We've found a buffer with valid data, so update the runtime tensor
446 // data structure to point to it.
447 out_buffer = const_cast<void*>(static_cast<const void*>(array->data()));
448 }
449 }
450 // TODO(petewarden): It's not clear in what circumstances we could have a
451 // buffer in the serialized tensor, but it doesn't have any data in it. Is
452 // that a validly-generated file, and if so what does it mean, or is it an
453 // error condition? It would be good to tighten up the specification to make
454 // it less ambiguous.
455 }
456 return out_buffer;
457 }
458
// Populates a TfLiteTensor from its serialized flatbuffer counterpart.
// Quantization structures (and, on big-endian targets, dims) are carved out
// of the arena: from the temp section when `allocate_temp` is true, otherwise
// from the persistent tail section.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    SimpleMemoryAllocator* allocator, bool allocate_temp,
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result) {
  TFLITE_DCHECK(result != nullptr);

  *result = {};
  // Make sure the serialized type is one we know how to deal with, and convert
  // it from a flatbuffer enum into a constant used by the kernel C API.
  TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
                                          &result->type, error_reporter));
  // Make sure we remember if the serialized tensor is designated as a
  // variable.
  result->is_variable = flatbuffer_tensor.is_variable();

  result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);

  // TODO(petewarden): Some of these paths aren't getting enough testing
  // coverage, so we should figure out some tests that exercise them.
  if (result->data.data == nullptr) {
    // The tensor contents haven't been set from a serialized buffer, so
    // make a note that they will be allocated from memory. The actual
    // allocation won't happen until later.
    result->allocation_type = kTfLiteArenaRw;
  } else {
    // We set the data from a serialized buffer, so record that.
    result->allocation_type = kTfLiteMmapRo;
  }

  // Figure out what the size in bytes of the buffer is and store it.
  size_t type_size;
  TF_LITE_ENSURE_STATUS(BytesRequiredForTensor(
      flatbuffer_tensor, &result->bytes, &type_size, error_reporter));

  if (flatbuffer_tensor.shape() == nullptr) {
    // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
    // tensor.
    result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
  } else {
    // TFLM doesn't allow reshaping the tensor which requires dynamic memory
    // allocation so it is safe to drop the const qualifier. In the future, if
    // we really want to update the tensor shape, we can always pass in a new
    // TfLiteIntArray - especially we have to do so if the dimension is
    // changed.
    TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
        allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
  }

  // Copy the quantization information from the serialized data.
  const auto* src_quantization = flatbuffer_tensor.quantization();
  if (src_quantization && src_quantization->scale() &&
      (src_quantization->scale()->size() > 0) &&
      src_quantization->zero_point() &&
      (src_quantization->zero_point()->size() > 0)) {
    // Always populate the TfLiteTensor.params field, even if there are
    // per-channel quantization parameters.
    result->params.scale = src_quantization->scale()->Get(0);
    // Note that the zero_point field in the FlatBuffers schema is a 64-bit
    // integer, but the zero_point field in the TfLiteQuantizationParams struct
    // is a 32-bit integer.
    result->params.zero_point =
        static_cast<int32_t>(src_quantization->zero_point()->Get(0));

    // Populate per-channel quantization params.
    int channels = src_quantization->scale()->size();
    TfLiteAffineQuantization* quantization =
        allocate_temp
            ? reinterpret_cast<TfLiteAffineQuantization*>(
                  allocator->AllocateTemp(sizeof(TfLiteAffineQuantization),
                                          alignof(TfLiteAffineQuantization)))
            : reinterpret_cast<TfLiteAffineQuantization*>(
                  allocator->AllocateFromTail(
                      sizeof(TfLiteAffineQuantization),
                      alignof(TfLiteAffineQuantization)));
    if (quantization == nullptr) {
      TF_LITE_REPORT_ERROR(error_reporter,
                           "Unable to allocate TfLiteAffineQuantization.\n");
      return kTfLiteError;
    }

    // TODO(b/153688719): Reduce tail allocation by using a global zero-point
    // buffer. This value can not be reused from the flatbuffer since the
    // zero_point is stored as a int64_t.
    quantization->zero_point =
        allocate_temp
            ? reinterpret_cast<TfLiteIntArray*>(allocator->AllocateTemp(
                  TfLiteIntArrayGetSizeInBytes(channels),
                  alignof(TfLiteIntArray)))
            : reinterpret_cast<TfLiteIntArray*>(allocator->AllocateFromTail(
                  TfLiteIntArrayGetSizeInBytes(channels),
                  alignof(TfLiteIntArray)));
    if (quantization->zero_point == nullptr) {
      TF_LITE_REPORT_ERROR(error_reporter,
                           "Unable to allocate quantization->zero_point.\n");
      return kTfLiteError;
    }

    // Scales may alias the flatbuffer directly (little-endian) or be copied
    // into the arena (big-endian).
    TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
        allocator, error_reporter, src_quantization->scale(),
        &quantization->scale));

    // Zero points are narrowed from int64_t to int as they are copied.
    quantization->zero_point->size = channels;
    int* zero_point_data = quantization->zero_point->data;
    for (int i = 0; i < channels; i++) {
      zero_point_data[i] = src_quantization->zero_point()->Get(i);
    }
    // TODO(rocky): Need to add a micro_allocator test case that fails when
    // this is not copied:
    quantization->quantized_dimension = src_quantization->quantized_dimension();

    result->quantization = {kTfLiteAffineQuantization, quantization};
  }
  return kTfLiteOk;
}
572
InitializeTfLiteEvalTensorFromFlatbuffer(SimpleMemoryAllocator * allocator,const tflite::Tensor & flatbuffer_tensor,const flatbuffers::Vector<flatbuffers::Offset<Buffer>> * buffers,ErrorReporter * error_reporter,TfLiteEvalTensor * result)573 TfLiteStatus InitializeTfLiteEvalTensorFromFlatbuffer(
574 SimpleMemoryAllocator* allocator, const tflite::Tensor& flatbuffer_tensor,
575 const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
576 ErrorReporter* error_reporter, TfLiteEvalTensor* result) {
577 *result = {};
578 // Make sure the serialized type is one we know how to deal with, and convert
579 // it from a flatbuffer enum into a constant used by the kernel C API.
580 TF_LITE_ENSURE_STATUS(ConvertTensorType(flatbuffer_tensor.type(),
581 &result->type, error_reporter));
582
583 result->data.data = GetFlatbufferTensorBuffer(flatbuffer_tensor, buffers);
584
585 if (flatbuffer_tensor.shape() == nullptr) {
586 // flatbuffer_tensor.shape() can return a nullptr in the case of a scalar
587 // tensor.
588 result->dims = const_cast<TfLiteIntArray*>(&kZeroLengthIntArray);
589 } else {
590 TF_LITE_ENSURE_STATUS(FlatBufferVectorToTfLiteTypeArray(
591 allocator, error_reporter, flatbuffer_tensor.shape(), &(result->dims)));
592 }
593 return kTfLiteOk;
594 }
595
596 } // namespace internal
597
// Constructs an allocator over a caller-owned SimpleMemoryAllocator. No
// allocation happens here; model_is_allocating_ gates Start/Finish pairing.
MicroAllocator::MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                               ErrorReporter* error_reporter)
    : memory_allocator_(memory_allocator),
      error_reporter_(error_reporter),
      model_is_allocating_(false) {}
603
~MicroAllocator()604 MicroAllocator::~MicroAllocator() {}
605
// Creates a MicroAllocator inside the caller-provided arena. The arena start
// is aligned up to kBufferAlignment, which shrinks the usable size by the
// alignment adjustment.
MicroAllocator* MicroAllocator::Create(uint8_t* tensor_arena, size_t arena_size,
                                       ErrorReporter* error_reporter) {
  uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment);
  size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena;
  return Create(SimpleMemoryAllocator::Create(error_reporter, aligned_arena,
                                              aligned_arena_size),
                error_reporter);
}
614
// Creates a MicroAllocator by placement-new'ing it into the tail of the
// provided allocator's arena, so the MicroAllocator itself consumes arena
// space.
MicroAllocator* MicroAllocator::Create(SimpleMemoryAllocator* memory_allocator,
                                       ErrorReporter* error_reporter) {
  TFLITE_DCHECK(memory_allocator != nullptr);
  TFLITE_DCHECK(error_reporter != nullptr);

  // NOTE(review): allocator_buffer is not null-checked before the placement
  // new - confirm AllocateFromTail cannot fail here (e.g. arena too small).
  uint8_t* allocator_buffer = memory_allocator->AllocateFromTail(
      sizeof(MicroAllocator), alignof(MicroAllocator));
  MicroAllocator* allocator =
      new (allocator_buffer) MicroAllocator(memory_allocator, error_reporter);
  return allocator;
}
626
// Begins allocation for `model`: initializes scratch buffer bookkeeping,
// allocates eval tensors and the node/registration array, then parses
// per-node data from the flatbuffer. Must be paired with a later
// FinishModelAllocation() call.
TfLiteStatus MicroAllocator::StartModelAllocation(
    const Model* model, const MicroOpResolver& op_resolver,
    NodeAndRegistration** node_and_registrations,
    TfLiteEvalTensor** eval_tensors) {
  TFLITE_DCHECK(model != nullptr);

  // Guard against interleaving allocations of two models on one allocator.
  if (model_is_allocating_) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "MicroAllocator: Model allocation started before "
                         "finishing previously allocated model");
    return kTfLiteError;
  }

  model_is_allocating_ = true;

  TF_LITE_ENSURE_STATUS(InitScratchBufferData());
  TF_LITE_ENSURE_STATUS(AllocateTfLiteEvalTensors(model, eval_tensors));
  TF_LITE_ENSURE_STATUS(
      AllocateNodeAndRegistrations(model, node_and_registrations));
  TF_LITE_ENSURE_STATUS(PrepareNodeAndRegistrationDataFromFlatbuffer(
      model, op_resolver, *node_and_registrations));

  return kTfLiteOk;
}
651
// Completes model allocation: allocates the scratch buffer handles, commits
// the static memory plan (tensor and scratch buffer placement), and allocates
// variable tensors. Must follow a successful StartModelAllocation().
TfLiteStatus MicroAllocator::FinishModelAllocation(
    const Model* model, TfLiteEvalTensor* eval_tensors,
    ScratchBufferHandle** scratch_buffer_handles) {
  if (!model_is_allocating_) {
    TF_LITE_REPORT_ERROR(error_reporter_,
                         "MicroAllocator: Model allocation finished before "
                         "starting allocating model");
    return kTfLiteError;
  }

  const SubGraph* subgraph = GetSubGraphFromModel(model);
  TFLITE_DCHECK(subgraph != nullptr);

  TF_LITE_ENSURE_STATUS(AllocateScratchBufferHandles(
      scratch_buffer_handles, scratch_buffer_request_count_));
  TF_LITE_ENSURE_STATUS(CommitStaticMemoryPlan(model, subgraph, eval_tensors,
                                               *scratch_buffer_handles));
  TF_LITE_ENSURE_STATUS(AllocateVariables(subgraph, eval_tensors));

  // Allow a new model allocation cycle to begin.
  model_is_allocating_ = false;
  return kTfLiteOk;
}
674
// Allocates model-lifetime memory from the tail (persistent) section of the
// arena, aligned to kBufferAlignment.
void* MicroAllocator::AllocatePersistentBuffer(size_t bytes) {
  return memory_allocator_->AllocateFromTail(bytes, kBufferAlignment);
}
678
RequestScratchBufferInArena(size_t bytes,int * buffer_idx)679 TfLiteStatus MicroAllocator::RequestScratchBufferInArena(size_t bytes,
680 int* buffer_idx) {
681 // All scratch buffer requests are stored in the head section of the arena
682 // when a model is in the prepare phase. First align a scratch buffer request
683 // pointer to the start of the head:
684 internal::ScratchBufferRequest* requests = GetScratchBufferRequests();
685
686 // Count the number of requested scratch buffers for the current node:
687 size_t current_node_request_count = 0;
688 for (size_t i = 0; i < scratch_buffer_request_count_; ++i) {
689 if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) {
690 ++current_node_request_count;
691 }
692 }
693
694 // First, ensure that the per-kernel request has not exceeded the limit:
695 if (current_node_request_count >= kMaxScratchBuffersPerOp) {
696 TF_LITE_REPORT_ERROR(
697 error_reporter_,
698 "Scratch buffer request exeeds limit per operator (%d)",
699 kMaxScratchBuffersPerOp);
700 return kTfLiteError;
701 }
702
703 // Initialize and assign values for the request at the current index:
704 internal::ScratchBufferRequest* current_request =
705 &requests[scratch_buffer_request_count_];
706 *current_request = {};
707 // Assign -1 as a sentinel value that will be updated when the node finishes
708 // allocating:
709 current_request->bytes = bytes;
710 current_request->node_idx = kUnassignedScratchBufferRequestIndex;
711
712 // Assign the current request index to the out-param:
713 *buffer_idx = scratch_buffer_request_count_;
714
715 // Bump the request count to prepare for the next request:
716 ++scratch_buffer_request_count_;
717 return kTfLiteOk;
718 }
719
// Finalizes allocations made while preparing node `node_id`: clears temp
// allocations, stamps the node id on all pending scratch buffer requests, and
// grows the head buffer so the next operator can make up to
// kMaxScratchBuffersPerOp additional requests.
TfLiteStatus MicroAllocator::FinishPrepareNodeAllocations(int node_id) {
  // When a node has finished preparing, all temp allocations performed by the
  // kernel should be cleaned up:
  ResetTempAllocations();

  // Find and update any new scratch buffer requests for the current node:
  internal::ScratchBufferRequest* requests = GetScratchBufferRequests();

  for (size_t i = 0; i < scratch_buffer_request_count_; ++i) {
    // A request with a node_idx of -1 is a sentinel value used to indicate
    // this was a new request for the current node. The allocator finally
    // knows the node index at this point. Assign the value and update the
    // list of new requests so the head section can be adjusted to allow for
    // the next kernel to allocate at most kMaxScratchBuffersPerOp requests:
    if (requests[i].node_idx == kUnassignedScratchBufferRequestIndex) {
      requests[i].node_idx = node_id;
    }
  }

  // Ensure that the head is re-adjusted to allow for another at-most
  // kMaxScratchBuffersPerOp scratch buffer requests in the next operator:
  TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
      sizeof(internal::ScratchBufferRequest) *
          (scratch_buffer_request_count_ + kMaxScratchBuffersPerOp),
      alignof(internal::ScratchBufferRequest)));

  return kTfLiteOk;
}
748
// Returns the total number of arena bytes currently in use, as reported by
// the underlying memory allocator.
size_t MicroAllocator::used_bytes() const {
  return memory_allocator_->GetUsedBytes();
}
752
AllocateNodeAndRegistrations(const Model * model,NodeAndRegistration ** node_and_registrations)753 TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations(
754 const Model* model, NodeAndRegistration** node_and_registrations) {
755 TFLITE_DCHECK(node_and_registrations);
756
757 const SubGraph* subgraph = GetSubGraphFromModel(model);
758 TFLITE_DCHECK(subgraph != nullptr);
759
760 NodeAndRegistration* output = reinterpret_cast<NodeAndRegistration*>(
761 memory_allocator_->AllocateFromTail(
762 sizeof(NodeAndRegistration) * subgraph->operators()->size(),
763 alignof(NodeAndRegistration)));
764 if (output == nullptr) {
765 TF_LITE_REPORT_ERROR(
766 error_reporter_,
767 "Failed to allocate memory for node_and_registrations.");
768 return kTfLiteError;
769 }
770 *node_and_registrations = output;
771 return kTfLiteOk;
772 }
773
PrepareNodeAndRegistrationDataFromFlatbuffer(const Model * model,const MicroOpResolver & op_resolver,NodeAndRegistration * node_and_registrations)774 TfLiteStatus MicroAllocator::PrepareNodeAndRegistrationDataFromFlatbuffer(
775 const Model* model, const MicroOpResolver& op_resolver,
776 NodeAndRegistration* node_and_registrations) {
777 TFLITE_DCHECK(model != nullptr);
778 TFLITE_DCHECK(node_and_registrations != nullptr);
779
780 const SubGraph* subgraph = GetSubGraphFromModel(model);
781 TFLITE_DCHECK(subgraph != nullptr);
782
783 TfLiteStatus status = kTfLiteOk;
784 auto* opcodes = model->operator_codes();
785 MicroBuiltinDataAllocator builtin_data_allocator(memory_allocator_);
786 for (size_t i = 0; i < subgraph->operators()->size(); ++i) {
787 const auto* op = subgraph->operators()->Get(i);
788 const size_t index = op->opcode_index();
789 if (index >= opcodes->size()) {
790 TF_LITE_REPORT_ERROR(error_reporter_,
791 "Missing registration for opcode_index %d\n", index);
792 return kTfLiteError;
793 }
794 auto* opcode = (*opcodes)[index];
795 status =
796 GetRegistrationFromOpCode(opcode, op_resolver, error_reporter_,
797 &(node_and_registrations[i].registration));
798 if (status != kTfLiteOk) {
799 TF_LITE_REPORT_ERROR(error_reporter_,
800 "Failed to get registration from op code %s\n ",
801 EnumNameBuiltinOperator(GetBuiltinCode(opcode)));
802 return status;
803 }
804 const auto* registration = node_and_registrations[i].registration;
805 if (registration == nullptr) {
806 TF_LITE_REPORT_ERROR(error_reporter_, "Skipping op for opcode_index %d\n",
807 index);
808 return kTfLiteError;
809 }
810 BuiltinOperator op_type =
811 static_cast<BuiltinOperator>(registration->builtin_code);
812
813 const char* custom_data = nullptr;
814 size_t custom_data_size = 0;
815 unsigned char* builtin_data = nullptr;
816
817 if (op_type == BuiltinOperator_CUSTOM) {
818 // Custom Ops may or may not have a non-null custom_options field.
819 if (op->custom_options() != nullptr) {
820 custom_data =
821 reinterpret_cast<const char*>(op->custom_options()->data());
822 custom_data_size = op->custom_options()->size();
823 }
824 } else {
825 if (op->custom_options() != nullptr) {
826 TF_LITE_REPORT_ERROR(
827 error_reporter_,
828 "Unsupported behavior: found builtin operator %s with custom "
829 "options.\n",
830 EnumNameBuiltinOperator(op_type));
831 return kTfLiteError;
832 }
833
834 MicroOpResolver::BuiltinParseFunction parser =
835 op_resolver.GetOpDataParser(op_type);
836 if (parser == nullptr) {
837 TF_LITE_REPORT_ERROR(error_reporter_, "Did not find a parser for %s",
838 EnumNameBuiltinOperator(op_type));
839
840 return kTfLiteError;
841 }
842 TF_LITE_ENSURE_STATUS(parser(op, error_reporter_, &builtin_data_allocator,
843 (void**)(&builtin_data)));
844 }
845
846 TfLiteIntArray* inputs_array;
847 TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
848 memory_allocator_, error_reporter_, op->inputs(), &inputs_array));
849
850 TfLiteIntArray* outputs_array;
851 TF_LITE_ENSURE_STATUS(internal::FlatBufferVectorToTfLiteTypeArray(
852 memory_allocator_, error_reporter_, op->outputs(), &outputs_array));
853
854 TfLiteNode* node = &(node_and_registrations[i].node);
855 *node = {};
856 node->inputs = inputs_array;
857 node->outputs = outputs_array;
858 node->builtin_data = reinterpret_cast<void*>(builtin_data);
859 node->custom_initial_data = custom_data;
860 node->custom_initial_data_size = custom_data_size;
861 }
862
863 return kTfLiteOk;
864 }
865
AllocatePersistentTfLiteTensor(const Model * model,TfLiteEvalTensor * eval_tensors,int tensor_index)866 TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensor(
867 const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
868 const SubGraph* subgraph = GetSubGraphFromModel(model);
869 TFLITE_DCHECK(subgraph != nullptr);
870
871 // This value is allocated from persistent arena space. It is guaranteed to be
872 // around for the lifetime of the application.
873 TfLiteTensor* tensor =
874 AllocatePersistentTfLiteTensorInternal(model, eval_tensors, tensor_index);
875
876 // Populate any fields from the flatbuffer, since this TfLiteTensor struct is
877 // allocated in the persistent section of the arena, ensure that additional
878 // allocations also take place in that section of the arena.
879 if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
880 /*allocate_temp=*/false) !=
881 kTfLiteOk) {
882 TF_LITE_REPORT_ERROR(error_reporter_,
883 "Failed to populate a persistent TfLiteTensor struct "
884 "from flatbuffer data!");
885 return nullptr;
886 }
887
888 if (eval_tensors != nullptr) {
889 // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
890 // and not located in the flatbuffer are stored on the pre-allocated list of
891 // TfLiteEvalTensors structs. These structs are the source of truth, simply
892 // point the corresponding buffer to the new TfLiteTensor data value.
893 tensor->data.data = eval_tensors[tensor_index].data.data;
894 }
895 return tensor;
896 }
897
AllocateTempTfLiteTensor(const Model * model,TfLiteEvalTensor * eval_tensors,int tensor_index)898 TfLiteTensor* MicroAllocator::AllocateTempTfLiteTensor(
899 const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
900 const SubGraph* subgraph = GetSubGraphFromModel(model);
901 TFLITE_DCHECK(subgraph != nullptr);
902
903 // This value is allocated from temporary arena space. It is guaranteed to be
904 // around for at least the scope of the calling function. Since this struct
905 // allocation takes place in temp space, no need to own or cleanup.
906 TfLiteTensor* tensor =
907 reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateTemp(
908 sizeof(TfLiteTensor), alignof(TfLiteTensor)));
909
910 // Populate any fields from the flatbuffer, since this TfLiteTensor struct is
911 // allocated in the temp section of the arena, ensure that additional
912 // allocations also take place in that section of the arena.
913 if (PopulateTfLiteTensorFromFlatbuffer(model, subgraph, tensor, tensor_index,
914 /*allocate_temp=*/true) != kTfLiteOk) {
915 TF_LITE_REPORT_ERROR(
916 error_reporter_,
917 "Failed to populate a temp TfLiteTensor struct from flatbuffer data!");
918 return nullptr;
919 }
920
921 if (eval_tensors != nullptr) {
922 // Tensor buffers that are allocated at runtime (e.g. non-weight buffers)
923 // and not located in the flatbuffer are stored on the pre-allocated list of
924 // TfLiteEvalTensors structs. These structs are the source of truth, simply
925 // point the corresponding buffer to the new TfLiteTensor data value.
926 tensor->data.data = eval_tensors[tensor_index].data.data;
927 }
928 return tensor;
929 }
930
// Releases all temp-section arena allocations (e.g. temp TfLiteTensor structs)
// by delegating to the underlying memory allocator.
void MicroAllocator::ResetTempAllocations() {
  memory_allocator_->ResetTempAllocations();
}
934
AllocateTfLiteEvalTensors(const Model * model,TfLiteEvalTensor ** eval_tensors)935 TfLiteStatus MicroAllocator::AllocateTfLiteEvalTensors(
936 const Model* model, TfLiteEvalTensor** eval_tensors) {
937 TFLITE_DCHECK(eval_tensors != nullptr);
938
939 const SubGraph* subgraph = GetSubGraphFromModel(model);
940 TFLITE_DCHECK(subgraph != nullptr);
941
942 size_t alloc_count = subgraph->tensors()->size();
943 TfLiteEvalTensor* tensors =
944 reinterpret_cast<TfLiteEvalTensor*>(memory_allocator_->AllocateFromTail(
945 sizeof(TfLiteEvalTensor) * alloc_count, alignof(TfLiteEvalTensor)));
946 if (tensors == nullptr) {
947 TF_LITE_REPORT_ERROR(error_reporter_,
948 "Failed to allocate memory for context->eval_tensors, "
949 "%d bytes required",
950 sizeof(TfLiteEvalTensor) * alloc_count);
951 return kTfLiteError;
952 }
953
954 for (size_t i = 0; i < alloc_count; ++i) {
955 TfLiteStatus status = internal::InitializeTfLiteEvalTensorFromFlatbuffer(
956 memory_allocator_, *subgraph->tensors()->Get(i), model->buffers(),
957 error_reporter_, &tensors[i]);
958 if (status != kTfLiteOk) {
959 TF_LITE_REPORT_ERROR(error_reporter_, "Failed to initialize tensor %d",
960 i);
961 return kTfLiteError;
962 }
963 }
964 *eval_tensors = tensors;
965 return kTfLiteOk;
966 }
967
AllocateVariables(const SubGraph * subgraph,TfLiteEvalTensor * eval_tensors)968 TfLiteStatus MicroAllocator::AllocateVariables(const SubGraph* subgraph,
969 TfLiteEvalTensor* eval_tensors) {
970 for (size_t i = 0; i < subgraph->tensors()->size(); ++i) {
971 auto* tensor = subgraph->tensors()->Get(i);
972 if (tensor->is_variable()) {
973 size_t buffer_size;
974 TF_LITE_ENSURE_STATUS(
975 TfLiteEvalTensorByteLength(&eval_tensors[i], &buffer_size));
976
977 eval_tensors[i].data.data =
978 memory_allocator_->AllocateFromTail(buffer_size, kBufferAlignment);
979
980 if (eval_tensors[i].data.data == nullptr) {
981 TF_LITE_REPORT_ERROR(error_reporter_,
982 "Failed to allocate variable tensor of size %d",
983 buffer_size);
984 return kTfLiteError;
985 }
986 }
987 }
988 return kTfLiteOk;
989 }
990
// Allocates an uninitialized TfLiteTensor struct from the persistent (tail)
// section of the arena. Only the struct itself is allocated here; callers are
// expected to populate its fields. Note that `model`, `eval_tensors` and
// `tensor_index` are currently unused by this base implementation (the
// signature presumably exists for overriding allocators — confirm at call
// sites). Returns nullptr if the arena is exhausted.
TfLiteTensor* MicroAllocator::AllocatePersistentTfLiteTensorInternal(
    const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index) {
  return reinterpret_cast<TfLiteTensor*>(memory_allocator_->AllocateFromTail(
      sizeof(TfLiteTensor), alignof(TfLiteTensor)));
}
996
// Fills `tensor` in from the flatbuffer tensor at `tensor_index` in
// `subgraph`. When `allocate_temp` is true, any additional allocations made
// while populating (e.g. quantization data) come from the temp section of the
// arena; otherwise they come from the persistent section.
TfLiteStatus MicroAllocator::PopulateTfLiteTensorFromFlatbuffer(
    const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
    int tensor_index, bool allocate_temp) {
  // TODO(b/162311891): This method serves as a stub to ensure quantized
  // allocations in the tail can be recorded. Once the interpreter has APIs for
  // accessing buffers on TfLiteEvalTensor this method can be dropped.
  return internal::InitializeTfLiteTensorFromFlatbuffer(
      memory_allocator_, allocate_temp, *subgraph->tensors()->Get(tensor_index),
      model->buffers(), error_reporter_, tensor);
}
1007
// Returns the ErrorReporter this allocator logs through.
ErrorReporter* MicroAllocator::error_reporter() const {
  return error_reporter_;
}
1011
GetSubGraphFromModel(const Model * model)1012 const SubGraph* MicroAllocator::GetSubGraphFromModel(const Model* model) {
1013 auto* subgraphs = model->subgraphs();
1014 if (subgraphs->size() != 1) {
1015 TF_LITE_REPORT_ERROR(error_reporter_,
1016 "Only 1 subgraph is currently supported.\n");
1017 return nullptr;
1018 }
1019 return (*subgraphs)[0];
1020 }
1021
// Builds and commits the static memory plan for all tensors and scratch
// buffers in `subgraph`, laying them out in the head section of the arena.
// Fails if the planned layout does not fit in the remaining arena space.
TfLiteStatus MicroAllocator::CommitStaticMemoryPlan(
    const Model* model, const SubGraph* subgraph,
    TfLiteEvalTensor* eval_tensors,
    ScratchBufferHandle* scratch_buffer_handles) {
  size_t head_usage = 0;
  // Create static memory plan
  // 1. Calculate AllocationInfo to know the lifetime of each tensor/buffer.
  // 2. Add them into the planner (such as the GreedyMemoryPlanner).
  // 3. Static memory planning using the planner.
  // 4. Set tensor/buffer pointers based on the offsets from the previous step.
  //
  // Note that AllocationInfo is only needed for creating the plan. It will be
  // allocated from the temp section and cleaned up at the bottom of this
  // function.

  // One AllocationInfo entry per tensor plus one per scratch buffer request.
  size_t allocation_info_count =
      subgraph->tensors()->size() + scratch_buffer_request_count_;
  size_t bytes = sizeof(AllocationInfo) * allocation_info_count;

  // Allocate an array of AllocationInfo structs from the temp section. This
  // struct will be used by AllocationInfoBuilder to find buffer usage.
  AllocationInfo* allocation_info = reinterpret_cast<AllocationInfo*>(
      memory_allocator_->AllocateTemp(bytes, alignof(AllocationInfo)));
  if (allocation_info == nullptr) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Failed to allocate memory for allocation_info, %d bytes required",
        bytes);
    return kTfLiteError;
  }

  // Use the AllocationInfoBuilder class to help determine where buffers are
  // used in the subgraph.
  AllocationInfoBuilder builder(allocation_info, subgraph->tensors()->size(),
                                scratch_buffer_request_count_, error_reporter_);

  // Offline-planned models carry precomputed tensor offsets; when none are
  // present, offline_planner_offsets stays nullptr and the greedy planner
  // decides the layout.
  const int32_t* offline_planner_offsets = nullptr;
  TF_LITE_ENSURE_STATUS(
      builder.GetOfflinePlannedOffsets(model, &offline_planner_offsets));
  TF_LITE_ENSURE_STATUS(
      builder.AddTensors(subgraph, offline_planner_offsets, eval_tensors));

  internal::ScratchBufferRequest* scratch_buffer_requests =
      GetScratchBufferRequests();

  TF_LITE_ENSURE_STATUS(builder.AddScratchBuffers(scratch_buffer_requests,
                                                  scratch_buffer_handles));

  // Remaining arena size that memory planner can use for calculating offsets.
  size_t remaining_arena_size =
      memory_allocator_->GetAvailableMemory(kBufferAlignment);
  uint8_t* planner_arena =
      memory_allocator_->AllocateTemp(remaining_arena_size, kBufferAlignment);
  TF_LITE_ENSURE(error_reporter_, planner_arena != nullptr);
  GreedyMemoryPlanner planner(planner_arena, remaining_arena_size);
  TF_LITE_ENSURE_STATUS(CreatePlan(error_reporter_, &planner, allocation_info,
                                   allocation_info_count));

  // Reset all temp allocations used above:
  memory_allocator_->ResetTempAllocations();

  size_t actual_available_arena_size =
      memory_allocator_->GetAvailableMemory(kBufferAlignment);

  // Make sure we have enough arena size.
  if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
    TF_LITE_REPORT_ERROR(
        error_reporter_,
        "Arena size is too small for all buffers. Needed %u but only "
        "%u was available.",
        planner.GetMaximumMemorySize(), actual_available_arena_size);
    return kTfLiteError;
  }
  // Commit the plan.
  TF_LITE_ENSURE_STATUS(CommitPlan(error_reporter_, &planner,
                                   memory_allocator_->GetHeadBuffer(),
                                   allocation_info, allocation_info_count));
  // The committed plan's footprint becomes the head usage for this model.
  head_usage = planner.GetMaximumMemorySize();

  // The head is used to store memory plans for one model at a time during the
  // model preparation stage, and is re-purposed to store scratch buffer handles
  // during model invocation. The head must be as large as the greater of the
  // largest model memory plan's size and the total space required for all
  // scratch buffer handles.
  if (max_head_buffer_usage_ < head_usage) {
    max_head_buffer_usage_ = head_usage;
  }

  // The head is used for storing scratch buffer allocations before finalizing a
  // memory plan in this function. Ensure that the head is set to the largest
  // memory plan sent through the allocator:
  TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
      max_head_buffer_usage_, kBufferAlignment));
  return kTfLiteOk;
}
1117
AllocateScratchBufferHandles(ScratchBufferHandle ** scratch_buffer_handles,size_t handle_count)1118 TfLiteStatus MicroAllocator::AllocateScratchBufferHandles(
1119 ScratchBufferHandle** scratch_buffer_handles, size_t handle_count) {
1120 TFLITE_DCHECK(scratch_buffer_handles != nullptr);
1121
1122 if (scratch_buffer_request_count_ == 0) {
1123 // No scratch buffer requests were requested during model allocation.
1124 return kTfLiteOk;
1125 }
1126
1127 // Allocate a consecutive block of memory store the scratch buffer handles.
1128 // This alignment ensures quick lookup during inference time for the model:
1129 *scratch_buffer_handles = reinterpret_cast<ScratchBufferHandle*>(
1130 memory_allocator_->AllocateFromTail(
1131 sizeof(ScratchBufferHandle) * handle_count,
1132 alignof(ScratchBufferHandle)));
1133
1134 return kTfLiteOk;
1135 }
1136
InitScratchBufferData()1137 TfLiteStatus MicroAllocator::InitScratchBufferData() {
1138 // A model is preparing to allocate resources, ensure that scratch buffer
1139 // request counter is cleared:
1140 scratch_buffer_request_count_ = 0;
1141
1142 // All requests will be stored in the head section. Each kernel is allowed at
1143 // most kMaxScratchBuffersPerOp requests. Adjust the head to reserve at most
1144 // that many requests to begin:
1145 TF_LITE_ENSURE_STATUS(memory_allocator_->SetHeadBufferSize(
1146 sizeof(internal::ScratchBufferRequest) * kMaxScratchBuffersPerOp,
1147 alignof(internal::ScratchBufferRequest)));
1148
1149 return kTfLiteOk;
1150 }
1151
// Returns a typed pointer to the scratch buffer request list staged in the
// head section of the arena, aligning the raw head pointer up to the
// ScratchBufferRequest alignment first.
internal::ScratchBufferRequest* MicroAllocator::GetScratchBufferRequests() {
  return reinterpret_cast<internal::ScratchBufferRequest*>(
      AlignPointerUp(memory_allocator_->GetHeadBuffer(),
                     alignof(internal::ScratchBufferRequest)));
}
1157
1158 } // namespace tflite
1159