/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_
#define TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_

#include <cstddef>
#include <cstdint>

#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/api/error_reporter.h"
#include "tensorflow/lite/micro/compatibility.h"
#include "tensorflow/lite/micro/micro_op_resolver.h"
#include "tensorflow/lite/micro/simple_memory_allocator.h"
#include "tensorflow/lite/schema/schema_generated.h"

namespace tflite {

namespace internal {

// Sets up all of the data structure members for a TfLiteTensor based on the
// contents of a serialized tensor in the flatbuffer.
// TODO(b/162311891): Drop this method when the interpreter has an API for
// returning buffers on TfLiteEvalTensor.
TfLiteStatus InitializeTfLiteTensorFromFlatbuffer(
    SimpleMemoryAllocator* allocator, bool allocate_temp,
    const tflite::Tensor& flatbuffer_tensor,
    const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers,
    ErrorReporter* error_reporter, TfLiteTensor* result);

// Holds placeholder information for a scratch buffer request from a kernel.
// This struct is only used during the model prepare stage. Each request from a
// kernel is stored in the head section. During the prepare stage, the head
// section holds at least kMaxScratchBuffersPerOp requests plus any requests
// from previously prepared kernels.
//
// When the memory plan is finalized, these structs are no longer used in favor
// of a sequential array of ScratchBufferHandle allocations in the tail
// section. These allocations are indexed by the request API defined in the
// TfLiteContext struct.
typedef struct {
  // Number of bytes required by the buffer. The actual allocated size might be
  // greater than `bytes` due to buffer alignment.
  size_t bytes;
  // Node that the buffer is allocated for. This provides useful information for
  // determining the lifetime of the buffer. In AllocationInfo, this buffer will
  // have `before` = node_idx and `after` = node_idx.
  int node_idx;
} ScratchBufferRequest;

}  // namespace internal

typedef struct {
  TfLiteNode node;
  const TfLiteRegistration* registration;
} NodeAndRegistration;

// Holds a pointer to a buffer for a scratch buffer requested by a kernel during
// the model prepare stage. This struct is allocated in-place and allows for
// quick pointer-indexed lookup for speed during model inference.
typedef struct {
  // Pointer to location of the scratch buffer:
  uint8_t* data;
} ScratchBufferHandle;

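// Illustrative note (not from the original header): during kernel preparation,
// scratch buffers exist only as ScratchBufferRequest entries in the head
// section. After FinishModelAllocation(), a previously returned buffer index
// resolves to its pointer with a plain array lookup over the committed
// handles, e.g.:
//
//   uint8_t* scratch = scratch_buffer_handles[buffer_idx].data;
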
// Allocator responsible for allocating memory for all intermediate tensors
// necessary to invoke a model.
//
// The lifetime of the model, tensor arena and error reporter must be at
// least as long as that of the allocator object, since the allocator needs
// them to be accessible during its entire lifetime.
//
// The MicroAllocator simply plans out additional allocations that are required
// to stand up a model for inference in TF Micro. This class currently relies on
// an additional allocator - SimpleMemoryAllocator - for all allocations from an
// arena. These allocations are divided into head (non-persistent) and tail
// (persistent) regions:
//
// Memory layout to help understand how it works. This layout may change in
// future versions.
// ************** .memory_allocator->GetBuffer()
// Tensors/Scratch buffers (head)
// ************** .head_watermark
// unused memory
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
//                                               - ->GetDataSize()
// persistent area (tail)
// ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize()
class MicroAllocator {
 public:
  // Creates a MicroAllocator instance from a given tensor arena. This arena
  // will be managed by the created instance.
  // Note: please make sure tensor_arena is 16-byte aligned (e.g. with
  // alignas(16) or __declspec(align(16))); otherwise some head room will be
  // wasted.
  // TODO(b/157615197): Cleanup constructor + factory usage.
  static MicroAllocator* Create(uint8_t* tensor_arena, size_t arena_size,
                                ErrorReporter* error_reporter);

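  // Illustrative usage sketch (not part of the API): creating an allocator
  // from an aligned arena. The 2048-byte arena size is hypothetical and must
  // be tuned per model; a MicroErrorReporter (from micro_error_reporter.h) is
  // assumed as the ErrorReporter implementation.
  //
  //   alignas(16) static uint8_t tensor_arena[2048];  // assumed size
  //   tflite::MicroErrorReporter micro_error_reporter;
  //   tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create(
  //       tensor_arena, sizeof(tensor_arena), &micro_error_reporter);
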
  // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator
  // instance. This allocator instance will use the SimpleMemoryAllocator
  // instance to manage allocations internally.
  static MicroAllocator* Create(SimpleMemoryAllocator* memory_allocator,
                                ErrorReporter* error_reporter);

  // Begins allocating internal resources required for model inference.
  // This method will run through the flatbuffer data supplied in the model to
  // properly allocate tensor, node, and op registration data. This method is
  // expected to be followed by a call to FinishModelAllocation() before
  // resuming allocation with another model. All persistent tensor buffers are
  // stored in the out-param eval_tensors. This value is allocated from the
  // persistent memory arena and will be used to host runtime tensor buffers.
  TfLiteStatus StartModelAllocation(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration** node_and_registrations,
      TfLiteEvalTensor** eval_tensors);

  // Finishes allocating internal resources required for model inference.
  // This method will plan non-persistent buffers and commit a memory plan to
  // the 'head' section of the memory arena. All variable tensor data will also
  // be allocated. This method should be called after assigning model resources
  // in StartModelAllocation(). The eval_tensors pointer should be the value
  // passed into this class during StartModelAllocation(). Scratch buffer
  // handles are stored in the out-param `scratch_buffer_handles`. This value
  // will be used in the `GetScratchBuffer` call to retrieve scratch buffers.
  TfLiteStatus FinishModelAllocation(
      const Model* model, TfLiteEvalTensor* eval_tensors,
      ScratchBufferHandle** scratch_buffer_handles);

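  // Illustrative lifecycle sketch (not part of the API): how an interpreter
  // might drive StartModelAllocation() / FinishModelAllocation(). The model,
  // op_resolver and allocator objects are assumed to already exist; the kernel
  // prepare calls that normally happen between the two calls are omitted.
  //
  //   NodeAndRegistration* node_and_registrations = nullptr;
  //   TfLiteEvalTensor* eval_tensors = nullptr;
  //   ScratchBufferHandle* scratch_buffer_handles = nullptr;
  //   TF_LITE_ENSURE_STATUS(allocator->StartModelAllocation(
  //       model, op_resolver, &node_and_registrations, &eval_tensors));
  //   // ... run each kernel's Prepare(), requesting scratch buffers ...
  //   TF_LITE_ENSURE_STATUS(allocator->FinishModelAllocation(
  //       model, eval_tensors, &scratch_buffer_handles));
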
  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // persistent arena memory and is only guaranteed for the lifetime of the
  // application. The eval_tensors pointer should be the value passed into this
  // class during StartModelAllocation() and contains the source-of-truth for
  // buffers.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensor(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Allocates a TfLiteTensor struct and populates the returned value with
  // properties from the model flatbuffer. This struct is allocated from
  // temporary arena memory and is only guaranteed until a call is made to
  // ResetTempAllocations(). The eval_tensors pointer should be the value passed
  // into this class during StartModelAllocation() and contains the
  // source-of-truth for buffers.
  virtual TfLiteTensor* AllocateTempTfLiteTensor(const Model* model,
                                                 TfLiteEvalTensor* eval_tensors,
                                                 int tensor_index);

  // Resets all temporary allocations. This method should be called after a
  // chain of temp allocations (e.g. a chain of TfLiteTensor objects allocated
  // via AllocateTempTfLiteTensor()).
  virtual void ResetTempAllocations();

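  // Illustrative sketch (not part of the API): temporary tensors are typically
  // allocated while a node is being prepared and then released in one batch.
  // The tensor indices below are hypothetical; allocator, model and
  // eval_tensors are assumed to exist.
  //
  //   TfLiteTensor* input = allocator->AllocateTempTfLiteTensor(
  //       model, eval_tensors, /*tensor_index=*/0);
  //   TfLiteTensor* output = allocator->AllocateTempTfLiteTensor(
  //       model, eval_tensors, /*tensor_index=*/1);
  //   // ... inspect dims, quantization params, etc. ...
  //   allocator->ResetTempAllocations();  // both temp structs are now invalid
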
  // Allocates a persistent buffer which has the same lifetime as the allocator.
  // The memory is immediately available and is allocated from the tail of the
  // arena.
  virtual void* AllocatePersistentBuffer(size_t bytes);

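  // Illustrative sketch (not part of the API): persistent buffers are commonly
  // used to hold per-op data for the lifetime of the model. OpData is a
  // hypothetical struct used only for this example.
  //
  //   struct OpData { int32_t output_multiplier; int output_shift; };
  //   auto* op_data = static_cast<OpData*>(
  //       allocator->AllocatePersistentBuffer(sizeof(OpData)));
  //   // op_data remains valid until the allocator itself goes away.
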
  // Registers a scratch buffer of size `bytes` for the node currently being
  // prepared (see FinishPrepareNodeAllocations()). This method only requests a
  // buffer with a given size to be used after a model has finished allocation
  // via FinishModelAllocation(). All requested buffers will be accessible by
  // the out-param in that method.
  TfLiteStatus RequestScratchBufferInArena(size_t bytes, int* buffer_idx);

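  // Illustrative sketch (not part of the API): a kernel would normally issue
  // this request from its prepare step (usually routed through TfLiteContext
  // rather than calling the allocator directly). The 256-byte size is
  // hypothetical; the returned index is later resolved to a pointer via the
  // scratch buffer handles committed by FinishModelAllocation().
  //
  //   int scratch_idx = -1;
  //   TF_LITE_ENSURE_STATUS(
  //       allocator->RequestScratchBufferInArena(/*bytes=*/256, &scratch_idx));
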
  // Finishes allocating a specific NodeAndRegistration prepare block (kernel
  // entry for a model) with a given node ID. This call ensures that any scratch
  // buffer requests and temporary allocations are handled and ready for the
  // next node prepare block.
  TfLiteStatus FinishPrepareNodeAllocations(int node_id);

  // Returns the arena usage in bytes, only available after
  // `FinishModelAllocation`. Otherwise, it will return 0.
  size_t used_bytes() const;

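  // Illustrative sketch (not part of the API): used_bytes() can be logged
  // after FinishModelAllocation() to help size the arena for a target model.
  // The error_reporter and allocator objects are assumed to exist.
  //
  //   TF_LITE_REPORT_ERROR(error_reporter, "Arena used bytes: %d",
  //                        static_cast<int>(allocator->used_bytes()));
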
 protected:
  MicroAllocator(SimpleMemoryAllocator* memory_allocator,
                 ErrorReporter* error_reporter);
  virtual ~MicroAllocator();

  // Allocates an array in the arena to hold pointers to the node and
  // registration pointers required to represent the inference graph of the
  // model.
  virtual TfLiteStatus AllocateNodeAndRegistrations(
      const Model* model, NodeAndRegistration** node_and_registrations);

  // Populates node and registration pointers representing the inference graph
  // of the model from values inside the flatbuffer (loaded from the Model
  // instance). Persistent data (e.g. operator data) is allocated from the
  // arena.
  virtual TfLiteStatus PrepareNodeAndRegistrationDataFromFlatbuffer(
      const Model* model, const MicroOpResolver& op_resolver,
      NodeAndRegistration* node_and_registrations);

  // Allocates the list of persistent TfLiteEvalTensors that are used for the
  // "eval" phase of model inference. These structs will be the source of truth
  // for all tensor buffers. Allocation results are stored in the out-param
  // eval_tensors.
  virtual TfLiteStatus AllocateTfLiteEvalTensors(
      const Model* model, TfLiteEvalTensor** eval_tensors);

  // Allocates persistent tensor buffers for variable tensors in the subgraph.
  virtual TfLiteStatus AllocateVariables(const SubGraph* subgraph,
                                         TfLiteEvalTensor* eval_tensors);

  // Allocate and return a persistent TfLiteTensor.
  // TODO(b/162311891): Drop this method when the interpreter has an API for
  // accessing TfLiteEvalTensor structs.
  virtual TfLiteTensor* AllocatePersistentTfLiteTensorInternal(
      const Model* model, TfLiteEvalTensor* eval_tensors, int tensor_index);

  // Populates a TfLiteTensor struct with data from the model flatbuffer. Any
  // quantization data is allocated from either the tail (persistent) or temp
  // sections of the arena based on the allocation flag.
  virtual TfLiteStatus PopulateTfLiteTensorFromFlatbuffer(
      const Model* model, const SubGraph* subgraph, TfLiteTensor* tensor,
      int tensor_index, bool allocate_temp);

  ErrorReporter* error_reporter() const;

  // Returns the first subgraph from the model.
  const SubGraph* GetSubGraphFromModel(const Model* model);

 private:
  // Commits a memory plan for all non-persistent buffer allocations in the
  // 'head' section of the memory arena. The eval_tensors pointer is the list of
  // pre-allocated TfLiteEvalTensor structs that will point to the buffers that
  // will be allocated into the head section in this function call. The
  // scratch_buffer_handles pointer is the array of pre-allocated
  // ScratchBufferHandle structs that will point to allocated buffers also in
  // the head section.
  virtual TfLiteStatus CommitStaticMemoryPlan(
      const Model* model, const SubGraph* subgraph,
      TfLiteEvalTensor* eval_tensors,
      ScratchBufferHandle* scratch_buffer_handles);

  // Allocates an array of ScratchBufferHandle structs in the tail section for a
  // given number of handles.
  virtual TfLiteStatus AllocateScratchBufferHandles(
      ScratchBufferHandle** scratch_buffer_handles, size_t handle_count);

  // Clears all internal scratch buffer request counts and resets the head
  // section so that kernels can request scratch buffer data while a model is
  // being prepared.
  TfLiteStatus InitScratchBufferData();

  // Returns the pointer for the array of ScratchBufferRequest allocations in
  // the head section.
  internal::ScratchBufferRequest* GetScratchBufferRequests();

  // A simple memory allocator that always allocates from the arena tail or head.
  SimpleMemoryAllocator* memory_allocator_;

  ErrorReporter* error_reporter_;
  bool model_is_allocating_;

  // Holds the number of ScratchBufferRequest instances stored in the head
  // section when a model is allocating.
  size_t scratch_buffer_request_count_ = 0;

  // Holds the byte length of the memory plan with the largest head usage. Used
  // to ensure that multi-tenant allocations can share the head for buffers.
  size_t max_head_buffer_usage_ = 0;

  TF_LITE_REMOVE_VIRTUAL_DELETE
};

}  // namespace tflite
#endif  // TENSORFLOW_LITE_MICRO_MICRO_ALLOCATOR_H_