1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
17
18 namespace tflite {
19
20 // Simple stable in-place sort function. Not time-efficient for large arrays.
21 // Would normally be in an anonymous namespace to keep it private, but we want
22 // to be able to test it externally.
void ReverseSortInPlace(int* values, int* ids, int size) {
  // Bubble sort `values` into descending order, applying the same swaps to
  // `ids` so the two arrays stay paired. The strict '<' comparison means
  // equal values are never swapped, keeping the sort stable.
  bool any_swapped;
  do {
    any_swapped = false;
    for (int i = 1; i < size; ++i) {
      if (values[i - 1] < values[i]) {
        const int value_temp = values[i - 1];
        values[i - 1] = values[i];
        values[i] = value_temp;
        const int id_temp = ids[i - 1];
        ids[i - 1] = ids[i];
        ids[i] = id_temp;
        any_swapped = true;
      }
    }
  } while (any_swapped);
}
40
GreedyMemoryPlanner(unsigned char * scratch_buffer,int scratch_buffer_size)41 GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
42 int scratch_buffer_size)
43 : buffer_count_(0), need_to_calculate_offsets_(true) {
44 // Allocate the arrays we need within the scratch buffer arena.
45 max_buffer_count_ = scratch_buffer_size / per_buffer_size();
46
47 unsigned char* next_free = scratch_buffer;
48 requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
49 next_free += sizeof(BufferRequirements) * max_buffer_count_;
50
51 buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
52 next_free += sizeof(int) * max_buffer_count_;
53
54 buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
55 next_free += sizeof(int) * max_buffer_count_;
56
57 buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
58 next_free += sizeof(ListEntry) * max_buffer_count_;
59
60 buffer_offsets_ = reinterpret_cast<int*>(next_free);
61 }
62
~GreedyMemoryPlanner()63 GreedyMemoryPlanner::~GreedyMemoryPlanner() {
64 // We don't own the scratch buffer, so don't deallocate anything.
65 }
66
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used)67 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
68 tflite::ErrorReporter* error_reporter, int size, int first_time_used,
69 int last_time_used) {
70 if (buffer_count_ >= max_buffer_count_) {
71 TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
72 max_buffer_count_);
73 return kTfLiteError;
74 }
75 BufferRequirements* current = &requirements_[buffer_count_];
76 current->size = size;
77 current->first_time_used = first_time_used;
78 current->last_time_used = last_time_used;
79 current->offline_offset = kOnlinePlannedBuffer;
80 ++buffer_count_;
81 need_to_calculate_offsets_ = true;
82 return kTfLiteOk;
83 }
84
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used,int offline_offset)85 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
86 tflite::ErrorReporter* error_reporter, int size, int first_time_used,
87 int last_time_used, int offline_offset) {
88 BufferRequirements* current = &requirements_[buffer_count_];
89 if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
90 kTfLiteOk) {
91 return kTfLiteError;
92 }
93 current->offline_offset = offline_offset;
94 return kTfLiteOk;
95 }
96
DoesEntryOverlapInTime(const GreedyMemoryPlanner::ListEntry * entry,const int first_time_used,const int last_time_used) const97 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
98 const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
99 const int last_time_used) const {
100 const BufferRequirements* entry_requirements =
101 &requirements_[entry->requirements_index];
102 if (entry_requirements->first_time_used > last_time_used) {
103 return false;
104 }
105 if (first_time_used > entry_requirements->last_time_used) {
106 return false;
107 }
108 return true;
109 }
110
111 GreedyMemoryPlanner::ListEntry*
NextSimultaneouslyActiveBuffer(const GreedyMemoryPlanner::ListEntry * start,const int first_time_used,const int last_time_used)112 GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
113 const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
114 const int last_time_used) {
115 ListEntry* result = nullptr;
116 ListEntry* candidate_next_entry;
117 if (start == nullptr) {
118 candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
119 } else {
120 if (start->next_entry_index == -1) {
121 return nullptr;
122 }
123 candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
124 }
125 do {
126 if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
127 last_time_used)) {
128 result = candidate_next_entry;
129 break;
130 }
131 if (candidate_next_entry->next_entry_index == -1) {
132 break;
133 }
134 candidate_next_entry =
135 &buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
136 } while (true);
137 return result;
138 }
139
CalculateOffsetsIfNeeded()140 void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
141 if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
142 return;
143 }
144 need_to_calculate_offsets_ = false;
145
146 // Start off by ordering the buffers in descending order of size.
147 // This helps find a more compact layout. Intuitively, you can think
148 // about putting the large buffers in place first, and then the
149 // smaller buffers can fit in the gaps, rather than fragmenting the
150 // gaps with small buffers at the beginning. Add offline planned offsets
151 // first in the list, since they have a predetermined offset.
152 int idx_from_tail = buffer_count_;
153 int idx_from_head = 0;
154 for (int i = 0; i < buffer_count_; ++i) {
155 if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
156 idx_from_tail--;
157 buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
158 buffer_ids_sorted_[idx_from_tail] = i;
159 buffer_offsets_[i] = -1;
160 } else {
161 buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
162 buffer_ids_sorted_[idx_from_head] = i;
163 buffer_offsets_[i] = requirements_[i].offline_offset;
164 idx_from_head++;
165 }
166 }
167
168 // This sorting algorithm is naive, and may end up taking a very long time
169 // with hundreds of buffers. Do not sort the offline planned offsets.
170 ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
171 &buffer_ids_sorted_[idx_from_head],
172 buffer_count_ - idx_from_head);
173
174 // Initialize the first entry to the first buffer in
175 // buffer_ids_sorted_.
176 // - If there are no offline planned offsets, the largest buffer will be
177 // first, and the buffers will be handled in size order.
178 // - If offline offsets are present, these will be handled first in order
179 // for the greedy algorithm to utilized gaps in the offline plan.
180 first_entry_index_ = 0;
181 next_free_entry_ = 1;
182 ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
183 first_entry->next_entry_index = -1; // to mark the entry as end of list
184 int buffer_id = buffer_ids_sorted_[0];
185 first_entry->requirements_index = buffer_id;
186 if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
187 buffer_offsets_[buffer_id] = 0;
188 }
189 first_entry->offset = buffer_offsets_[buffer_id];
190
191 // Work through the rest of the buffers to find a good gap to place each one.
192 for (int i = 1; i < buffer_count_; ++i) {
193 // The id is the order the buffer was originally added by the client.
194 buffer_id = buffer_ids_sorted_[i];
195 // Look at what size and time range the buffer needs to be active.
196 BufferRequirements* wanted_requirements = &requirements_[buffer_id];
197 const int wanted_size = wanted_requirements->size;
198 const int wanted_first_time_used = wanted_requirements->first_time_used;
199 const int wanted_last_time_used = wanted_requirements->last_time_used;
200
201 // Find the first buffer that's active in our time range. All placed
202 // buffers are stored in the order of their starting position in the arena
203 // so that it's easy to find the next buffer in memory, and so the gap.
204 // The candidate_entry variable holds the buffer that we're considering
205 // placing the current buffer after.
206
207 int candidate_offset = 0;
208 // Loop through the offset-ordered list of buffers, looking for gaps.
209 if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
210 ListEntry* prior_entry = nullptr;
211 while (true) {
212 // Find out what the next active buffer is.
213 ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
214 prior_entry, wanted_first_time_used, wanted_last_time_used);
215
216 if (prior_entry) {
217 BufferRequirements* candidate_requirements =
218 &requirements_[prior_entry->requirements_index];
219 const int prior_entry_offset =
220 prior_entry->offset + candidate_requirements->size;
221 if (prior_entry_offset > candidate_offset) {
222 candidate_offset = prior_entry_offset;
223 }
224 }
225 if (next_entry == nullptr) {
226 // We're at the end of the list, so we can always append the buffer
227 // here.
228 break;
229 }
230 // Find out how much space there is between us and the next buffer.
231 const int gap = next_entry->offset - candidate_offset;
232 if (gap >= wanted_size) {
233 // This entry has a big enough gap between it and the next, so
234 // use it!
235 break;
236 }
237 // The gap wasn't big enough, so move on to another candidate.
238 prior_entry = next_entry;
239 }
240 } else {
241 // Offline planned offset are to be considered constant
242 candidate_offset = wanted_requirements->offline_offset;
243 }
244 // At this point, we've either found a gap (possibly at the end of the
245 // list) and want to place the buffer there, or there are no other active
246 // buffers in this time range and so we can put it at offset zero.
247 // Record the buffer's offset in our plan.
248 buffer_offsets_[buffer_id] = candidate_offset;
249 // Add the newly-placed buffer to our offset-ordered list, so that
250 // subsequent passes can fit in their buffers around it.
251 ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
252 new_entry->offset = candidate_offset;
253 new_entry->requirements_index = buffer_id;
254 const int new_entry_index = next_free_entry_;
255 ++next_free_entry_;
256
257 if (first_entry->offset > candidate_offset) {
258 // The new entry offset is smaller than the first entry offset =>
259 // replace the first entry
260 first_entry = new_entry;
261 first_entry->next_entry_index = first_entry_index_;
262 first_entry_index_ = new_entry_index;
263 } else {
264 ListEntry* current_entry = first_entry;
265 // Make sure that we insert the buffer at the correct place in the
266 // buffer-offset-ordered list
267 while (true) {
268 const int next_entry_index = current_entry->next_entry_index;
269 if (next_entry_index == -1) {
270 // We're at the end of the list, so just add the new entry here.
271 current_entry->next_entry_index = new_entry_index;
272 new_entry->next_entry_index = -1;
273 break;
274 }
275 // not at the end of the list -> take a look at next entry
276 ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
277 if (next_entry->offset > candidate_offset) {
278 // We're at the right spot to do an insertion and retain the sorting
279 // order, so place the new entry here.
280 new_entry->next_entry_index = current_entry->next_entry_index;
281 current_entry->next_entry_index = new_entry_index;
282 break;
283 }
284 current_entry = next_entry;
285 }
286 }
287 }
288 }
289
GetMaximumMemorySize()290 size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
291 CalculateOffsetsIfNeeded();
292 if (buffer_count_ == 0) {
293 return 0;
294 }
295 ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
296 size_t max_size = 0;
297 while (entry) {
298 BufferRequirements* requirements =
299 &requirements_[entry->requirements_index];
300 // TODO(b/148246793): Update all size and offset variables types from
301 // int to size_t
302 const size_t current_size = entry->offset + requirements->size;
303 if (current_size > max_size) {
304 max_size = current_size;
305 }
306 if (entry->next_entry_index == -1) {
307 break;
308 }
309 entry = &buffers_sorted_by_offset_[entry->next_entry_index];
310 }
311 return max_size;
312 }
313
PrintMemoryPlan(ErrorReporter * error_reporter)314 void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
315 CalculateOffsetsIfNeeded();
316
317 for (int i = 0; i < buffer_count_; ++i) {
318 TF_LITE_REPORT_ERROR(
319 error_reporter,
320 "Planner buffer ID: %d, calculated offset: %d, size required: %d, "
321 "first_time_created: %d, "
322 "last_time_used: %d",
323 i, buffer_offsets_[i], requirements_[i].size,
324 requirements_[i].first_time_used, requirements_[i].last_time_used);
325 }
326
327 constexpr int kLineWidth = 80;
328 int max_size = kLineWidth;
329 int max_time = 0;
330 for (int i = 0; i < buffer_count_; ++i) {
331 BufferRequirements* requirements = &requirements_[i];
332 const int offset = buffer_offsets_[i];
333 const int last_time_used = requirements->last_time_used;
334 const int size = offset + requirements->size;
335 if (size > max_size) {
336 max_size = size;
337 }
338 if (last_time_used > max_time) {
339 max_time = last_time_used;
340 }
341 }
342
343 char line[kLineWidth + 1];
344 for (int t = 0; t <= max_time; ++t) {
345 for (int c = 0; c < kLineWidth; ++c) {
346 line[c] = '.';
347 }
348 for (int i = 0; i < buffer_count_; ++i) {
349 BufferRequirements* requirements = &requirements_[i];
350 if ((t < requirements->first_time_used) ||
351 (t > requirements->last_time_used)) {
352 continue;
353 }
354 const int offset = buffer_offsets_[i];
355 if (offset == -1) {
356 continue;
357 }
358 const int size = requirements->size;
359 const int line_start = (offset * kLineWidth) / max_size;
360 const int line_end = ((offset + size) * kLineWidth) / max_size;
361 for (int n = line_start; n < line_end; ++n) {
362 if (line[n] == '.') {
363 char display;
364 if (i < 10) {
365 display = '0' + i;
366 } else if (i < 36) {
367 display = 'a' + (i - 10);
368 } else if (i < 62) {
369 display = 'A' + (i - 36);
370 } else {
371 display = '*';
372 }
373 line[n] = display;
374 } else {
375 line[n] = '!';
376 }
377 }
378 }
379 line[kLineWidth] = 0;
380 TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
381 }
382 }
383
GetBufferCount()384 int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
385
GetOffsetForBuffer(tflite::ErrorReporter * error_reporter,int buffer_index,int * offset)386 TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
387 tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
388 CalculateOffsetsIfNeeded();
389 if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
390 TF_LITE_REPORT_ERROR(error_reporter,
391 "buffer index %d is outside range 0 to %d",
392 buffer_index, buffer_count_);
393 return kTfLiteError;
394 }
395 *offset = buffer_offsets_[buffer_index];
396 return kTfLiteOk;
397 }
398
DoAnyBuffersOverlap(ErrorReporter * error_reporter)399 bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
400 CalculateOffsetsIfNeeded();
401 bool were_overlaps_found = false;
402 for (int i = 0; i < buffer_count_; ++i) {
403 BufferRequirements* a_requirements = &requirements_[i];
404 const int a_start_offset = buffer_offsets_[i];
405 const int a_first_time_used = a_requirements->first_time_used;
406 const int a_last_time_used = a_requirements->last_time_used;
407 const int a_end_offset = a_start_offset + a_requirements->size;
408 for (int j = 0; j < buffer_count_; ++j) {
409 if (i == j) {
410 continue;
411 }
412 BufferRequirements* b_requirements = &requirements_[j];
413 const int b_start_offset = buffer_offsets_[j];
414 const int b_first_time_used = b_requirements->first_time_used;
415 const int b_last_time_used = b_requirements->last_time_used;
416 const int b_end_offset = b_start_offset + b_requirements->size;
417 if ((a_first_time_used > b_last_time_used) ||
418 (b_first_time_used > a_last_time_used)) {
419 // Buffers don't overlap in time.
420 continue;
421 }
422 if ((a_start_offset >= b_end_offset) ||
423 (b_start_offset >= a_end_offset)) {
424 // No overlap in memory.
425 continue;
426 }
427 were_overlaps_found = true;
428 TF_LITE_REPORT_ERROR(
429 error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
430 i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
431 j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
432 }
433 }
434 return were_overlaps_found;
435 }
436
437 } // namespace tflite
438