• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/micro/memory_planner/greedy_memory_planner.h"
17 
18 namespace tflite {
19 
// Simple stable in-place sort function. Not time-efficient for large arrays.
// Would normally be in an anonymous namespace to keep it private, but we want
// to be able to test it externally.
void ReverseSortInPlace(int* values, int* ids, int size) {
  // Bubble-sort |values| into descending order, carrying |ids| along so the
  // two arrays stay paired. Only strictly out-of-order neighbors are swapped,
  // so entries with equal values keep their original relative order (stable).
  for (bool swapped = true; swapped;) {
    swapped = false;
    for (int index = 1; index < size; ++index) {
      if (values[index - 1] < values[index]) {
        // Swap the adjacent pair in both arrays.
        const int value_swap = values[index - 1];
        values[index - 1] = values[index];
        values[index] = value_swap;
        const int id_swap = ids[index - 1];
        ids[index - 1] = ids[index];
        ids[index] = id_swap;
        swapped = true;
      }
    }
  }
}
40 
GreedyMemoryPlanner(unsigned char * scratch_buffer,int scratch_buffer_size)41 GreedyMemoryPlanner::GreedyMemoryPlanner(unsigned char* scratch_buffer,
42                                          int scratch_buffer_size)
43     : buffer_count_(0), need_to_calculate_offsets_(true) {
44   // Allocate the arrays we need within the scratch buffer arena.
45   max_buffer_count_ = scratch_buffer_size / per_buffer_size();
46 
47   unsigned char* next_free = scratch_buffer;
48   requirements_ = reinterpret_cast<BufferRequirements*>(next_free);
49   next_free += sizeof(BufferRequirements) * max_buffer_count_;
50 
51   buffer_sizes_sorted_ = reinterpret_cast<int*>(next_free);
52   next_free += sizeof(int) * max_buffer_count_;
53 
54   buffer_ids_sorted_ = reinterpret_cast<int*>(next_free);
55   next_free += sizeof(int) * max_buffer_count_;
56 
57   buffers_sorted_by_offset_ = reinterpret_cast<ListEntry*>(next_free);
58   next_free += sizeof(ListEntry) * max_buffer_count_;
59 
60   buffer_offsets_ = reinterpret_cast<int*>(next_free);
61 }
62 
~GreedyMemoryPlanner()63 GreedyMemoryPlanner::~GreedyMemoryPlanner() {
64   // We don't own the scratch buffer, so don't deallocate anything.
65 }
66 
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used)67 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
68     tflite::ErrorReporter* error_reporter, int size, int first_time_used,
69     int last_time_used) {
70   if (buffer_count_ >= max_buffer_count_) {
71     TF_LITE_REPORT_ERROR(error_reporter, "Too many buffers (max is %d)",
72                          max_buffer_count_);
73     return kTfLiteError;
74   }
75   BufferRequirements* current = &requirements_[buffer_count_];
76   current->size = size;
77   current->first_time_used = first_time_used;
78   current->last_time_used = last_time_used;
79   current->offline_offset = kOnlinePlannedBuffer;
80   ++buffer_count_;
81   need_to_calculate_offsets_ = true;
82   return kTfLiteOk;
83 }
84 
AddBuffer(tflite::ErrorReporter * error_reporter,int size,int first_time_used,int last_time_used,int offline_offset)85 TfLiteStatus GreedyMemoryPlanner::AddBuffer(
86     tflite::ErrorReporter* error_reporter, int size, int first_time_used,
87     int last_time_used, int offline_offset) {
88   BufferRequirements* current = &requirements_[buffer_count_];
89   if (AddBuffer(error_reporter, size, first_time_used, last_time_used) !=
90       kTfLiteOk) {
91     return kTfLiteError;
92   }
93   current->offline_offset = offline_offset;
94   return kTfLiteOk;
95 }
96 
DoesEntryOverlapInTime(const GreedyMemoryPlanner::ListEntry * entry,const int first_time_used,const int last_time_used) const97 bool GreedyMemoryPlanner::DoesEntryOverlapInTime(
98     const GreedyMemoryPlanner::ListEntry* entry, const int first_time_used,
99     const int last_time_used) const {
100   const BufferRequirements* entry_requirements =
101       &requirements_[entry->requirements_index];
102   if (entry_requirements->first_time_used > last_time_used) {
103     return false;
104   }
105   if (first_time_used > entry_requirements->last_time_used) {
106     return false;
107   }
108   return true;
109 }
110 
111 GreedyMemoryPlanner::ListEntry*
NextSimultaneouslyActiveBuffer(const GreedyMemoryPlanner::ListEntry * start,const int first_time_used,const int last_time_used)112 GreedyMemoryPlanner::NextSimultaneouslyActiveBuffer(
113     const GreedyMemoryPlanner::ListEntry* start, const int first_time_used,
114     const int last_time_used) {
115   ListEntry* result = nullptr;
116   ListEntry* candidate_next_entry;
117   if (start == nullptr) {
118     candidate_next_entry = &buffers_sorted_by_offset_[first_entry_index_];
119   } else {
120     if (start->next_entry_index == -1) {
121       return nullptr;
122     }
123     candidate_next_entry = &buffers_sorted_by_offset_[start->next_entry_index];
124   }
125   do {
126     if (DoesEntryOverlapInTime(candidate_next_entry, first_time_used,
127                                last_time_used)) {
128       result = candidate_next_entry;
129       break;
130     }
131     if (candidate_next_entry->next_entry_index == -1) {
132       break;
133     }
134     candidate_next_entry =
135         &buffers_sorted_by_offset_[candidate_next_entry->next_entry_index];
136   } while (true);
137   return result;
138 }
139 
CalculateOffsetsIfNeeded()140 void GreedyMemoryPlanner::CalculateOffsetsIfNeeded() {
141   if (!need_to_calculate_offsets_ || (buffer_count_ == 0)) {
142     return;
143   }
144   need_to_calculate_offsets_ = false;
145 
146   // Start off by ordering the buffers in descending order of size.
147   // This helps find a more compact layout. Intuitively, you can think
148   // about putting the large buffers in place first, and then the
149   // smaller buffers can fit in the gaps, rather than fragmenting the
150   // gaps with small buffers at the beginning. Add offline planned offsets
151   // first in the list, since they have a predetermined offset.
152   int idx_from_tail = buffer_count_;
153   int idx_from_head = 0;
154   for (int i = 0; i < buffer_count_; ++i) {
155     if (requirements_[i].offline_offset == kOnlinePlannedBuffer) {
156       idx_from_tail--;
157       buffer_sizes_sorted_[idx_from_tail] = requirements_[i].size;
158       buffer_ids_sorted_[idx_from_tail] = i;
159       buffer_offsets_[i] = -1;
160     } else {
161       buffer_sizes_sorted_[idx_from_head] = requirements_[i].size;
162       buffer_ids_sorted_[idx_from_head] = i;
163       buffer_offsets_[i] = requirements_[i].offline_offset;
164       idx_from_head++;
165     }
166   }
167 
168   // This sorting algorithm is naive, and may end up taking a very long time
169   // with hundreds of buffers. Do not sort the offline planned offsets.
170   ReverseSortInPlace(&buffer_sizes_sorted_[idx_from_head],
171                      &buffer_ids_sorted_[idx_from_head],
172                      buffer_count_ - idx_from_head);
173 
174   // Initialize the first entry to the first buffer in
175   // buffer_ids_sorted_.
176   //   - If there are no offline planned offsets, the largest buffer will be
177   //     first, and the buffers will be handled in size order.
178   //   - If offline offsets are present, these will be handled first in order
179   //     for the greedy algorithm to utilized gaps in the offline plan.
180   first_entry_index_ = 0;
181   next_free_entry_ = 1;
182   ListEntry* first_entry = &buffers_sorted_by_offset_[first_entry_index_];
183   first_entry->next_entry_index = -1;  // to mark the entry as end of list
184   int buffer_id = buffer_ids_sorted_[0];
185   first_entry->requirements_index = buffer_id;
186   if (requirements_[buffer_id].offline_offset == kOnlinePlannedBuffer) {
187     buffer_offsets_[buffer_id] = 0;
188   }
189   first_entry->offset = buffer_offsets_[buffer_id];
190 
191   // Work through the rest of the buffers to find a good gap to place each one.
192   for (int i = 1; i < buffer_count_; ++i) {
193     // The id is the order the buffer was originally added by the client.
194     buffer_id = buffer_ids_sorted_[i];
195     // Look at what size and time range the buffer needs to be active.
196     BufferRequirements* wanted_requirements = &requirements_[buffer_id];
197     const int wanted_size = wanted_requirements->size;
198     const int wanted_first_time_used = wanted_requirements->first_time_used;
199     const int wanted_last_time_used = wanted_requirements->last_time_used;
200 
201     // Find the first buffer that's active in our time range. All placed
202     // buffers are stored in the order of their starting position in the arena
203     // so that it's easy to find the next buffer in memory, and so the gap.
204     // The candidate_entry variable holds the buffer that we're considering
205     // placing the current buffer after.
206 
207     int candidate_offset = 0;
208     // Loop through the offset-ordered list of buffers, looking for gaps.
209     if (wanted_requirements->offline_offset == kOnlinePlannedBuffer) {
210       ListEntry* prior_entry = nullptr;
211       while (true) {
212         // Find out what the next active buffer is.
213         ListEntry* next_entry = NextSimultaneouslyActiveBuffer(
214             prior_entry, wanted_first_time_used, wanted_last_time_used);
215 
216         if (prior_entry) {
217           BufferRequirements* candidate_requirements =
218               &requirements_[prior_entry->requirements_index];
219           const int prior_entry_offset =
220               prior_entry->offset + candidate_requirements->size;
221           if (prior_entry_offset > candidate_offset) {
222             candidate_offset = prior_entry_offset;
223           }
224         }
225         if (next_entry == nullptr) {
226           // We're at the end of the list, so we can always append the buffer
227           // here.
228           break;
229         }
230         // Find out how much space there is between us and the next buffer.
231         const int gap = next_entry->offset - candidate_offset;
232         if (gap >= wanted_size) {
233           // This entry has a big enough gap between it and the next, so
234           // use it!
235           break;
236         }
237         // The gap wasn't big enough, so move on to another candidate.
238         prior_entry = next_entry;
239       }
240     } else {
241       // Offline planned offset are to be considered constant
242       candidate_offset = wanted_requirements->offline_offset;
243     }
244     // At this point, we've either found a gap (possibly at the end of the
245     // list) and want to place the buffer there, or there are no other active
246     // buffers in this time range and so we can put it at offset zero.
247     // Record the buffer's offset in our plan.
248     buffer_offsets_[buffer_id] = candidate_offset;
249     // Add the newly-placed buffer to our offset-ordered list, so that
250     // subsequent passes can fit in their buffers around it.
251     ListEntry* new_entry = &buffers_sorted_by_offset_[next_free_entry_];
252     new_entry->offset = candidate_offset;
253     new_entry->requirements_index = buffer_id;
254     const int new_entry_index = next_free_entry_;
255     ++next_free_entry_;
256 
257     if (first_entry->offset > candidate_offset) {
258       // The new entry offset is smaller than the first entry offset =>
259       // replace the first entry
260       first_entry = new_entry;
261       first_entry->next_entry_index = first_entry_index_;
262       first_entry_index_ = new_entry_index;
263     } else {
264       ListEntry* current_entry = first_entry;
265       // Make sure that we insert the buffer at the correct place in the
266       // buffer-offset-ordered list
267       while (true) {
268         const int next_entry_index = current_entry->next_entry_index;
269         if (next_entry_index == -1) {
270           // We're at the end of the list, so just add the new entry here.
271           current_entry->next_entry_index = new_entry_index;
272           new_entry->next_entry_index = -1;
273           break;
274         }
275         // not at the end of the list -> take a look at next entry
276         ListEntry* next_entry = &buffers_sorted_by_offset_[next_entry_index];
277         if (next_entry->offset > candidate_offset) {
278           // We're at the right spot to do an insertion and retain the sorting
279           // order, so place the new entry here.
280           new_entry->next_entry_index = current_entry->next_entry_index;
281           current_entry->next_entry_index = new_entry_index;
282           break;
283         }
284         current_entry = next_entry;
285       }
286     }
287   }
288 }
289 
GetMaximumMemorySize()290 size_t GreedyMemoryPlanner::GetMaximumMemorySize() {
291   CalculateOffsetsIfNeeded();
292   if (buffer_count_ == 0) {
293     return 0;
294   }
295   ListEntry* entry = &buffers_sorted_by_offset_[first_entry_index_];
296   size_t max_size = 0;
297   while (entry) {
298     BufferRequirements* requirements =
299         &requirements_[entry->requirements_index];
300     // TODO(b/148246793): Update all size and offset variables types from
301     //                    int to size_t
302     const size_t current_size = entry->offset + requirements->size;
303     if (current_size > max_size) {
304       max_size = current_size;
305     }
306     if (entry->next_entry_index == -1) {
307       break;
308     }
309     entry = &buffers_sorted_by_offset_[entry->next_entry_index];
310   }
311   return max_size;
312 }
313 
PrintMemoryPlan(ErrorReporter * error_reporter)314 void GreedyMemoryPlanner::PrintMemoryPlan(ErrorReporter* error_reporter) {
315   CalculateOffsetsIfNeeded();
316 
317   for (int i = 0; i < buffer_count_; ++i) {
318     TF_LITE_REPORT_ERROR(
319         error_reporter,
320         "Planner buffer ID: %d, calculated offset: %d, size required: %d, "
321         "first_time_created: %d, "
322         "last_time_used: %d",
323         i, buffer_offsets_[i], requirements_[i].size,
324         requirements_[i].first_time_used, requirements_[i].last_time_used);
325   }
326 
327   constexpr int kLineWidth = 80;
328   int max_size = kLineWidth;
329   int max_time = 0;
330   for (int i = 0; i < buffer_count_; ++i) {
331     BufferRequirements* requirements = &requirements_[i];
332     const int offset = buffer_offsets_[i];
333     const int last_time_used = requirements->last_time_used;
334     const int size = offset + requirements->size;
335     if (size > max_size) {
336       max_size = size;
337     }
338     if (last_time_used > max_time) {
339       max_time = last_time_used;
340     }
341   }
342 
343   char line[kLineWidth + 1];
344   for (int t = 0; t <= max_time; ++t) {
345     for (int c = 0; c < kLineWidth; ++c) {
346       line[c] = '.';
347     }
348     for (int i = 0; i < buffer_count_; ++i) {
349       BufferRequirements* requirements = &requirements_[i];
350       if ((t < requirements->first_time_used) ||
351           (t > requirements->last_time_used)) {
352         continue;
353       }
354       const int offset = buffer_offsets_[i];
355       if (offset == -1) {
356         continue;
357       }
358       const int size = requirements->size;
359       const int line_start = (offset * kLineWidth) / max_size;
360       const int line_end = ((offset + size) * kLineWidth) / max_size;
361       for (int n = line_start; n < line_end; ++n) {
362         if (line[n] == '.') {
363           char display;
364           if (i < 10) {
365             display = '0' + i;
366           } else if (i < 36) {
367             display = 'a' + (i - 10);
368           } else if (i < 62) {
369             display = 'A' + (i - 36);
370           } else {
371             display = '*';
372           }
373           line[n] = display;
374         } else {
375           line[n] = '!';
376         }
377       }
378     }
379     line[kLineWidth] = 0;
380     TF_LITE_REPORT_ERROR(error_reporter, "%s", (const char*)line);
381   }
382 }
383 
GetBufferCount()384 int GreedyMemoryPlanner::GetBufferCount() { return buffer_count_; }
385 
GetOffsetForBuffer(tflite::ErrorReporter * error_reporter,int buffer_index,int * offset)386 TfLiteStatus GreedyMemoryPlanner::GetOffsetForBuffer(
387     tflite::ErrorReporter* error_reporter, int buffer_index, int* offset) {
388   CalculateOffsetsIfNeeded();
389   if ((buffer_index < 0) || (buffer_index >= buffer_count_)) {
390     TF_LITE_REPORT_ERROR(error_reporter,
391                          "buffer index %d is outside range 0 to %d",
392                          buffer_index, buffer_count_);
393     return kTfLiteError;
394   }
395   *offset = buffer_offsets_[buffer_index];
396   return kTfLiteOk;
397 }
398 
DoAnyBuffersOverlap(ErrorReporter * error_reporter)399 bool GreedyMemoryPlanner::DoAnyBuffersOverlap(ErrorReporter* error_reporter) {
400   CalculateOffsetsIfNeeded();
401   bool were_overlaps_found = false;
402   for (int i = 0; i < buffer_count_; ++i) {
403     BufferRequirements* a_requirements = &requirements_[i];
404     const int a_start_offset = buffer_offsets_[i];
405     const int a_first_time_used = a_requirements->first_time_used;
406     const int a_last_time_used = a_requirements->last_time_used;
407     const int a_end_offset = a_start_offset + a_requirements->size;
408     for (int j = 0; j < buffer_count_; ++j) {
409       if (i == j) {
410         continue;
411       }
412       BufferRequirements* b_requirements = &requirements_[j];
413       const int b_start_offset = buffer_offsets_[j];
414       const int b_first_time_used = b_requirements->first_time_used;
415       const int b_last_time_used = b_requirements->last_time_used;
416       const int b_end_offset = b_start_offset + b_requirements->size;
417       if ((a_first_time_used > b_last_time_used) ||
418           (b_first_time_used > a_last_time_used)) {
419         // Buffers don't overlap in time.
420         continue;
421       }
422       if ((a_start_offset >= b_end_offset) ||
423           (b_start_offset >= a_end_offset)) {
424         // No overlap in memory.
425         continue;
426       }
427       were_overlaps_found = true;
428       TF_LITE_REPORT_ERROR(
429           error_reporter, "Overlap: %d (%d=>%d, %d->%d) vs %d (%d=>%d, %d->%d)",
430           i, a_first_time_used, a_last_time_used, a_start_offset, a_end_offset,
431           j, b_first_time_used, b_last_time_used, b_start_offset, b_end_offset);
432     }
433   }
434   return were_overlaps_found;
435 }
436 
437 }  // namespace tflite
438