/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <vector>

#include "absl/status/status.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"

namespace tflite {
namespace gpu {
namespace {

struct SizeDistPriorityInfo {
  // - A tensor with the leftmost position in the positional maximums vector
  // has higher priority;
  // - If two tensors have equal position, the one whose usage interval has
  // the smallest positive distance (best_dist) to one of the already assigned
  // tensors has higher priority;
  // - If two tensors have equal position and best_dist, the one with greater
  // tensor_size has higher priority.
  bool operator>(const SizeDistPriorityInfo& other) const {
    return position < other.position ||
           (position == other.position &&
            (best_dist < other.best_dist || (best_dist == other.best_dist &&
                                             tensor_size > other.tensor_size)));
  }

  // Recalculate the best distance and best object, based on the distances
  // precalculated in vector dist.
  void RecalcBestDist() {
    best_dist = kNotAssigned;
    for (size_t obj_id = 0; obj_id < dist.size(); ++obj_id) {
      if (dist[obj_id] < best_dist) {
        best_dist = dist[obj_id];
        best_object = obj_id;
      }
    }
  }

  size_t position;
  size_t tensor_size;
  std::vector<size_t> dist;
  size_t best_dist;
  size_t best_object;
  size_t tensor_usage_id;
};
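
// Example of the ordering above (illustrative values): with positional
// maximums {40, 24, 16}, a tensor of size 30 gets position 0, while a tensor
// of size 20 gets position 1 (the rightmost maximum still large enough to
// hold it), so the size-30 tensor has the higher priority. Among tensors with
// equal position, the smaller best_dist wins, and ties on best_dist go to the
// larger tensor_size.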

}  // namespace

absl::Status GreedyBySizeAssignment(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    OffsetsAssignment* assignment) {
  const size_t num_tensors = usage_records.size();
  assignment->offsets.resize(num_tensors);
  assignment->total_size = 0;

  // Sort the usage records by size of the corresponding tensor, in
  // non-increasing order.
  std::vector<TensorUsageWithIndex<size_t>> ordered_records;
  for (size_t i = 0; i < num_tensors; ++i) {
    ordered_records.emplace_back(&usage_records[i], i);
  }
  std::sort(ordered_records.begin(), ordered_records.end(), CompareBySize);

  // Ids of already allocated tensors, ordered by offset.
  std::vector<size_t> ordered_allocs;

  for (const auto& rec_with_idx : ordered_records) {
    const TensorUsageRecord<size_t>* rec = rec_with_idx.usage_record;
    size_t best_diff = kNotAssigned;
    size_t best_offset = kNotAssigned;
    size_t prev_offset = 0;
    for (const auto& allocated_id : ordered_allocs) {
      if (usage_records[allocated_id].last_task < rec->first_task ||
          usage_records[allocated_id].first_task > rec->last_task) {
        // The usage interval of tensor allocated_id doesn't intersect with
        // the current tensor's usage interval, so skip it.
        continue;
      }
      size_t cur_offset = assignment->offsets[allocated_id];
      if (cur_offset >= prev_offset) {
        size_t diff = cur_offset - prev_offset;
        // Check whether the current tensor fits into the gap directly to the
        // left of tensor allocated_id's offset, and whether this gap is the
        // smallest suitable gap seen so far.
        if (diff >= rec->tensor_size && diff < best_diff) {
          best_diff = diff;
          best_offset = prev_offset;
        }
      }
      prev_offset = std::max(
          prev_offset, cur_offset + usage_records[allocated_id].tensor_size);
    }
    if (assignment->total_size < prev_offset) {
      return absl::InternalError("Total size is wrong.");
    }

    // If no suitable gap was found, allocate the current tensor after the
    // rightmost tensor whose usage interval intersects with the current one.
    if (best_offset == kNotAssigned) {
      best_offset = prev_offset;
    }

    // Assign best_offset to the current tensor and find the correct place to
    // insert its id into ordered_allocs, to preserve the ordering by offset.
    auto it = ordered_allocs.begin();
    while (it != ordered_allocs.end() &&
           assignment->offsets[*it] <= best_offset) {
      ++it;
    }
    ordered_allocs.insert(it, rec_with_idx.idx);
    assignment->offsets[rec_with_idx.idx] = best_offset;
    assignment->total_size =
        std::max(assignment->total_size, best_offset + rec->tensor_size);
  }
  return absl::OkStatus();
}
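
// A minimal usage sketch of GreedyBySizeAssignment (illustrative values;
// TensorUsageRecord takes {size, first_task, last_task}):
//
//   std::vector<TensorUsageRecord<size_t>> records = {
//       {/*size=*/32, /*first_task=*/0, /*last_task=*/1},
//       {/*size=*/16, /*first_task=*/1, /*last_task=*/2},
//       {/*size=*/8, /*first_task=*/3, /*last_task=*/4}};
//   OffsetsAssignment assignment;
//   absl::Status status = GreedyBySizeAssignment(records, &assignment);
//
// Tensors 0 and 1 overlap in time and receive offsets 0 and 32; tensor 2
// overlaps neither, so it reuses the gap at offset 0 and total_size is 48
// rather than the 56 a non-sharing allocator would need.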

// Assigns given tensors to shared objects, using the following greedy
// algorithm:
// - We have usage records of all intermediate tensors as input. Each record
// consists of the tensor size and the first and last tasks that use it. Let's
// call [first_task..last_task] a tensor usage interval;
// - The distance between two usage intervals is the absolute difference
// between the closest tasks in their intervals. If two usage intervals don't
// intersect, then the distance between them is positive;
// - Calculate the positional maximums vector, i.e. the vector of lower bounds
// on the size of each shared object;
// - For each tensor find the rightmost positional maximum that is greater
// than or equal to the current tensor's size (call it position);
// - Iterate through all tensors in non-increasing order of their
// SizeDistPriority (described above);
// - Assign each such tensor to the object that already contains a tensor
// whose usage interval has the smallest existing positive distance to the
// current tensor's usage interval (this distance and object id are already
// precalculated in its SizeDistPriority record). The size of the chosen
// object can possibly increase;
// - If there are several such objects, use the largest one;
// - If there is no suitable shared object, assign the current tensor to a new
// object with size equal to the current tensor's size;
// - Modify the SizeDistPriority records of tensors that haven't been assigned
// yet, to reflect the distance changes after this assignment.
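// For example (illustrative values), for usage records with
// {size, first_task, last_task} = {32, 0, 1}, {16, 1, 2}, {8, 3, 4}, at most
// two tensors are alive at any task, so the positional maximums vector is
// {32, 16}: the largest tensor alive at any task has size 32, and the largest
// second-place tensor alive at any single task has size 16.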
absl::Status GreedyBySizeDistPriorityAssignment(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    ObjectsAssignment<size_t>* assignment) {
  std::vector<size_t> positional_max =
      CalculatePositionalMaximums(usage_records);

  size_t num_records = usage_records.size();
  std::vector<SizeDistPriorityInfo> priority_info(num_records);
  for (size_t rec_id = 0; rec_id < usage_records.size(); ++rec_id) {
    priority_info[rec_id].tensor_usage_id = rec_id;
    priority_info[rec_id].tensor_size = usage_records[rec_id].tensor_size;

    // No objects have been created yet.
    priority_info[rec_id].best_dist = kNotAssigned;
    priority_info[rec_id].best_object = kNotAssigned;

    // Find the rightmost positional maximum that is greater than or equal to
    // the current tensor's size.
    size_t pos = 0;
    while (pos < positional_max.size() &&
           positional_max[pos] >= priority_info[rec_id].tensor_size) {
      ++pos;
    }
    if (pos == 0) {
      return absl::InternalError("Variable pos must be positive.");
    }
    priority_info[rec_id].position = pos - 1;
  }

  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);
  for (size_t it = 0; it < num_records; ++it) {
    size_t best_info_id = kNotAssigned;
    for (size_t info_id = 0; info_id < num_records; ++info_id) {
      if (assignment->object_ids[priority_info[info_id].tensor_usage_id] !=
          kNotAssigned) {
        // Tensor already assigned.
        continue;
      }
      if (best_info_id == kNotAssigned ||
          priority_info[info_id] > priority_info[best_info_id]) {
        best_info_id = info_id;
      }
    }
    if (best_info_id == kNotAssigned) {
      // Each iteration assigns exactly one tensor, so at this point some
      // not-yet-assigned tensors must still exist.
      return absl::InternalError("Invalid value for variable best_info_id.");
    }

    size_t best_rec_id = priority_info[best_info_id].tensor_usage_id;
    size_t best_obj_id = priority_info[best_info_id].best_object;
    bool new_object = false;
    if (priority_info[best_info_id].best_dist == kNotAssigned) {
      // No suitable shared object exists, so create a new one.
      new_object = true;
      best_obj_id = assignment->object_sizes.size();
      assignment->object_ids[best_rec_id] = best_obj_id;
      assignment->object_sizes.push_back(
          usage_records[best_rec_id].tensor_size);
    } else {
      // Assign tensor best_rec_id to the already existing object best_obj_id.
      assignment->object_ids[best_rec_id] = best_obj_id;
      assignment->object_sizes[best_obj_id] =
          std::max(assignment->object_sizes[best_obj_id],
                   usage_records[best_rec_id].tensor_size);
    }

    // Modify the SizeDistPriority records of tensors that haven't been
    // assigned yet, to reflect the distance changes after this assignment.
    for (size_t info_id = 0; info_id < num_records; ++info_id) {
      // SizeDistPriority record info_id contains the priority of tensor
      // rec_id.
      size_t rec_id = priority_info[info_id].tensor_usage_id;

      if (assignment->object_ids[rec_id] != kNotAssigned) {
        // Tensor rec_id is already assigned.
        continue;
      }
      if (!new_object &&
          priority_info[info_id].dist[best_obj_id] == kNotAssigned) {
        // Tensor rec_id intersects with one of the tensors already assigned
        // to object best_obj_id.
        continue;
      }

      size_t dist = kNotAssigned;
      if (usage_records[rec_id].last_task <
          usage_records[best_rec_id].first_task) {
        dist = usage_records[best_rec_id].first_task -
               usage_records[rec_id].last_task;
      } else if (usage_records[best_rec_id].last_task <
                 usage_records[rec_id].first_task) {
        dist = usage_records[rec_id].first_task -
               usage_records[best_rec_id].last_task;
      }

      if (new_object) {
        // best_rec_id is the only tensor assigned to the new object.
        priority_info[info_id].dist.push_back(dist);
      } else if (dist == kNotAssigned) {
        // Usage intervals of tensors rec_id and best_rec_id intersect, so
        // rec_id can't be assigned to best_obj_id anymore.
        priority_info[info_id].dist[best_obj_id] = kNotAssigned;
        if (priority_info[info_id].best_object == best_obj_id) {
          // best_obj_id was the best shared object for tensor rec_id, but it
          // is no longer suitable, so the best distance must be recalculated.
          priority_info[info_id].RecalcBestDist();
        }
      } else {
        // Update the distance, since it may have changed.
        priority_info[info_id].dist[best_obj_id] =
            std::min(priority_info[info_id].dist[best_obj_id], dist);
      }
      if (dist < priority_info[info_id].best_dist) {
        // Update the best distance and best object for tensor rec_id.
        priority_info[info_id].best_dist = dist;
        priority_info[info_id].best_object = best_obj_id;
      }
    }
  }
  return absl::OkStatus();
}
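
// A minimal usage sketch of GreedyBySizeDistPriorityAssignment, reusing the
// illustrative records from the GreedyBySizeAssignment example above:
//
//   std::vector<TensorUsageRecord<size_t>> records = {
//       {/*size=*/32, /*first_task=*/0, /*last_task=*/1},
//       {/*size=*/16, /*first_task=*/1, /*last_task=*/2},
//       {/*size=*/8, /*first_task=*/3, /*last_task=*/4}};
//   ObjectsAssignment<size_t> assignment;
//   absl::Status status =
//       GreedyBySizeDistPriorityAssignment(records, &assignment);
//
// For these records, tensors 0 and 2 end up sharing object 0 (size 32), while
// tensor 1 gets its own object of size 16, so assignment.object_ids is
// {0, 1, 0} and assignment.object_sizes is {32, 16}.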

}  // namespace gpu
}  // namespace tflite