/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/memory_management/greedy_by_size_assignment.h"

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <vector>

#include "absl/status/status.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"

namespace tflite {
namespace gpu {
namespace {

struct SizeDistPriorityInfo {
  // - A tensor whose position in the positional maximums vector is leftmost
  // has higher priority;
  // - If two tensors have equal position, the one whose usage interval has the
  // smallest positive distance (best_dist) to one of the already assigned
  // tensors has higher priority;
  // - If two tensors have equal position and best_dist, the one with the
  // greater tensor_size has higher priority.
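  // Illustrative example (hypothetical values): under this ordering,
  // {position = 0, best_dist = 2, tensor_size = 64} outranks
  // {position = 0, best_dist = 2, tensor_size = 32}, which in turn outranks
  // {position = 1, best_dist = 0, tensor_size = 128}.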
  bool operator>(const SizeDistPriorityInfo& other) const {
    return position < other.position ||
           (position == other.position &&
            (best_dist < other.best_dist || (best_dist == other.best_dist &&
                                             tensor_size > other.tensor_size)));
  }

  // Recalculate the best distance and the best object, based on the distances
  // precomputed in the vector dist.
  void RecalcBestDist() {
    best_dist = kNotAssigned;
    for (size_t obj_id = 0; obj_id < dist.size(); ++obj_id) {
      if (dist[obj_id] < best_dist) {
        best_dist = dist[obj_id];
        best_object = obj_id;
      }
    }
  }

  size_t position;
  size_t tensor_size;
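  // dist[obj_id] is the smallest distance from this tensor's usage interval to
  // the usage intervals of tensors already assigned to object obj_id, or
  // kNotAssigned if one of those intervals intersects the current one.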
  std::vector<size_t> dist;
  size_t best_dist;
  size_t best_object;
  size_t tensor_usage_id;
};

}  // namespace

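// Assigns given tensors to offsets, using the following greedy algorithm
// (a summary of the implementation below):
// - Usage records are sorted by the size of the corresponding tensor;
// - For each record, the already assigned tensors whose usage intervals
// intersect the current one are scanned in order of offset, and the smallest
// gap between them that can hold the current tensor is chosen;
// - If no suitable gap exists, the current tensor is placed right after the
// rightmost intersecting tensor.
//
// Illustrative example (hypothetical records {size, first_task, last_task};
// assumes CompareBySize orders records largest-first): for {32, 0, 1},
// {16, 0, 3}, {8, 2, 3}, the 32-byte tensor gets offset 0, the 16-byte tensor
// intersects it and goes to offset 32, and the 8-byte tensor intersects only
// the 16-byte one, so it reuses offset 0; total_size is 48.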
absl::Status GreedyBySizeAssignment(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    OffsetsAssignment* assignment) {
  const size_t num_tensors = usage_records.size();
  assignment->offsets.resize(num_tensors);
  assignment->total_size = 0;

  // Ordered records are to be sorted by the size of the corresponding tensor.
  std::vector<TensorUsageWithIndex<size_t>> ordered_records;
  for (size_t i = 0; i < num_tensors; ++i) {
    ordered_records.emplace_back(&usage_records[i], i);
  }
  std::sort(ordered_records.begin(), ordered_records.end(), CompareBySize);

  // Vector of ids of already allocated tensors, ordered by offset.
  std::vector<size_t> ordered_allocs;

  for (const auto& rec_with_idx : ordered_records) {
    const TensorUsageRecord<size_t>* rec = rec_with_idx.usage_record;
    size_t best_diff = kNotAssigned;
    size_t best_offset = kNotAssigned;
    size_t prev_offset = 0;
    for (const auto& allocated_id : ordered_allocs) {
      if (usage_records[allocated_id].last_task < rec->first_task ||
          usage_records[allocated_id].first_task > rec->last_task) {
        // Tensor allocated_id has a usage interval that doesn't intersect with
        // the current tensor's usage interval, so we skip it.
        continue;
      }
      size_t cur_offset = assignment->offsets[allocated_id];
      if (cur_offset >= prev_offset) {
        size_t diff = cur_offset - prev_offset;
        // Check whether the current tensor fits into the gap directly to the
        // left of tensor allocated_id's offset, and whether this gap is the
        // smallest of the suitable gaps considered so far.
        if (diff >= rec->tensor_size && diff < best_diff) {
          best_diff = diff;
          best_offset = prev_offset;
        }
      }
      prev_offset = std::max(
          prev_offset, cur_offset + usage_records[allocated_id].tensor_size);
    }
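    // Sanity check: prev_offset is a maximum of offset + tensor_size over a
    // subset of the already assigned tensors, so it can never exceed
    // total_size.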
    if (assignment->total_size < prev_offset) {
      return absl::InternalError("Total size is wrong.");
    }

    // If no suitable gap was found, allocate the current tensor after the
    // rightmost tensor whose usage interval intersects with the current one.
    if (best_offset == kNotAssigned) {
      best_offset = prev_offset;
    }

    // Assign best_offset to the current tensor and find the correct place to
    // insert information about it into ordered_allocs, to preserve the
    // ordering.
    auto it = ordered_allocs.begin();
    while (it != ordered_allocs.end() &&
           assignment->offsets[*it] <= best_offset) {
      ++it;
    }
    ordered_allocs.insert(it, rec_with_idx.idx);
    assignment->offsets[rec_with_idx.idx] = best_offset;
    assignment->total_size =
        std::max(assignment->total_size, best_offset + rec->tensor_size);
  }
  return absl::OkStatus();
}
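
// A minimal usage sketch for GreedyBySizeAssignment (hypothetical record
// values; kept as a comment because error handling and context are elided):
//
//   std::vector<TensorUsageRecord<size_t>> records = {
//       {/*size=*/32, /*first_task=*/0, /*last_task=*/1},
//       {/*size=*/16, /*first_task=*/0, /*last_task=*/3},
//       {/*size=*/8, /*first_task=*/2, /*last_task=*/3}};
//   OffsetsAssignment result;
//   absl::Status status = GreedyBySizeAssignment(records, &result);
//   // On success, result.offsets[i] is the offset of tensor i inside a single
//   // shared arena of size result.total_size.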

// Assigns given tensors to shared objects, using the following greedy
// algorithm:
// - We have tensor usage records of all intermediate tensors as input. Each
// record consists of a tensor size and the first and last tasks that use it.
// Let's call [first_task..last_task] a tensor usage interval;
// - The distance between two usage intervals is the absolute difference
// between the closest tasks in their intervals. If two usage intervals don't
// intersect, then the distance between them is positive;
// - Calculate the positional maximums vector, i.e. the vector of lower bounds
// on the size of each shared object (see the illustrative example below);
// - For each tensor find the rightmost positional maximum that is greater than
// or equal to the current tensor's size (call it position);
// - Iterate through all tensors in non-increasing order of their
// SizeDistPriority (described above);
// - For every such tensor, assign it to the object that already holds a tensor
// whose usage interval has the smallest existing positive distance to the
// current tensor's usage interval (this distance and object id are already
// precalculated in its SizeDistPriority record). The size of the chosen object
// can possibly increase;
// - If there are several such objects, use the largest one;
// - If there are no suitable shared objects, assign the current tensor to a
// new object with size equal to the current tensor's size;
// - Modify the SizeDistPriority records of tensors that haven't been assigned
// yet, to reflect distance changes after that assignment.
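//
// Illustrative example (hypothetical): if the busiest task has three
// simultaneously active tensors of sizes {32, 16, 8} (and no other task's
// sorted sizes exceed these entrywise), the positional maximums vector is
// {32, 16, 8}: at least three shared objects of at least these sizes are
// needed. A tensor of size 10 then gets position 1, because the rightmost
// entry that is >= 10 is 16, at index 1.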
absl::Status GreedyBySizeDistPriorityAssignment(
    const std::vector<TensorUsageRecord<size_t>>& usage_records,
    ObjectsAssignment<size_t>* assignment) {
  std::vector<size_t> positional_max =
      CalculatePositionalMaximums(usage_records);

  size_t num_records = usage_records.size();
  std::vector<SizeDistPriorityInfo> priority_info(num_records);
  for (size_t rec_id = 0; rec_id < usage_records.size(); ++rec_id) {
    priority_info[rec_id].tensor_usage_id = rec_id;
    priority_info[rec_id].tensor_size = usage_records[rec_id].tensor_size;

    // No objects have been created yet.
    priority_info[rec_id].best_dist = kNotAssigned;
    priority_info[rec_id].best_object = kNotAssigned;

    // Find the rightmost positional maximum that is greater than or equal to
    // the current tensor's size.
    size_t pos = 0;
    while (pos < positional_max.size() &&
           positional_max[pos] >= priority_info[rec_id].tensor_size) {
      ++pos;
    }
    if (pos == 0) {
      return absl::InternalError("Variable pos must be positive.");
    }
    priority_info[rec_id].position = pos - 1;
  }

  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);
  for (size_t it = 0; it < num_records; ++it) {
    size_t best_info_id = kNotAssigned;
    for (size_t info_id = 0; info_id < num_records; ++info_id) {
      if (assignment->object_ids[priority_info[info_id].tensor_usage_id] !=
          kNotAssigned) {
        // Tensor already assigned.
        continue;
      }
      if (best_info_id == kNotAssigned ||
          priority_info[info_id] > priority_info[best_info_id]) {
        best_info_id = info_id;
      }
    }
    if (best_info_id == kNotAssigned) {
      // During each iteration we assign exactly one tensor, so at this point
      // some unassigned tensors must still exist.
      return absl::InternalError("Invalid value for variable best_info_id.");
    }

    size_t best_rec_id = priority_info[best_info_id].tensor_usage_id;
    size_t best_obj_id = priority_info[best_info_id].best_object;
    bool new_object = false;
    if (priority_info[best_info_id].best_dist == kNotAssigned) {
      // No suitable shared object, so we create a new one.
      new_object = true;
      best_obj_id = assignment->object_sizes.size();
      assignment->object_ids[best_rec_id] = best_obj_id;
      assignment->object_sizes.push_back(
          usage_records[best_rec_id].tensor_size);
    } else {
      // Assign tensor best_rec_id to the already existing object best_obj_id.
      assignment->object_ids[best_rec_id] = best_obj_id;
      assignment->object_sizes[best_obj_id] =
          std::max(assignment->object_sizes[best_obj_id],
                   usage_records[best_rec_id].tensor_size);
    }

    // Modify the SizeDistPriority records of tensors that haven't been
    // assigned yet, to reflect distance changes after this assignment.
    for (size_t info_id = 0; info_id < num_records; ++info_id) {
      // SizeDistPriority record info_id contains the priority of tensor
      // rec_id.
      size_t rec_id = priority_info[info_id].tensor_usage_id;

      if (assignment->object_ids[rec_id] != kNotAssigned) {
        // Tensor rec_id is already assigned.
        continue;
      }
      if (!new_object &&
          priority_info[info_id].dist[best_obj_id] == kNotAssigned) {
        // Tensor rec_id intersects with some of the tensors that are assigned
        // to object best_obj_id.
        continue;
      }

      size_t dist = kNotAssigned;
      if (usage_records[rec_id].last_task <
          usage_records[best_rec_id].first_task) {
        dist = usage_records[best_rec_id].first_task -
               usage_records[rec_id].last_task;
      } else if (usage_records[best_rec_id].last_task <
                 usage_records[rec_id].first_task) {
        dist = usage_records[rec_id].first_task -
               usage_records[best_rec_id].last_task;
      }
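      // dist stays kNotAssigned when the two usage intervals intersect.
      // Illustrative example (hypothetical): for usage intervals [0..2] and
      // [4..6] the distance is 4 - 2 = 2.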

      if (new_object) {
        // best_rec_id is the only tensor assigned to the new object.
        priority_info[info_id].dist.push_back(dist);
      } else if (dist == kNotAssigned) {
        // Usage intervals of tensors rec_id and best_rec_id intersect. So
        // rec_id can't be assigned to best_obj_id anymore.
        priority_info[info_id].dist[best_obj_id] = kNotAssigned;
        if (priority_info[info_id].best_object == best_obj_id) {
          // best_obj_id was the best shared object for tensor rec_id, but now
          // it's not suitable anymore, so we need to recalculate.
          priority_info[info_id].RecalcBestDist();
        }
      } else {
        // Update the distance, because it may have changed.
        priority_info[info_id].dist[best_obj_id] =
            std::min(priority_info[info_id].dist[best_obj_id], dist);
      }
      if (dist < priority_info[info_id].best_dist) {
        // Update best distance and best object for tensor rec_id.
        priority_info[info_id].best_dist = dist;
        priority_info[info_id].best_object = best_obj_id;
      }
    }
  }
  return absl::OkStatus();
}

}  // namespace gpu
}  // namespace tflite