/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_

#include <stddef.h>

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <list>
#include <queue>
#include <set>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {

// Implements memory management with a greedy algorithm.
//
// The problem of memory management is NP-complete. This function implements a
// greedy algorithm that approximates an optimal solution with the following
// heuristic:
//
// 1. Iterates through all tensor usage records and, for every object
//    reference, assigns a shared object from the pool. When an object
//    reference is used for the last time, the corresponding shared object is
//    returned to the pool.
//
// 2. The pool of shared objects grows when no free shared object is
//    available.
//
// 3. A shared object's size may increase when a tensor requests a larger
//    size.
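//
// A minimal usage sketch (illustrative only; the TensorUsageRecord and
// ObjectsAssignment field names follow types.h, and the three records below
// use made-up sizes and task ids):
//
//   std::vector<TensorUsageRecord<size_t>> records = {
//       {/*tensor_size=*/16, /*first_task=*/0, /*last_task=*/1},
//       {/*tensor_size=*/32, /*first_task=*/1, /*last_task=*/2},
//       {/*tensor_size=*/8, /*first_task=*/2, /*last_task=*/3},
//   };
//   ObjectsAssignment<size_t> assignment;
//   absl::Status status = GreedyInOrderAssignment(records, &assignment);
//   // On success, assignment.object_ids[i] is the shared object assigned to
//   // tensor i, and assignment.object_sizes holds the final object sizes.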
template <typename TensorSizeT>
absl::Status GreedyInOrderAssignment(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    ObjectsAssignment<TensorSizeT>* assignment,
    const UsageGraph* reallocation_graph = nullptr) {
  std::vector<size_t> last_assigned_tensor;
  size_t num_records = usage_records.size();
  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);

  // The pool of free shared objects is ordered by object size, because we
  // perform a lower_bound search in it.
  std::set<PoolRecord<TensorSizeT>> pool;
  // Queue of shared objects in use, ordered by their last_task.
  std::priority_queue<QueueRecord> objects_in_use;
  for (size_t i = 0; i < num_records; i++) {
    // Pop from the queue and add to the pool all objects that are no longer
    // in use at the time of execution of the first_task of the i-th
    // intermediate tensor.
    while (!objects_in_use.empty() &&
           objects_in_use.top().last_task < usage_records[i].first_task) {
      auto object_id = objects_in_use.top().object_id;
      pool.insert({assignment->object_sizes[object_id], object_id});
      objects_in_use.pop();
    }
    TensorSizeT tensor_size = usage_records[i].tensor_size;
    auto best_it = pool.end();
    size_t best_size_diff = 0;
    if (reallocation_graph) {
      for (auto pool_it = pool.begin(); pool_it != pool.end(); ++pool_it) {
        size_t size_diff =
            AbsDiffInElements(pool_it->object_size, tensor_size);
        if (best_it == pool.end() || size_diff < best_size_diff) {
          const std::vector<size_t>& realloc_options =
              (*reallocation_graph)[last_assigned_tensor[pool_it->object_id]];
          size_t pos = std::lower_bound(realloc_options.begin(),
                                        realloc_options.end(), i) -
                       realloc_options.begin();
          if (pos != realloc_options.size() && realloc_options[pos] == i) {
            // The memory of the tensor that was last assigned to object
            // pool_it->object_id can be reused for tensor i.
            best_size_diff = size_diff;
            best_it = pool_it;
          }
        }
      }
    } else if (!pool.empty()) {
      // Find the shared object from the pool that wastes the least possible
      // amount of memory when reused for the current tensor.
      auto pool_it = pool.lower_bound({tensor_size, 0});
      TensorSizeT size_diff = 0;
      if (pool_it != pool.end()) {
        // Try the smallest shared object in the pool with size >= tensor_size.
        size_diff = pool_it->object_size - tensor_size;
        best_it = pool_it;
      }
      if (pool_it != pool.begin()) {
        // Try the largest shared object in the pool with size < tensor_size.
        pool_it--;
        if (best_it == pool.end() ||
            tensor_size - pool_it->object_size < size_diff) {
          size_diff = tensor_size - pool_it->object_size;
          best_it = pool_it;
        }
      }
      // best_it can't be equal to pool.end(), because the pool is not empty.
      if (best_it == pool.end()) {
        return absl::InternalError(
            "No shared object is found in non-empty pool in "
            "GreedyInOrderAssignment.");
      }
    }
    if (best_it == pool.end()) {
      // No free shared object: create a new one, assign the i-th tensor to
      // it, and add it to the queue of objects in use.
      assignment->object_ids[i] = assignment->object_sizes.size();
      assignment->object_sizes.push_back(tensor_size);
      last_assigned_tensor.push_back(i);
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    } else {
      size_t shared_id = best_it->object_id;
      pool.erase(best_it);
      assignment->object_ids[i] = shared_id;
      assignment->object_sizes[shared_id] =
          std::max(assignment->object_sizes[shared_id], tensor_size);
      last_assigned_tensor[shared_id] = i;
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    }
  }
  return absl::OkStatus();
}

// The same algorithm as above, but for the multidimensional case. The only
// difference is that a shared object's dimensions can't be increased to allow
// reuse for a tensor that is larger in at least one dimension.
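//
// A minimal usage sketch (illustrative only; it assumes TensorSizeT is a
// multidimensional size type, e.g. uint3, for which IsCoveringObject and
// AbsDiffInElements overloads are provided by internal.h):
//
//   std::vector<TensorUsageRecord<uint3>> records = ...;  // filled by caller
//   ObjectsAssignment<uint3> assignment;
//   absl::Status status =
//       GreedyInOrderAssignmentMultidimensional(records, &assignment);
//   // On success, tensors that share an object id in assignment.object_ids
//   // never overlap in time, and every shared object's size covers all
//   // tensors assigned to it.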
template <typename TensorSizeT>
absl::Status GreedyInOrderAssignmentMultidimensional(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    ObjectsAssignment<TensorSizeT>* assignment) {
  size_t num_records = usage_records.size();
  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);

  // The pool of free shared objects is unordered in the multidimensional
  // version of the algorithm.
  std::list<size_t> pool;
  // Queue of shared objects in use, ordered by their last_task.
  std::priority_queue<QueueRecord> objects_in_use;
  for (size_t i = 0; i < num_records; i++) {
    // Pop from the queue and add to the pool all objects that are no longer
    // in use at the time of execution of the first_task of the i-th
    // intermediate tensor.
    while (!objects_in_use.empty() &&
           objects_in_use.top().last_task < usage_records[i].first_task) {
      auto object_id = objects_in_use.top().object_id;
      pool.push_back(object_id);
      objects_in_use.pop();
    }
    const TensorSizeT& tensor_size = usage_records[i].tensor_size;
    auto best_it = pool.end();
    size_t best_size_diff = 0;
    // Find the shared object from the pool that wastes the least possible
    // amount of memory when reused for the current tensor.
    for (auto pool_it = pool.begin(); pool_it != pool.end(); ++pool_it) {
      // Size of the shared object needed to cover the current tensor and all
      // previous tensors assigned to it.
      const TensorSizeT& shared_object_size =
          assignment->object_sizes[*pool_it];
      if (IsCoveringObject(shared_object_size, tensor_size)) {
        // Prefer the shared object that wastes less memory.
        size_t size_diff = AbsDiffInElements(shared_object_size, tensor_size);
        if (best_it == pool.end() || size_diff < best_size_diff) {
          best_it = pool_it;
          best_size_diff = size_diff;
        }
      }
    }
    if (best_it == pool.end()) {
      // No suitable free shared object: create a new one, assign the i-th
      // tensor to it, and add it to the queue of objects in use.
      assignment->object_ids[i] = assignment->object_sizes.size();
      assignment->object_sizes.push_back(tensor_size);
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    } else {
      size_t shared_id = *best_it;
      pool.erase(best_it);
      assignment->object_ids[i] = shared_id;
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    }
  }
  return absl::OkStatus();
}

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_