/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_

#include <stddef.h>

#include <algorithm>
#include <cstddef>
#include <iterator>
#include <list>
#include <queue>
#include <set>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/memory_management/internal.h"
#include "tensorflow/lite/delegates/gpu/common/memory_management/types.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"

namespace tflite {
namespace gpu {

// Implements memory management with a greedy algorithm.
//
// The problem of memory management is NP-complete. This function implements a
// greedy algorithm that approximates an optimal solution with the following
// heuristic:
//
//   1. Iterate through all tensor usage records and, for every object
//      reference, assign a shared object from the pool. When an object
//      reference is used for the last time, the corresponding shared object is
//      returned to the pool.
//
//   2. The shared object pool grows when no free shared object is available.
//
//   3. A shared object's size may increase when a tensor requests a larger
//      size.
//
// A commented usage sketch follows the function body below.
template <typename TensorSizeT>
absl::Status GreedyInOrderAssignment(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    ObjectsAssignment<TensorSizeT>* assignment,
    const UsageGraph* reallocation_graph = nullptr) {
  std::vector<size_t> last_assigned_tensor;
  size_t num_records = usage_records.size();
  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);

  // The pool of free shared objects is ordered by object size, because we
  // perform a lower_bound search in it.
  std::set<PoolRecord<TensorSizeT>> pool;
  // Queue of shared objects in use, ordered by their last_task.
  std::priority_queue<QueueRecord> objects_in_use;
  for (size_t i = 0; i < num_records; i++) {
    // Pop from the queue and add to the pool all objects that are no longer
    // in use at the time of execution of the first_task of the i-th
    // intermediate tensor.
    while (!objects_in_use.empty() &&
           objects_in_use.top().last_task < usage_records[i].first_task) {
      auto object_id = objects_in_use.top().object_id;
      pool.insert({assignment->object_sizes[object_id], object_id});
      objects_in_use.pop();
    }
    TensorSizeT tensor_size = usage_records[i].tensor_size;
    auto best_it = pool.end();
    size_t best_size_diff = 0;
    if (reallocation_graph) {
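      // As inferred from the binary search below (an assumption, not
      // documented in this header): (*reallocation_graph)[t] is a sorted list
      // of record indices whose memory may reuse the object last assigned to
      // record t.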
      for (auto pool_it = pool.begin(); pool_it != pool.end(); ++pool_it) {
        size_t size_diff = AbsDiffInElements(pool_it->object_size, tensor_size);
        if (best_it == pool.end() || size_diff < best_size_diff) {
          const std::vector<size_t>& realloc_options =
              (*reallocation_graph)[last_assigned_tensor[pool_it->object_id]];
          size_t pos = std::lower_bound(realloc_options.begin(),
                                        realloc_options.end(), i) -
                       realloc_options.begin();
          if (pos != realloc_options.size() && realloc_options[pos] == i) {
            // We found that the memory of the tensor last assigned to object
            // pool_it->object_id can be reused for tensor i.
            best_size_diff = size_diff;
            best_it = pool_it;
          }
        }
      }
    } else if (!pool.empty()) {
      // Find the shared object in the pool that wastes the least possible
      // amount of memory when reused for the current tensor.
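      // For example (illustrative numbers, not from this file): with pool
      // sizes {16, 64} and tensor_size 20, the smallest object with
      // size >= 20 is 64 (wasting 44 elements), while the largest object with
      // size < 20 is 16 (needing to grow by only 4 elements), so the object
      // of size 16 is chosen and later enlarged to 20.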
      auto pool_it = pool.lower_bound({tensor_size, 0});
      TensorSizeT size_diff = 0;
      if (pool_it != pool.end()) {
        // Try the smallest shared object in the pool with size >= tensor_size.
        size_diff = pool_it->object_size - tensor_size;
        best_it = pool_it;
      }
      if (pool_it != pool.begin()) {
        // Try the largest shared object in the pool with size < tensor_size.
        pool_it--;
        if (best_it == pool.end() ||
            tensor_size - pool_it->object_size < size_diff) {
          size_diff = tensor_size - pool_it->object_size;
          best_it = pool_it;
        }
      }
      // best_it can't be equal to pool.end(), because the pool is not empty.
      if (best_it == pool.end()) {
        return absl::InternalError(
            "No shared object is found in non-empty pool in "
            "GreedyInOrderAssignment.");
      }
    }
    if (best_it == pool.end()) {
      // No free shared object: create a new one, assign the i-th tensor to it,
      // and add it to the queue of objects in use.
      assignment->object_ids[i] = assignment->object_sizes.size();
      assignment->object_sizes.push_back(tensor_size);
      last_assigned_tensor.push_back(i);
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    } else {
      size_t shared_id = best_it->object_id;
      pool.erase(best_it);
      assignment->object_ids[i] = shared_id;
      assignment->object_sizes[shared_id] =
          std::max(assignment->object_sizes[shared_id], tensor_size);
      last_assigned_tensor[shared_id] = i;
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    }
  }
  return absl::OkStatus();
}
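
// Usage sketch (a minimal, hypothetical example; the record values and the
// (tensor_size, first_task, last_task) construction order are assumptions
// based on the fields accessed above, not guarantees of this header):
//
//   std::vector<TensorUsageRecord<size_t>> records = {
//       {/*tensor_size=*/16, /*first_task=*/0, /*last_task=*/1},
//       {/*tensor_size=*/32, /*first_task=*/1, /*last_task=*/2},
//       {/*tensor_size=*/8, /*first_task=*/3, /*last_task=*/4}};
//   ObjectsAssignment<size_t> assignment;
//   absl::Status status = GreedyInOrderAssignment(records, &assignment);
//   // On success, assignment.object_ids[i] is the shared object serving
//   // record i, and assignment.object_sizes[id] is the final size of shared
//   // object id. Here records 0 and 2 share one object of size 16, while
//   // record 1 gets its own object of size 32.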

// The same algorithm as above, but for the multidimensional case. The only
// difference is that a shared object's dimensions can't be increased, so it
// can be reused only for tensors it already covers in every dimension.
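// For example (illustrative shapes, assuming a three-dimensional size type):
// a shared object of size 8x8x16 can be reused for a 4x8x16 tensor, because it
// covers it in every dimension, but not for a 2x2x32 tensor, which has fewer
// elements in total but exceeds the object's last dimension.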
template <typename TensorSizeT>
absl::Status GreedyInOrderAssignmentMultidimensional(
    const std::vector<TensorUsageRecord<TensorSizeT>>& usage_records,
    ObjectsAssignment<TensorSizeT>* assignment) {
  size_t num_records = usage_records.size();
  assignment->object_sizes.clear();
  assignment->object_ids.assign(num_records, kNotAssigned);

  // The pool of free shared objects is unordered in the multidimensional
  // version of the algorithm.
  std::list<size_t> pool;
  // Queue of shared objects in use, ordered by their last_task.
  std::priority_queue<QueueRecord> objects_in_use;
  for (size_t i = 0; i < num_records; i++) {
    // Pop from the queue and add to the pool all objects that are no longer
    // in use at the time of execution of the first_task of the i-th
    // intermediate tensor.
    while (!objects_in_use.empty() &&
           objects_in_use.top().last_task < usage_records[i].first_task) {
      auto object_id = objects_in_use.top().object_id;
      pool.push_back(object_id);
      objects_in_use.pop();
    }
    const TensorSizeT& tensor_size = usage_records[i].tensor_size;
    auto best_it = pool.end();
    size_t best_size_diff = 0;
    // Find the shared object in the pool that wastes the least possible
    // amount of memory when reused for the current tensor.
    for (auto pool_it = pool.begin(); pool_it != pool.end(); ++pool_it) {
      // Current size of the shared object; it must already cover the current
      // tensor in every dimension to be reusable, because object dimensions
      // can't grow in the multidimensional case.
      const TensorSizeT& shared_object_size =
          assignment->object_sizes[*pool_it];
      if (IsCoveringObject(shared_object_size, tensor_size)) {
        // Prefer the shared object that wastes less memory.
        size_t size_diff = AbsDiffInElements(shared_object_size, tensor_size);
        if (best_it == pool.end() || size_diff < best_size_diff) {
          best_it = pool_it;
          best_size_diff = size_diff;
        }
      }
    }
    if (best_it == pool.end()) {
      // No suitable free shared object: create a new one, assign the i-th
      // tensor to it, and add it to the queue of objects in use.
      assignment->object_ids[i] = assignment->object_sizes.size();
      assignment->object_sizes.push_back(tensor_size);
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    } else {
      size_t shared_id = *best_it;
      pool.erase(best_it);
      assignment->object_ids[i] = shared_id;
      objects_in_use.push(
          {usage_records[i].last_task, assignment->object_ids[i]});
    }
  }
  return absl::OkStatus();
}
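
// Usage sketch (hypothetical; assumes a multidimensional size type such as
// BHWC from the surrounding library, which is not defined in this header, and
// the same (tensor_size, first_task, last_task) construction order as above):
//
//   std::vector<TensorUsageRecord<BHWC>> records = {
//       {/*tensor_size=*/BHWC(1, 8, 8, 16), /*first_task=*/0,
//        /*last_task=*/1},
//       {/*tensor_size=*/BHWC(1, 4, 8, 16), /*first_task=*/2,
//        /*last_task=*/3}};
//   ObjectsAssignment<BHWC> assignment;
//   absl::Status status =
//       GreedyInOrderAssignmentMultidimensional(records, &assignment);
//   // The second tensor fits inside the first object in every dimension, so
//   // both records map to the same shared object of size 1x8x8x16.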

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEMORY_MANAGEMENT_GREEDY_IN_ORDER_ASSIGNMENT_H_