/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #include "backend/optimizer/somas/somas_solver_alg.h"
18
19 #include <algorithm>
20 #include <stack>
21 #include <utility>
22
23 namespace mindspore {
24 namespace somas {
25 // offset picking heuristics
// Candidate ordering: smaller start offset wins; ties broken by smaller gap.
bool SmallestFit(const std::pair<size_t, size_t> &a, const std::pair<size_t, size_t> &b) {
  if (a.first != b.first) {
    return a.first < b.first;
  }
  return a.second < b.second;
}
// Candidate ordering: larger start offset wins; ties broken by smaller gap.
bool LargestFit(const std::pair<size_t, size_t> &a, const std::pair<size_t, size_t> &b) {
  if (a.first != b.first) {
    return a.first > b.first;
  }
  return a.second < b.second;
}
// Candidate ordering: tightest (smallest) gap wins; ties broken by smaller offset.
bool BestFit(const std::pair<size_t, size_t> &a, const std::pair<size_t, size_t> &b) {
  if (a.second != b.second) {
    return a.second < b.second;
  }
  return a.first < b.first;
}
// Candidate ordering: loosest (largest) gap wins; ties broken by smaller offset.
bool WorstFit(const std::pair<size_t, size_t> &a, const std::pair<size_t, size_t> &b) {
  if (a.second != b.second) {
    return a.second > b.second;
  }
  return a.first < b.first;
}
SharedObjects(FootPrint * p)38 size_t SharedObjects(FootPrint *p) { return p->Next()->getOffset(); }
SingleObject(FootPrint * p)39 size_t SingleObject(FootPrint *p) { return SIZE_MAX; }
40
// Offset-picking predicates, indexed by the fitting-strategy id used in
// FootPrint::findFirst (m_branching_strategy_). Debug builds also expose the
// LargestFit / WorstFit heuristics.
bool (*g_pBranching[kNumFittingTypes])(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) = {
  BestFit, SmallestFit
#ifdef SOMAS_DEBUG
  ,
  LargestFit, WorstFit
#endif
};
// Footprint upper-limit policy, indexed by m_algorithm_: SharedObjects caps
// growth at the next footprint's offset, SingleObject imposes no cap.
size_t (*algorithm[kNumAlgorithmTypes])(FootPrint *p) = {SharedObjects, SingleObject};
49
Result()50 size_t FootPrint::Result() {
51 std::shared_ptr<FootPrint> foot_print = shared_from_this();
52 size_t upperbound = 0;
53 uint32_t total_footprints = 0;
54 while (foot_print != nullptr) {
55 foot_print->printStats();
56
57 upperbound = foot_print->getOffset();
58 foot_print = foot_print->Next();
59 total_footprints++;
60 }
61
62 MS_LOG(DEBUG) << total_footprints << " footprints allocated";
63
64 return upperbound;
65 }
findFirst(stack<Interval> * merged,const BlockTensor & block,size_t * offset)66 bool FootPrint::findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset) {
67 MS_EXCEPTION_IF_NULL(merged);
68 MS_EXCEPTION_IF_NULL(offset);
69 bool bfound = false;
70 std::set<pair<size_t, size_t>, bool (*)(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b)>
71 offsetcandidates(g_pBranching[m_branching_strategy_]);
72 size_t gap;
73
74 Interval a;
75 Interval it;
76
77 size_t block_size;
78 if (block.Alone()) {
79 block_size = block.m_size_;
80 } else {
81 block_size = block.m_start_tensor_->size_; // consider only first tensor for contiguous block
82 }
83
84 a.ub() = algorithm[m_algorithm_](this);
85 while (!(*merged).empty()) {
86 it = (*merged).top();
87 (*merged).pop();
88 a.lb() = it.ub();
89 if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
90 gap = a.ub() - a.lb() - block_size;
91 offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
92 }
93 a.ub() = it.lb();
94 }
95 a.lb() = m_offset_;
96 if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
97 gap = a.ub() - a.lb() - block_size;
98 offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
99 }
100
101 if (offsetcandidates.size() > 0) {
102 *offset = (*offsetcandidates.begin()).first;
103 m_foot_print_next_->m_offset_ = std::max(m_foot_print_next_->m_offset_, *offset + block.m_size_);
104 bfound = true;
105 }
106
107 return bfound;
108 }
Merge(vector<Interval> * interval_v,stack<Interval> * s)109 void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) {
110 MS_EXCEPTION_IF_NULL(s);
111 MS_EXCEPTION_IF_NULL(interval_v);
112 sort((*interval_v).begin(), (*interval_v).end(),
113 [](Interval &i1, Interval &i2) { return (i1.lb() < i2.lb()) || (i1.lb() == i2.lb() && i1.ub() < i2.ub()); });
114 (*s).push((*interval_v)[0]);
115
116 for (size_t i = 1; i < (*interval_v).size(); i++) {
117 Interval &top = (*s).top();
118 Interval &b = (*interval_v)[i];
119 if (top.ub() < b.lb())
120 (*s).push(b);
121
122 else if (top.ub() < b.ub())
123 top.ub() = b.ub();
124 }
125
126 return;
127 }
findOffset(const std::vector<DynamicBitSet> * constraints,const BlockTensor & block,size_t * offset)128 bool FootPrint::findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) {
129 MS_EXCEPTION_IF_NULL(offset);
130 bool bretval = true;
131 vector<Interval> l_interval;
132
133 const size_t intervals_estimation = 1000;
134 l_interval.reserve(intervals_estimation * sizeof(Interval));
135
136 *offset = m_offset_;
137
138 // transform constrained tensors in non eligible intervals
139 if (block.Alone()) {
140 if (m_algorithm_ == kManyObjects && m_starts_.size() > 0 && m_starts_[0]->Alone() &&
141 (*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[0]->m_start_tensor_->index_) == false) {
142 return false;
143 }
144 for (size_t i = 0; i < m_starts_.size(); i++) {
145 auto allocated_tensor = m_starts_[i]->m_start_tensor_;
146 while (allocated_tensor != nullptr) {
147 if ((*constraints)[block.m_start_tensor_->index_].IsBitTrue(allocated_tensor->index_) == false) {
148 l_interval.emplace_back(Interval(allocated_tensor));
149 }
150 allocated_tensor = allocated_tensor->right_;
151 }
152 }
153 } else {
154 int64_t start_offset = static_cast<int64_t>(m_offset_);
155 for (size_t i = 0; i < m_starts_.size(); i++) {
156 auto allocated_tensor = m_starts_[i]->m_start_tensor_;
157 while (allocated_tensor != nullptr) {
158 int64_t allocated_offset = static_cast<int64_t>(allocated_tensor->offset_);
159 int64_t allocated_size = static_cast<int64_t>(allocated_tensor->size_);
160 int64_t accumulator = 0;
161 for (auto block_tensor = block.m_start_tensor_; block_tensor != nullptr; block_tensor = block_tensor->right_) {
162 if ((*constraints)[block_tensor->index_].IsBitTrue(allocated_tensor->index_) == false) {
163 int64_t start_first_contiguous = allocated_offset - accumulator - SizeToLong(block_tensor->size_);
164 int64_t end_first_contiguous = allocated_offset - accumulator + allocated_size;
165 if (start_first_contiguous > start_offset) {
166 l_interval.emplace_back(Interval(start_first_contiguous, end_first_contiguous));
167 } else {
168 if (end_first_contiguous > start_offset) {
169 l_interval.emplace_back(Interval(start_offset, end_first_contiguous));
170 }
171 }
172 }
173 accumulator += SizeToLong(block_tensor->size_);
174 }
175 allocated_tensor = allocated_tensor->right_;
176 }
177 }
178 }
179
180 // merge non-eligible intervals and find a slot to allocate the tensor block
181 if (!l_interval.empty()) {
182 stack<Interval> l_mergedIntervals;
183 Merge(&l_interval, &l_mergedIntervals);
184 bretval = findFirst(&l_mergedIntervals, block, offset);
185 }
186
187 return bretval;
188 }
// Place `block` at `offset` inside this footprint: record the block, remember
// the chosen offset under the current solution id, and assign back-to-back
// offsets to every tensor of a (possibly contiguous) block.
void FootPrint::addElem(BlockTensor *block, const size_t &offset) {
  // Lazily create the next footprint in the chain; its base offset starts
  // right after this block and is raised later by findFirst as needed.
  if (m_foot_print_next_ == nullptr) {
    m_foot_print_next_ = std::make_shared<FootPrint>();
    size_t newoffset = m_offset_ + block->m_size_;
    m_foot_print_next_->setOffset(newoffset);
    m_foot_print_next_->setAlignment(m_alignment_);
    m_foot_print_next_->m_solId_ = m_solId_;
    // NOTE(review): this clears THIS footprint's starts when the next
    // footprint is created (not the new footprint's) — confirm intended.
    m_starts_.clear();
    MS_LOG(DEBUG) << "Creating footprint at offset: " << m_offset_;
  }

  addStart(block);
  size_t offset1 = offset;
  SomasSolverTensorDescPtr tensor = block->m_start_tensor_;
  MS_LOG(DEBUG) << "Allocating block: " << tensor->index_ << " in offset: " << offset;
  // Record the offset chosen for this block under the current solution id;
  // warn if this solution already had an offset for it.
  pair<uint32_t, size_t> sol_offset;
  sol_offset.first = block->m_current_sol_;
  sol_offset.second = offset;
  if (block->offsets_.count(sol_offset.first))
    MS_LOG(WARNING) << "Warning addElem: Offset overwritten at solution " << block->m_current_sol_ << " for block "
                    << block->m_start_tensor_->index_;
  block->offsets_.insert(sol_offset);
  // Lay out each tensor of the block contiguously starting at `offset`.
  while (tensor) {
    tensor->offset_ = offset1;
    offset1 += tensor->size_;

    MS_LOG(DEBUG) << tensor->index_ << " " << tensor->size_ << " " << tensor->offset_;
    tensor = tensor->right_;
  }
}
printStats()219 void FootPrint::printStats() {
220 MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_;
221 }
Eval(vector<BlockTensor> * block_tensors_v,const std::shared_ptr<FootPrint> & foot_print,const std::vector<DynamicBitSet> * pConstraints)222 bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, const std::shared_ptr<FootPrint> &foot_print,
223 const std::vector<DynamicBitSet> *pConstraints) {
224 MS_EXCEPTION_IF_NULL(foot_print);
225 auto start = std::chrono::system_clock::now();
226
227 std::shared_ptr<FootPrint> p = foot_print;
228 bool bpushed = false;
229 uint32_t startscount = 0;
230 size_t offset = foot_print->getOffset();
231 m_tensors_allocated_ = 0;
232 SomasSolverTensorDescPtr tensor = nullptr;
233
234 for (auto &block : *block_tensors_v) {
235 if (!block.m_bre_allocate_) {
236 offset = block.m_start_tensor_->offset_;
237 pair<uint32_t, size_t> aux;
238 aux.first = foot_print->m_solId_;
239 aux.second = block.m_start_tensor_->offset_;
240 if (block.offsets_.count(aux.first)) {
241 MS_LOG(WARNING) << "Warning: Offset overwritten at solution " << aux.first << " for block "
242 << block.m_start_tensor_->index_;
243 }
244 block.offsets_.insert(aux);
245 continue;
246 }
247 bpushed = false;
248 p = foot_print;
249 block.m_current_sol_ = foot_print->m_solId_;
250 while (!bpushed) {
251 if (p->findOffset(pConstraints, block, &offset)) {
252 p->addElem(&block, offset);
253 startscount++;
254 tensor = block.m_start_tensor_;
255 while (tensor) {
256 m_tensors_allocated_++;
257 tensor = tensor->right_;
258 }
259 bpushed = true;
260 break;
261 }
262 // go to the next footprint slot
263 if (p->Next() != nullptr) {
264 p = p->Next();
265 } else if (bpushed == false) { // something went wrong
266 MS_LOG(WARNING) << "Could not allocate memory for tensor: " << tensor->index_;
267 return false;
268 }
269 }
270 }
271
272 MS_LOG(DEBUG)
273 << "\nElapsed time of Fast Heuristic search: "
274 << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
275 return true;
276 }
277 } // namespace somas
278 } // namespace mindspore
279