/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #include "backend/common/somas/somas_solver_alg.h"
18
19 #include <algorithm>
20 #include <stack>
21 #include <utility>
22
23 namespace mindspore {
24 namespace somas {
// offset picking heuristics
// Prefer the candidate at the lowest offset; on equal offsets prefer the smaller gap.
bool SmallestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  if (a.first != b.first) {
    return a.first < b.first;
  }
  return a.second < b.second;
}
// Prefer the candidate at the highest offset; on equal offsets prefer the smaller gap.
bool LargestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  if (a.first != b.first) {
    return a.first > b.first;
  }
  return a.second < b.second;
}
// Prefer the candidate with the smallest leftover gap; on equal gaps prefer the lower offset.
bool BestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  if (a.second != b.second) {
    return a.second < b.second;
  }
  return a.first < b.first;
}
// Prefer the candidate with the largest leftover gap; on equal gaps prefer the lower offset.
bool WorstFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
  if (a.second != b.second) {
    return a.second > b.second;
  }
  return a.first < b.first;
}
SharedObjects(FootPrint * p)38 size_t SharedObjects(FootPrint *p) { return p->Next()->getOffset(); }
SingleObject(FootPrint *)39 size_t SingleObject(FootPrint *) { return SIZE_MAX; }
40
// Fit comparators indexed by branching-strategy id; candidates are
// (offset, gap) pairs. Debug builds additionally compile LargestFit/WorstFit.
bool (*g_pBranching[kNumFittingTypes])(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) = {
  BestFit, SmallestFit
#ifdef SOMAS_DEBUG
  ,
  LargestFit, WorstFit
#endif
};
// Upper-bound providers indexed by algorithm id (shared vs. single-object footprints).
size_t (*algorithm[kNumAlgorithmTypes])(FootPrint *p) = {SharedObjects, SingleObject};
49
Result()50 size_t FootPrint::Result() {
51 std::shared_ptr<FootPrint> foot_print = shared_from_this();
52 size_t upperbound = 0;
53 uint32_t total_footprints = 0;
54 while (foot_print != nullptr) {
55 foot_print->printStats();
56
57 upperbound = foot_print->getOffset();
58 foot_print = foot_print->Next();
59 total_footprints++;
60 }
61
62 MS_LOG(DEBUG) << total_footprints << " footprints allocated";
63
64 return upperbound;
65 }
66
// Scans the (sorted, merged) non-eligible intervals for a gap that can hold `block`,
// picking among candidate gaps with the configured branching comparator.
// On success writes the chosen offset and grows the next footprint's offset to cover
// the block. Returns false when no admissible gap exists in this footprint.
bool FootPrint::findFirst(vector<Interval> *interval_v, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(interval_v);
  MS_EXCEPTION_IF_NULL(offset);
  // Last footprint in the chain: no upper neighbour, so the block always fits
  // at the current offset.
  if (this->Next() == nullptr) {
    *offset = this->getOffset();
    return true;
  }
  // Sort intervals by lower bound (ties by upper bound) so overlapping ones can
  // be merged in a single sweep below.
  sort((*interval_v).begin(), (*interval_v).end(),
       [](Interval &i1, Interval &i2) { return (i1.lb() < i2.lb()) || (i1.lb() == i2.lb() && i1.ub() < i2.ub()); });

  bool bfound = false;
  auto fit_func = g_pBranching[m_branching_strategy_];
  pair<size_t, size_t> fit_ret;  // (candidate offset, leftover gap)
  Interval a;                    // current free gap under consideration

  size_t block_size;
  if (block.Alone()) {
    block_size = block.m_size_;
  } else {
    block_size = block.m_start_tensor_->size_;  // consider only first tensor for contiguous block
  }

  // `top` tracks the merged extent of the occupied intervals seen so far.
  Interval top(m_offset_, m_offset_);
  a.lb() = m_offset_;

  // Records gap `a` as a candidate, keeping the best one per the fit comparator.
  // Precondition: a.contains(block_size), so the gap subtraction cannot underflow.
  auto update_fit = [block_size, &fit_func](Interval a, bool &bfound, pair<size_t, size_t> &fit_ret) {
    size_t gap;
    gap = a.ub() - a.lb() - block_size;
    auto fit_update = pair<size_t, size_t>(a.lb(), gap);
    if (!bfound) {
      bfound = true;
      fit_ret = fit_update;
    } else if (fit_func(fit_update, fit_ret)) {
      fit_ret = fit_update;
    }
  };

  // Sweep the sorted intervals: a hole opens whenever the next interval starts
  // beyond the merged extent; otherwise the extent is simply widened.
  for (auto &b : *interval_v) {
    if (top.ub() < b.lb()) {
      a.ub() = b.lb();
      // The full block (m_size_) must also stay under the footprint's upper bound.
      if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
        update_fit(a, bfound, fit_ret);
      }
      top = b;
    } else if (top.ub() < b.ub()) {
      top.ub() = b.ub();
    }
    a.lb() = top.ub();
  }

  // Trailing gap between the last occupied interval and the footprint's upper bound.
  a.ub() = algorithm[m_algorithm_](this);
  if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
    update_fit(a, bfound, fit_ret);
  }

  if (bfound) {
    *offset = fit_ret.first;
    // Push the next footprint's start past the newly placed block if needed.
    m_foot_print_next_->m_offset_ = std::max(m_foot_print_next_->m_offset_, *offset + block.m_size_);
  }

  return bfound;
}
129
findOffset(const std::vector<VectorBitSet> * constraints,const BlockTensor & block,size_t * offset)130 bool FootPrint::findOffset(const std::vector<VectorBitSet> *constraints, const BlockTensor &block, size_t *offset) {
131 MS_EXCEPTION_IF_NULL(offset);
132 vector<Interval> l_interval;
133
134 const size_t intervals_estimation = 1000;
135 l_interval.reserve(intervals_estimation * sizeof(Interval));
136
137 *offset = m_offset_;
138
139 // transform constrained tensors in non eligible intervals
140 if (block.Alone()) {
141 if (m_algorithm_ == static_cast<uint32_t>(kManyObjects) && m_starts_.size() > 0 && m_starts_[0]->Alone() &&
142 !(*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[0]->m_start_tensor_->index_)) {
143 return false;
144 }
145
146 for (const auto &allocated_tensor_info : m_tensors_info_) {
147 if (!(*constraints)[block.m_start_tensor_->index_].IsBitTrue(allocated_tensor_info.index_)) {
148 l_interval.emplace_back(allocated_tensor_info.offset_,
149 allocated_tensor_info.offset_ + allocated_tensor_info.size_);
150 }
151 }
152 } else {
153 auto start_offset = static_cast<int64_t>(m_offset_);
154 int64_t accumulator = 0;
155 for (auto block_tensor = block.m_start_tensor_; block_tensor != nullptr; block_tensor = block_tensor->right_) {
156 for (const auto &allocated_tensor_info : m_tensors_info_) {
157 auto allocated_offset = static_cast<int64_t>(allocated_tensor_info.offset_);
158 auto allocated_size = static_cast<int64_t>(allocated_tensor_info.size_);
159 if (!(*constraints)[block_tensor->index_].IsBitTrue(allocated_tensor_info.index_)) {
160 int64_t start_first_contiguous = allocated_offset - accumulator - SizeToLong(block_tensor->size_);
161 int64_t end_first_contiguous = allocated_offset - accumulator + allocated_size;
162 if (start_first_contiguous > start_offset) {
163 l_interval.emplace_back(start_first_contiguous, end_first_contiguous);
164 } else {
165 if (end_first_contiguous > start_offset) {
166 l_interval.emplace_back(start_offset, end_first_contiguous);
167 }
168 }
169 }
170 }
171 accumulator += SizeToLong(block_tensor->size_);
172 }
173 }
174
175 // merge non-eligible intervals and find a slot to allocate the tensor block
176 return findFirst(&l_interval, block, offset);
177 }
178
addTensorsInfo(BlockTensor * elemIndex)179 void FootPrint::addTensorsInfo(BlockTensor *elemIndex) {
180 MS_EXCEPTION_IF_NULL(elemIndex);
181 m_starts_.push_back(elemIndex);
182 auto allocated_tensor = elemIndex->m_start_tensor_;
183 while (allocated_tensor != nullptr) {
184 m_tensors_info_.emplace_back(allocated_tensor);
185 allocated_tensor = allocated_tensor->right_;
186 }
187 }
188
addElem(BlockTensor * block,const size_t & offset)189 void FootPrint::addElem(BlockTensor *block, const size_t &offset) {
190 MS_EXCEPTION_IF_NULL(block);
191 if (m_foot_print_next_ == nullptr) {
192 m_foot_print_next_ = std::make_shared<FootPrint>();
193 size_t newoffset = m_offset_ + block->m_size_;
194 m_foot_print_next_->setOffset(newoffset);
195 m_foot_print_next_->setAlignment(m_alignment_);
196 m_foot_print_next_->m_solId_ = m_solId_;
197 m_starts_.clear();
198 m_tensors_info_.clear();
199 MS_LOG(DEBUG) << "Creating footprint at offset: " << m_offset_;
200 }
201
202 size_t offset1 = offset;
203 SomasSolverTensorDescPtr tensor = block->m_start_tensor_;
204 MS_EXCEPTION_IF_NULL(tensor);
205 MS_LOG(DEBUG) << "Allocating block: " << tensor->index_ << " in offset: " << offset;
206 auto sol_id = block->m_current_sol_;
207 if (block->offsets_.find(sol_id) != block->offsets_.end()) {
208 MS_LOG(WARNING) << "Warning addElem: Offset overwritten at solution " << sol_id << " for block "
209 << block->m_start_tensor_->index_;
210 }
211 (void)block->offsets_.emplace(sol_id, offset);
212 while (tensor) {
213 tensor->offset_ = offset1;
214 offset1 += tensor->size_;
215
216 MS_LOG(DEBUG) << tensor->index_ << " " << tensor->size_ << " " << tensor->offset_;
217 tensor = tensor->right_;
218 }
219 addTensorsInfo(block);
220 }
// Debug helper: logs how many blocks start in this footprint and its base offset.
void FootPrint::printStats() {
  MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_;
}
Eval(vector<BlockTensor> * block_tensors_v,const std::shared_ptr<FootPrint> & foot_print,const std::vector<VectorBitSet> * pConstraints)224 bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, const std::shared_ptr<FootPrint> &foot_print,
225 const std::vector<VectorBitSet> *pConstraints) {
226 MS_EXCEPTION_IF_NULL(foot_print);
227 auto start = std::chrono::system_clock::now();
228
229 std::shared_ptr<FootPrint> p = foot_print;
230 bool bpushed = false;
231 size_t offset = foot_print->getOffset();
232 m_tensors_allocated_ = 0;
233 SomasSolverTensorDescPtr tensor = nullptr;
234
235 for (auto &block : *block_tensors_v) {
236 if (!block.m_bre_allocate_) {
237 offset = block.m_start_tensor_->offset_;
238 auto aux_id = foot_print->m_solId_;
239 auto aux_offset = block.m_start_tensor_->offset_;
240 if (block.offsets_.find(aux_id) != block.offsets_.end()) {
241 MS_LOG(WARNING) << "Warning: Offset overwritten at solution " << aux_id << " for block "
242 << block.m_start_tensor_->index_;
243 }
244 (void)block.offsets_.emplace(aux_id, aux_offset);
245 continue;
246 }
247 bpushed = false;
248 p = foot_print;
249 block.m_current_sol_ = foot_print->m_solId_;
250 while (!bpushed) {
251 if (p->findOffset(pConstraints, block, &offset)) {
252 p->addElem(&block, offset);
253 tensor = block.m_start_tensor_;
254 while (tensor) {
255 m_tensors_allocated_++;
256 tensor = tensor->right_;
257 }
258 bpushed = true;
259 break;
260 }
261 // go to the next footprint slot
262 if (p->Next() != nullptr) {
263 p = p->Next();
264 } else if (bpushed == false) { // something went wrong
265 MS_LOG(WARNING) << "Internal Error: Could not allocate memory for tensor: " << tensor->index_;
266 return false;
267 }
268 }
269 }
270
271 MS_LOG(DEBUG)
272 << "\nElapsed time of Fast Heuristic search: "
273 << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
274 return true;
275 }
276 } // namespace somas
277 } // namespace mindspore
278