• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 
17 #include "backend/common/somas/somas_solver_alg.h"
18 
19 #include <algorithm>
20 #include <stack>
21 #include <utility>
22 
23 namespace mindspore {
24 namespace somas {
25 // offset picking heuristics
// Branching heuristic: prefer the candidate with the lowest start offset;
// ties are broken by the smaller leftover gap (pair = {offset, gap}).
bool SmallestFit(const std::pair<std::size_t, std::size_t> &a, const std::pair<std::size_t, std::size_t> &b) {
  if (a.first != b.first) {
    return a.first < b.first;
  }
  return a.second < b.second;
}
// Branching heuristic: prefer the candidate with the highest start offset;
// ties are broken by the smaller leftover gap (pair = {offset, gap}).
bool LargestFit(const std::pair<std::size_t, std::size_t> &a, const std::pair<std::size_t, std::size_t> &b) {
  if (a.first != b.first) {
    return a.first > b.first;
  }
  return a.second < b.second;
}
// Branching heuristic: prefer the candidate leaving the smallest leftover gap;
// ties are broken by the lower start offset (pair = {offset, gap}).
bool BestFit(const std::pair<std::size_t, std::size_t> &a, const std::pair<std::size_t, std::size_t> &b) {
  if (a.second != b.second) {
    return a.second < b.second;
  }
  return a.first < b.first;
}
// Branching heuristic: prefer the candidate leaving the largest leftover gap;
// ties are broken by the lower start offset (pair = {offset, gap}).
bool WorstFit(const std::pair<std::size_t, std::size_t> &a, const std::pair<std::size_t, std::size_t> &b) {
  if (a.second != b.second) {
    return a.second > b.second;
  }
  return a.first < b.first;
}
SharedObjects(FootPrint * p)38 size_t SharedObjects(FootPrint *p) { return p->Next()->getOffset(); }
SingleObject(FootPrint *)39 size_t SingleObject(FootPrint *) { return SIZE_MAX; }
40 
// Offset-picking (branching) strategies, indexed by m_branching_strategy_.
// The initializer order presumably mirrors the fitting-type enum: release
// builds expose BestFit and SmallestFit; SOMAS_DEBUG additionally enables
// LargestFit and WorstFit — keep this list in sync with kNumFittingTypes.
bool (*g_pBranching[kNumFittingTypes])(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) = {
  BestFit, SmallestFit
#ifdef SOMAS_DEBUG
  ,
  LargestFit, WorstFit
#endif
};
// Placement upper-bound strategies, indexed by m_algorithm_: SharedObjects
// bounds a block by the next footprint's offset, SingleObject is unbounded.
size_t (*algorithm[kNumAlgorithmTypes])(FootPrint *p) = {SharedObjects, SingleObject};
49 
Result()50 size_t FootPrint::Result() {
51   std::shared_ptr<FootPrint> foot_print = shared_from_this();
52   size_t upperbound = 0;
53   uint32_t total_footprints = 0;
54   while (foot_print != nullptr) {
55     foot_print->printStats();
56 
57     upperbound = foot_print->getOffset();
58     foot_print = foot_print->Next();
59     total_footprints++;
60   }
61 
62   MS_LOG(DEBUG) << total_footprints << " footprints allocated";
63 
64   return upperbound;
65 }
66 
// Sweep the non-eligible intervals of this footprint and pick a free gap where
// `block` fits, according to the configured branching (fit) strategy.
// interval_v: intervals the block must not overlap (sorted in place here).
// block:      block of tensors to place.
// offset:     out parameter receiving the chosen offset.
// Returns true (and writes *offset) on success, false if no eligible gap exists.
bool FootPrint::findFirst(vector<Interval> *interval_v, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(interval_v);
  MS_EXCEPTION_IF_NULL(offset);
  // Last footprint of the chain: everything from the current offset is free.
  if (this->Next() == nullptr) {
    *offset = this->getOffset();
    return true;
  }
  // Sort by lower bound (ties by upper bound) so overlaps can be merged in one pass.
  sort((*interval_v).begin(), (*interval_v).end(),
       [](Interval &i1, Interval &i2) { return (i1.lb() < i2.lb()) || (i1.lb() == i2.lb() && i1.ub() < i2.ub()); });

  bool bfound = false;
  auto fit_func = g_pBranching[m_branching_strategy_];
  pair<size_t, size_t> fit_ret;  // best candidate so far: {offset, leftover gap}
  Interval a;                    // candidate free gap under consideration

  // For a contiguous block only the first tensor has to fit in the gap; the
  // remaining tensors are handled by the shifted intervals built in findOffset.
  size_t block_size;
  if (block.Alone()) {
    block_size = block.m_size_;
  } else {
    block_size = block.m_start_tensor_->size_;  // consider only first tensor for contiguous block
  }

  Interval top(m_offset_, m_offset_);  // running union of merged occupied intervals
  a.lb() = m_offset_;

  // Keep whichever candidate the branching strategy prefers. Call sites below
  // check a.contains(block_size) first, so the gap subtraction cannot
  // underflow (assuming contains(size) means the gap can hold `size` — confirm).
  auto update_fit = [block_size, &fit_func](Interval a, bool &bfound, pair<size_t, size_t> &fit_ret) {
    size_t gap;
    gap = a.ub() - a.lb() - block_size;
    auto fit_update = pair<size_t, size_t>(a.lb(), gap);
    if (!bfound) {
      bfound = true;
      fit_ret = fit_update;
    } else if (fit_func(fit_update, fit_ret)) {
      fit_ret = fit_update;
    }
  };

  // Merge overlapping intervals into `top`; whenever a hole opens before the
  // next interval, evaluate it as a candidate gap.
  for (auto &b : *interval_v) {
    if (top.ub() < b.lb()) {
      a.ub() = b.lb();
      // The whole block must also end below the strategy's upper bound
      // (next footprint's offset for shared objects, SIZE_MAX otherwise).
      if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
        update_fit(a, bfound, fit_ret);
      }
      top = b;
    } else if (top.ub() < b.ub()) {
      top.ub() = b.ub();
    }
    a.lb() = top.ub();
  }

  // Trailing gap after the last interval, bounded by the strategy's limit.
  a.ub() = algorithm[m_algorithm_](this);
  if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
    update_fit(a, bfound, fit_ret);
  }

  if (bfound) {
    *offset = fit_ret.first;
    // Push the next footprint's start past the end of this block.
    m_foot_print_next_->m_offset_ = std::max(m_foot_print_next_->m_offset_, *offset + block.m_size_);
  }

  return bfound;
}
129 
findOffset(const std::vector<VectorBitSet> * constraints,const BlockTensor & block,size_t * offset)130 bool FootPrint::findOffset(const std::vector<VectorBitSet> *constraints, const BlockTensor &block, size_t *offset) {
131   MS_EXCEPTION_IF_NULL(offset);
132   vector<Interval> l_interval;
133 
134   const size_t intervals_estimation = 1000;
135   l_interval.reserve(intervals_estimation * sizeof(Interval));
136 
137   *offset = m_offset_;
138 
139   // transform constrained tensors in non eligible intervals
140   if (block.Alone()) {
141     if (m_algorithm_ == static_cast<uint32_t>(kManyObjects) && m_starts_.size() > 0 && m_starts_[0]->Alone() &&
142         !(*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[0]->m_start_tensor_->index_)) {
143       return false;
144     }
145 
146     for (const auto &allocated_tensor_info : m_tensors_info_) {
147       if (!(*constraints)[block.m_start_tensor_->index_].IsBitTrue(allocated_tensor_info.index_)) {
148         l_interval.emplace_back(allocated_tensor_info.offset_,
149                                 allocated_tensor_info.offset_ + allocated_tensor_info.size_);
150       }
151     }
152   } else {
153     auto start_offset = static_cast<int64_t>(m_offset_);
154     int64_t accumulator = 0;
155     for (auto block_tensor = block.m_start_tensor_; block_tensor != nullptr; block_tensor = block_tensor->right_) {
156       for (const auto &allocated_tensor_info : m_tensors_info_) {
157         auto allocated_offset = static_cast<int64_t>(allocated_tensor_info.offset_);
158         auto allocated_size = static_cast<int64_t>(allocated_tensor_info.size_);
159         if (!(*constraints)[block_tensor->index_].IsBitTrue(allocated_tensor_info.index_)) {
160           int64_t start_first_contiguous = allocated_offset - accumulator - SizeToLong(block_tensor->size_);
161           int64_t end_first_contiguous = allocated_offset - accumulator + allocated_size;
162           if (start_first_contiguous > start_offset) {
163             l_interval.emplace_back(start_first_contiguous, end_first_contiguous);
164           } else {
165             if (end_first_contiguous > start_offset) {
166               l_interval.emplace_back(start_offset, end_first_contiguous);
167             }
168           }
169         }
170       }
171       accumulator += SizeToLong(block_tensor->size_);
172     }
173   }
174 
175   // merge non-eligible intervals and find a slot to allocate the tensor block
176   return findFirst(&l_interval, block, offset);
177 }
178 
addTensorsInfo(BlockTensor * elemIndex)179 void FootPrint::addTensorsInfo(BlockTensor *elemIndex) {
180   MS_EXCEPTION_IF_NULL(elemIndex);
181   m_starts_.push_back(elemIndex);
182   auto allocated_tensor = elemIndex->m_start_tensor_;
183   while (allocated_tensor != nullptr) {
184     m_tensors_info_.emplace_back(allocated_tensor);
185     allocated_tensor = allocated_tensor->right_;
186   }
187 }
188 
// Place `block` at `offset` inside this footprint: lazily chain a successor
// footprint, record the offset for the current solution id, lay the block's
// tensors out back to back, and register them in this footprint's bookkeeping.
void FootPrint::addElem(BlockTensor *block, const size_t &offset) {
  MS_EXCEPTION_IF_NULL(block);
  // First insertion into this footprint: create the next footprint starting
  // right after this block. NOTE(review): m_starts_/m_tensors_info_ are
  // cleared here — they should already be empty on the first insertion;
  // confirm this reset is intentional.
  if (m_foot_print_next_ == nullptr) {
    m_foot_print_next_ = std::make_shared<FootPrint>();
    size_t newoffset = m_offset_ + block->m_size_;
    m_foot_print_next_->setOffset(newoffset);
    m_foot_print_next_->setAlignment(m_alignment_);
    m_foot_print_next_->m_solId_ = m_solId_;
    m_starts_.clear();
    m_tensors_info_.clear();
    MS_LOG(DEBUG) << "Creating footprint at offset: " << m_offset_;
  }

  size_t offset1 = offset;
  SomasSolverTensorDescPtr tensor = block->m_start_tensor_;
  MS_EXCEPTION_IF_NULL(tensor);
  MS_LOG(DEBUG) << "Allocating block: " << tensor->index_ << " in offset: " << offset;
  auto sol_id = block->m_current_sol_;
  // A block should be placed at most once per solution id; warn on overwrite.
  if (block->offsets_.find(sol_id) != block->offsets_.end()) {
    MS_LOG(WARNING) << "Warning addElem: Offset overwritten at solution " << sol_id << " for block "
                    << block->m_start_tensor_->index_;
  }
  (void)block->offsets_.emplace(sol_id, offset);
  // Assign consecutive offsets to each tensor of the (possibly contiguous) block.
  while (tensor) {
    tensor->offset_ = offset1;
    offset1 += tensor->size_;

    MS_LOG(DEBUG) << tensor->index_ << " " << tensor->size_ << " " << tensor->offset_;
    tensor = tensor->right_;
  }
  addTensorsInfo(block);
}
// Debug helper: log how many blocks start in this footprint and its base offset.
void FootPrint::printStats() {
  MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_;
}
Eval(vector<BlockTensor> * block_tensors_v,const std::shared_ptr<FootPrint> & foot_print,const std::vector<VectorBitSet> * pConstraints)224 bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, const std::shared_ptr<FootPrint> &foot_print,
225                          const std::vector<VectorBitSet> *pConstraints) {
226   MS_EXCEPTION_IF_NULL(foot_print);
227   auto start = std::chrono::system_clock::now();
228 
229   std::shared_ptr<FootPrint> p = foot_print;
230   bool bpushed = false;
231   size_t offset = foot_print->getOffset();
232   m_tensors_allocated_ = 0;
233   SomasSolverTensorDescPtr tensor = nullptr;
234 
235   for (auto &block : *block_tensors_v) {
236     if (!block.m_bre_allocate_) {
237       offset = block.m_start_tensor_->offset_;
238       auto aux_id = foot_print->m_solId_;
239       auto aux_offset = block.m_start_tensor_->offset_;
240       if (block.offsets_.find(aux_id) != block.offsets_.end()) {
241         MS_LOG(WARNING) << "Warning: Offset overwritten at solution " << aux_id << " for block "
242                         << block.m_start_tensor_->index_;
243       }
244       (void)block.offsets_.emplace(aux_id, aux_offset);
245       continue;
246     }
247     bpushed = false;
248     p = foot_print;
249     block.m_current_sol_ = foot_print->m_solId_;
250     while (!bpushed) {
251       if (p->findOffset(pConstraints, block, &offset)) {
252         p->addElem(&block, offset);
253         tensor = block.m_start_tensor_;
254         while (tensor) {
255           m_tensors_allocated_++;
256           tensor = tensor->right_;
257         }
258         bpushed = true;
259         break;
260       }
261       // go to the next footprint slot
262       if (p->Next() != nullptr) {
263         p = p->Next();
264       } else if (bpushed == false) {  // something went wrong
265         MS_LOG(WARNING) << "Internal Error: Could not allocate memory for tensor: " << tensor->index_;
266         return false;
267       }
268     }
269   }
270 
271   MS_LOG(DEBUG)
272     << "\nElapsed time of Fast Heuristic search: "
273     << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
274   return true;
275 }
276 }  // namespace somas
277 }  // namespace mindspore
278