• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 
17 #include "backend/optimizer/somas/somas_solver_alg.h"
18 
19 #include <algorithm>
20 #include <stack>
21 #include <utility>
22 
23 namespace mindspore {
24 namespace somas {
25 // offset picking heuristics
SmallestFit(const pair<size_t,size_t> & a,const pair<size_t,size_t> & b)26 bool SmallestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
27   return a.first < b.first || (a.first == b.first && a.second < b.second);
28 }
LargestFit(const pair<size_t,size_t> & a,const pair<size_t,size_t> & b)29 bool LargestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
30   return a.first > b.first || (a.first == b.first && a.second < b.second);
31 }
BestFit(const pair<size_t,size_t> & a,const pair<size_t,size_t> & b)32 bool BestFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
33   return a.second < b.second || (a.second == b.second && a.first < b.first);
34 }
WorstFit(const pair<size_t,size_t> & a,const pair<size_t,size_t> & b)35 bool WorstFit(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) {
36   return a.second > b.second || (a.second == b.second && a.first < b.first);
37 }
// Upper bound when footprints share space: this footprint may grow only up to the
// next footprint's base offset.
size_t SharedObjects(FootPrint *p) { return p->Next()->getOffset(); }
// Upper bound when a footprint grows alone: effectively unbounded. The parameter is
// unused but required to match the function-pointer signature in `algorithm[]`.
size_t SingleObject(FootPrint *p) { return SIZE_MAX; }
40 
// Offset-picking (branching) heuristics: each entry is a strict-weak ordering over
// (offset, gap) candidate pairs; findFirst() stores candidates in a std::set keyed by
// the selected comparator, so the "best" candidate per heuristic sorts first.
// LargestFit/WorstFit are compiled in only for debug builds.
bool (*g_pBranching[kNumFittingTypes])(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b) = {
  BestFit, SmallestFit
#ifdef SOMAS_DEBUG
  ,
  LargestFit, WorstFit
#endif
};
// Upper-bound policy per algorithm type: index 0 caps a footprint at the next
// footprint's offset, index 1 lets it grow unbounded (SIZE_MAX).
// NOTE(review): the mapping of kManyObjects (used in findOffset) to an index here is
// declared elsewhere — verify against the enum definition in the header.
size_t (*algorithm[kNumAlgorithmTypes])(FootPrint *p) = {SharedObjects, SingleObject};
49 
Result()50 size_t FootPrint::Result() {
51   std::shared_ptr<FootPrint> foot_print = shared_from_this();
52   size_t upperbound = 0;
53   uint32_t total_footprints = 0;
54   while (foot_print != nullptr) {
55     foot_print->printStats();
56 
57     upperbound = foot_print->getOffset();
58     foot_print = foot_print->Next();
59     total_footprints++;
60   }
61 
62   MS_LOG(DEBUG) << total_footprints << " footprints allocated";
63 
64   return upperbound;
65 }
// Scans the free gaps between the merged non-eligible intervals (received top-down on
// `merged`, i.e. highest interval first) for a slot that can hold `block`. All fitting
// gaps are collected as (offset, leftover-gap) candidates ordered by the configured
// branching heuristic; the best candidate's offset is written to *offset.
// Returns true if a slot was found. Consumes the `merged` stack.
bool FootPrint::findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset) {
  MS_EXCEPTION_IF_NULL(merged);
  MS_EXCEPTION_IF_NULL(offset);
  bool bfound = false;
  // Candidate set ordered by the active branching heuristic, so begin() is the best fit.
  std::set<pair<size_t, size_t>, bool (*)(const pair<size_t, size_t> &a, const pair<size_t, size_t> &b)>
    offsetcandidates(g_pBranching[m_branching_strategy_]);
  size_t gap;

  Interval a;
  Interval it;

  size_t block_size;
  if (block.Alone()) {
    block_size = block.m_size_;
  } else {
    block_size = block.m_start_tensor_->size_;  // consider only first tensor for contiguous block
  }

  // Sweep downward from the footprint's upper bound: each popped interval closes the
  // current gap from below (a.lb() = it.ub()) and opens the next one above it
  // (a.ub() = it.lb()).
  a.ub() = algorithm[m_algorithm_](this);
  while (!(*merged).empty()) {
    it = (*merged).top();
    (*merged).pop();
    a.lb() = it.ub();
    // The gap must hold the first tensor AND the whole block must stay below the bound.
    if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
      gap = a.ub() - a.lb() - block_size;
      offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
    }
    a.ub() = it.lb();
  }
  // Final gap: between the footprint's base offset and the lowest interval.
  a.lb() = m_offset_;
  if (a.contains(block_size) && a.lb() + block.m_size_ <= algorithm[m_algorithm_](this)) {
    gap = a.ub() - a.lb() - block_size;
    offsetcandidates.emplace(pair<size_t, size_t>(a.lb(), gap));
  }

  if (offsetcandidates.size() > 0) {
    *offset = (*offsetcandidates.begin()).first;
    // Push the next footprint's base past the end of this block if needed.
    m_foot_print_next_->m_offset_ = std::max(m_foot_print_next_->m_offset_, *offset + block.m_size_);
    bfound = true;
  }

  return bfound;
}
Merge(vector<Interval> * interval_v,stack<Interval> * s)109 void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) {
110   MS_EXCEPTION_IF_NULL(s);
111   MS_EXCEPTION_IF_NULL(interval_v);
112   sort((*interval_v).begin(), (*interval_v).end(),
113        [](Interval &i1, Interval &i2) { return (i1.lb() < i2.lb()) || (i1.lb() == i2.lb() && i1.ub() < i2.ub()); });
114   (*s).push((*interval_v)[0]);
115 
116   for (size_t i = 1; i < (*interval_v).size(); i++) {
117     Interval &top = (*s).top();
118     Interval &b = (*interval_v)[i];
119     if (top.ub() < b.lb())
120       (*s).push(b);
121 
122     else if (top.ub() < b.ub())
123       top.ub() = b.ub();
124   }
125 
126   return;
127 }
findOffset(const std::vector<DynamicBitSet> * constraints,const BlockTensor & block,size_t * offset)128 bool FootPrint::findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) {
129   MS_EXCEPTION_IF_NULL(offset);
130   bool bretval = true;
131   vector<Interval> l_interval;
132 
133   const size_t intervals_estimation = 1000;
134   l_interval.reserve(intervals_estimation * sizeof(Interval));
135 
136   *offset = m_offset_;
137 
138   // transform constrained tensors in non eligible intervals
139   if (block.Alone()) {
140     if (m_algorithm_ == kManyObjects && m_starts_.size() > 0 && m_starts_[0]->Alone() &&
141         (*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[0]->m_start_tensor_->index_) == false) {
142       return false;
143     }
144     for (size_t i = 0; i < m_starts_.size(); i++) {
145       auto allocated_tensor = m_starts_[i]->m_start_tensor_;
146       while (allocated_tensor != nullptr) {
147         if ((*constraints)[block.m_start_tensor_->index_].IsBitTrue(allocated_tensor->index_) == false) {
148           l_interval.emplace_back(Interval(allocated_tensor));
149         }
150         allocated_tensor = allocated_tensor->right_;
151       }
152     }
153   } else {
154     int64_t start_offset = static_cast<int64_t>(m_offset_);
155     for (size_t i = 0; i < m_starts_.size(); i++) {
156       auto allocated_tensor = m_starts_[i]->m_start_tensor_;
157       while (allocated_tensor != nullptr) {
158         int64_t allocated_offset = static_cast<int64_t>(allocated_tensor->offset_);
159         int64_t allocated_size = static_cast<int64_t>(allocated_tensor->size_);
160         int64_t accumulator = 0;
161         for (auto block_tensor = block.m_start_tensor_; block_tensor != nullptr; block_tensor = block_tensor->right_) {
162           if ((*constraints)[block_tensor->index_].IsBitTrue(allocated_tensor->index_) == false) {
163             int64_t start_first_contiguous = allocated_offset - accumulator - SizeToLong(block_tensor->size_);
164             int64_t end_first_contiguous = allocated_offset - accumulator + allocated_size;
165             if (start_first_contiguous > start_offset) {
166               l_interval.emplace_back(Interval(start_first_contiguous, end_first_contiguous));
167             } else {
168               if (end_first_contiguous > start_offset) {
169                 l_interval.emplace_back(Interval(start_offset, end_first_contiguous));
170               }
171             }
172           }
173           accumulator += SizeToLong(block_tensor->size_);
174         }
175         allocated_tensor = allocated_tensor->right_;
176       }
177     }
178   }
179 
180   // merge non-eligible intervals and find a slot to allocate the tensor block
181   if (!l_interval.empty()) {
182     stack<Interval> l_mergedIntervals;
183     Merge(&l_interval, &l_mergedIntervals);
184     bretval = findFirst(&l_mergedIntervals, block, offset);
185   }
186 
187   return bretval;
188 }
// Registers `block` in this footprint at `offset`, records the chosen offset for the
// current solution id, and assigns final offsets to every tensor chained in the block
// (contiguous tensors are laid out back to back starting at `offset`).
void FootPrint::addElem(BlockTensor *block, const size_t &offset) {
  // Lazily create the successor footprint; its base offset starts right after this block.
  if (m_foot_print_next_ == nullptr) {
    m_foot_print_next_ = std::make_shared<FootPrint>();
    size_t newoffset = m_offset_ + block->m_size_;
    m_foot_print_next_->setOffset(newoffset);
    m_foot_print_next_->setAlignment(m_alignment_);
    m_foot_print_next_->m_solId_ = m_solId_;
    m_starts_.clear();
    MS_LOG(DEBUG) << "Creating footprint at offset: " << m_offset_;
  }

  addStart(block);
  size_t offset1 = offset;
  SomasSolverTensorDescPtr tensor = block->m_start_tensor_;
  MS_LOG(DEBUG) << "Allocating block: " << tensor->index_ << " in offset: " << offset;
  // Record (solution id -> offset) for this block.
  pair<uint32_t, size_t> sol_offset;
  sol_offset.first = block->m_current_sol_;
  sol_offset.second = offset;
  // NOTE(review): map insert is a no-op when the key already exists, so despite the
  // warning text below the previously stored offset is KEPT, not overwritten —
  // confirm whether insert_or_assign semantics were intended.
  if (block->offsets_.count(sol_offset.first))
    MS_LOG(WARNING) << "Warning addElem: Offset overwritten at solution " << block->m_current_sol_ << " for block "
                    << block->m_start_tensor_->index_;
  block->offsets_.insert(sol_offset);
  // Lay out the chain of contiguous tensors consecutively from `offset`.
  while (tensor) {
    tensor->offset_ = offset1;
    offset1 += tensor->size_;

    MS_LOG(DEBUG) << tensor->index_ << " " << tensor->size_ << " " << tensor->offset_;
    tensor = tensor->right_;
  }
}
// Debug-logs how many blocks are anchored in this footprint and its base offset.
void FootPrint::printStats() {
  MS_LOG(DEBUG) << "Footprint blocks: " << m_starts_.size() << " \toffset: " << m_offset_;
}
Eval(vector<BlockTensor> * block_tensors_v,const std::shared_ptr<FootPrint> & foot_print,const std::vector<DynamicBitSet> * pConstraints)222 bool FastHeuristic::Eval(vector<BlockTensor> *block_tensors_v, const std::shared_ptr<FootPrint> &foot_print,
223                          const std::vector<DynamicBitSet> *pConstraints) {
224   MS_EXCEPTION_IF_NULL(foot_print);
225   auto start = std::chrono::system_clock::now();
226 
227   std::shared_ptr<FootPrint> p = foot_print;
228   bool bpushed = false;
229   uint32_t startscount = 0;
230   size_t offset = foot_print->getOffset();
231   m_tensors_allocated_ = 0;
232   SomasSolverTensorDescPtr tensor = nullptr;
233 
234   for (auto &block : *block_tensors_v) {
235     if (!block.m_bre_allocate_) {
236       offset = block.m_start_tensor_->offset_;
237       pair<uint32_t, size_t> aux;
238       aux.first = foot_print->m_solId_;
239       aux.second = block.m_start_tensor_->offset_;
240       if (block.offsets_.count(aux.first)) {
241         MS_LOG(WARNING) << "Warning: Offset overwritten at solution " << aux.first << " for block "
242                         << block.m_start_tensor_->index_;
243       }
244       block.offsets_.insert(aux);
245       continue;
246     }
247     bpushed = false;
248     p = foot_print;
249     block.m_current_sol_ = foot_print->m_solId_;
250     while (!bpushed) {
251       if (p->findOffset(pConstraints, block, &offset)) {
252         p->addElem(&block, offset);
253         startscount++;
254         tensor = block.m_start_tensor_;
255         while (tensor) {
256           m_tensors_allocated_++;
257           tensor = tensor->right_;
258         }
259         bpushed = true;
260         break;
261       }
262       // go to the next footprint slot
263       if (p->Next() != nullptr) {
264         p = p->Next();
265       } else if (bpushed == false) {  // something went wrong
266         MS_LOG(WARNING) << "Could not allocate memory for tensor: " << tensor->index_;
267         return false;
268       }
269     }
270   }
271 
272   MS_LOG(DEBUG)
273     << "\nElapsed time of Fast Heuristic search: "
274     << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
275   return true;
276 }
277 }  // namespace somas
278 }  // namespace mindspore
279