• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 
17 #include <algorithm>
18 #include <chrono>
19 #include <cstdio>
20 #include <ctime>
21 #include <memory>
22 #include <string>
23 #include <unordered_map>
24 #include <vector>
25 #include <map>
26 #include "backend/optimizer/somas/somas_solver_alg.h"
27 #include "backend/optimizer/somas/somas_solver_core.h"
28 #include "backend/optimizer/somas/somas_solver_pre.h"
29 
30 using std::sort;
31 using std::unordered_map;
32 using std::vector;
33 
34 namespace mindspore {
35 namespace somas {
MemoryAllocationSolver()36 Status SomasSolverCore::MemoryAllocationSolver() {
37   auto start = std::chrono::system_clock::now();
38   Status retval = SUCCESS;
39   size_t best = SIZE_MAX;
40   size_t best_timing = SIZE_MAX;
41   if (all_) {  // loop over all heuristics
42     FittingType best_branching = kBest;
43     SortingType best_sorting = kGreaterSizeSmallerIndex;
44     AlgorithmType best_algorithm = kManyObjects;
45     uint32_t best_sol = 0;
46     size_t worst = 0;
47     BuildBlocks();
48     Clean();
49     MS_LOG(INFO) << "time\tSol#\tResult\t\t\t\tAlgorithm\tSorting Strategy\tOffset Strategy";
50     for (size_t algorithm = 0; algorithm < kNumAlgorithmTypes; algorithm++) {
51       algorithm_ = static_cast<AlgorithmType>(algorithm);
52       for (size_t sort_strategy = 0; sort_strategy < kNumSortingTypes; sort_strategy++) {
53         sort_strategy_ = static_cast<SortingType>(sort_strategy);
54         SortTensors();
55         for (size_t branching_strategy = 0; branching_strategy < kNumFittingTypes; branching_strategy++) {
56           branching_strategy_ = static_cast<FittingType>(branching_strategy);
57           Clean();
58           MS_LOG(DEBUG) << "Timing Start " << tensors_.size() << " Tensors";
59           auto start_upper = std::chrono::system_clock::now();
60           upperbound_ = FindSolutions();
61           MS_LOG(DEBUG) << "Elapsed time of upper bound testing: "
62                         << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() -
63                                                                                  start_upper)
64                              .count()
65                         << " ms";
66           if (upperbound_ > worst) {
67             worst = upperbound_;
68           }
69           if (upperbound_ < best || upperbound_ == best) {
70             best = upperbound_;
71             best_algorithm = algorithm_;
72             best_branching = branching_strategy_;
73             best_sorting = sort_strategy_;
74             best_sol = sol_count_;
75             best_timing = LongToSize(timing_);
76           }
77           Verify();
78           sol_count_++;
79         }
80       }
81     }
82     upperbound_ = best;
83     auto end = std::chrono::system_clock::now();
84     size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
85     const double giga = 1024. * 1024. * 1024.;
86     const double cent = 100.;
87     MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
88     MS_LOG(INFO) << "Best Solution:[" << 1 + best_sol << "/" << sol_count_ << "] ";
89     MS_LOG(INFO) << "Best result:" << best << " Bytes " << (best) / (giga) << " GB ("
90                  << (best - lifelong_memory_) / (giga) << " GB + " << lifelong_memory_ / (giga)
91                  << " GB from lifelong tensors)";
92 
93     MS_LOG(INFO) << "Best timing:" << best_timing << " ms";
94     MS_LOG(INFO) << "Best algorithm: " << algorithmTypeNames[best_algorithm];
95     MS_LOG(INFO) << "Best sorting strategy: " << sortingNames[best_sorting];
96     MS_LOG(INFO) << "Best offset strategy: " << branchingNames[best_branching];
97     MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
98     MS_LOG(INFO) << "Spread:" << static_cast<double>((worst - best) / static_cast<double>(best * cent)) << " %%";
99     best_sol_ = best_sol;
100     SetBestSolution();
101   } else {
102     // print only for single heuristic no multi thread
103     if (!is_multi_thread_valid_) {
104       MS_LOG(INFO) << "Algorithm strategy: " << algorithmTypeNames[algorithm_];
105       MS_LOG(INFO) << "Sorting strategy: " << sortingNames[sort_strategy_];
106       MS_LOG(INFO) << "Offset strategy: " << branchingNames[branching_strategy_];
107     }
108     BuildBlocks();
109     SortTensors();
110     upperbound_ = FindSolutions();
111     Verify();
112   }
113   return retval;
114 }
115 
Verify()116 Status SomasSolverCore::Verify() {
117   Status retval = SUCCESS;
118   if (verify_) {
119     MS_LOG(INFO) << "Verifying solution..";
120 
121     if (!Verify(upperbound_)) {
122       MS_LOG(WARNING) << "Solver Allocation Memory Check FAILS";
123       retval = FAILED;
124     } else {
125       const double giga = 1024. * 1024. * 1024.;
126       MS_LOG(INFO) << "Solver Allocation Memory Check SUCCESS !!";
127       MS_LOG(INFO) << "Result: " << upperbound_ << " (" << (upperbound_) / (giga) << " GB)";
128       retval = SUCCESS;
129     }
130   }
131 
132   return retval;
133 }
134 
Verify(const size_t & upperbound)135 bool SomasSolverCore::Verify(const size_t &upperbound) {
136   auto start = std::chrono::system_clock::now();
137   bool retval = true;
138   size_t result = 0;
139   SomasSolverTensorDescPtr t1;
140   SomasSolverTensorDescPtr t2;
141 
142   for (auto t1_ : tensors_) {
143     // check alignment
144     result = std::max(result, t1_.second->size_ + t1_.second->offset_);
145     for (auto t2_ : tensors_) {
146       t1 = t1_.second;
147       t2 = t2_.second;
148       if (t1->index_ == t2->index_) continue;
149       bool blifelong = (t1->lifelong_ || t2->lifelong_) && (t1->index_ != t2->index_);
150       if (t2->right_ == t1) {  // continuous constraint
151         // t1 must be continuous to t2
152         bool bcontinuous = t1->offset_ == (t2->offset_ + t2->size_);
153         if (!bcontinuous) {
154           MS_LOG(WARNING) << "Continuous constraint violation in tensors " << t1->index_ << " and" << t2->index_;
155           retval = false;
156         }
157       } else if (blifelong || constraints_[t1->index_].IsBitTrue(t2->index_) == false) {  // conflict constraint
158         size_t t1_ub = t1->offset_ + t1->size_;
159         size_t t2_ub = t2->offset_ + t2->size_;
160         bool b_overlap_lb = ((t2->offset_ >= t1->offset_) && (t2->offset_ < t1_ub));
161         bool b_overlap_ub = ((t2_ub > t1->offset_) && (t2_ub < t1_ub));
162         bool b_overlap = b_overlap_lb || b_overlap_ub;
163         bool biszerosized = t1->size_ == 0 || t2->size_ == 0;
164         if (b_overlap && !biszerosized) {
165           MS_LOG(WARNING) << "Non-overlap constraint violation in tensors " << t1->index_ << " and" << t2->index_;
166           retval = false;
167         }
168       }
169     }
170   }
171   if (upperbound != result) {
172     MS_LOG(WARNING) << "ERROR Invalid upperbound result --> Footprint Result: " << upperbound_
173                     << " Tensor Result: " << result + lifelong_memory_;
174     retval = false;
175   }
176   MS_LOG(DEBUG)
177     << "\nElapsed time of Fast Heuristic Check: "
178     << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
179   return retval;
180 }
181 
BuildBlocks()182 void SomasSolverCore::BuildBlocks() {
183   MS_LOG(DEBUG) << "Building block of tensors";
184 
185   lifelong_memory_ = 0;
186   uint64_t tensors_block_count = 0;
187   for (auto tensor : tensors_) {
188     SomasSolverTensorDescPtr pTensor = tensor.second;
189     if (pTensor->blocked_) continue;
190     if (pTensor->lifelong_) {
191       lifelong_memory_ += pTensor->size_;
192       continue;
193     }
194     // move to the left
195     while (pTensor->left_) pTensor = pTensor->left_;
196 
197     // set start tensor
198     BlockTensor bTensor;
199     bTensor.m_bre_allocate_ = true;
200     bTensor.m_start_tensor_ = pTensor;
201     // find size
202     bTensor.m_size_ = 0;
203 
204     do {
205       bTensor.m_size_ += pTensor->size_;
206       pTensor->blocked_ = true;
207       pTensor = pTensor->right_;
208       tensors_block_count++;
209     } while (pTensor != nullptr);
210 
211     // add to the list
212     this->block_tensors_.emplace_back(bTensor);
213   }
214 
215   if (tensors_block_count != tensors_.size())
216     MS_LOG(INFO) << static_cast<int>(tensors_.size() - tensors_block_count) << " lifelong tensors found";
217 }
218 
Clean()219 void SomasSolverCore::Clean() {
220   for (auto &block : block_tensors_) {
221     block.m_bre_allocate_ = true;
222     auto pTensor = block.m_start_tensor_;
223     while (pTensor) {
224       pTensor->offset_ = 0;
225       pTensor = pTensor->right_;
226     }
227   }
228   upperbound_ = SIZE_MAX;
229 }
230 
GreaterSizeSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)231 static bool GreaterSizeSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
232   return t1.m_size_ > t2.m_size_ ||
233          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
234 }
235 #ifdef SOMAS_DEBUG
GreaterSizeGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)236 static bool GreaterSizeGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
237   return t1.m_size_ > t2.m_size_ ||
238          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
239 }
GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)240 static bool GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
241   return t1.m_size_ > t2.m_size_ ||
242          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
243          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
244           t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
245 }
GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)246 static bool GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
247   return t1.m_size_ > t2.m_size_ ||
248          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
249          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
250           t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
251 }
GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)252 static bool GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
253   return t1.m_size_ > t2.m_size_ ||
254          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
255          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
256           t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
257 }
GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)258 static bool GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
259   return t1.m_size_ > t2.m_size_ ||
260          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
261          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
262           t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
263 }
264 #endif
265 
SortTensors()266 void SomasSolverCore::SortTensors() {  // need to sort the tensors for Fast Heuristic
267   MS_LOG(DEBUG) << "Sorting Blocks of tensor, strategy: " << sortingNames[sort_strategy_];
268   typedef bool (*SortingFunction)(const BlockTensor &, const BlockTensor &);
269   std::unordered_map<SortingType, SortingFunction> sort_map;
270   sort_map[kGreaterSizeSmallerIndex] = &GreaterSizeSmallerIndex;
271 #ifdef SOMAS_DEBUG
272   sort_map[kGreaterSizeGreaterIndex] = &GreaterSizeGreaterIndex;
273   sort_map[kGreaterSizeSmallerConstraintsSmallerIndex] = &GreaterSizeSmallerConstraintsSmallerIndex;
274   sort_map[kGreaterSizeSmallerConstraintsGreaterIndex] = &GreaterSizeSmallerConstraintsGreaterIndex;
275   sort_map[kGreaterSizeGreaterConstraintsSmallerIndex] = &GreaterSizeGreaterConstraintsSmallerIndex;
276   sort_map[kGreaterSizeGreaterConstraintsGreaterIndex] = &GreaterSizeGreaterConstraintsGreaterIndex;
277 #endif
278   if (sort_strategy_ < kNumSortingTypes) {
279     sort(block_tensors_.begin(), block_tensors_.end(), *(sort_map[sort_strategy_]));
280   }
281 }
282 
RestoreSolution(uint32_t sol_id)283 void SomasSolverCore::RestoreSolution(uint32_t sol_id) {
284   for (auto block : block_tensors_) {
285     if (block.offsets_.count(sol_id) == 0) MS_ASSERT(0);
286     size_t bestOffset = block.offsets_[sol_id];
287     size_t offset = bestOffset;
288     SomasSolverTensorDescPtr pTensor = block.m_start_tensor_;
289 
290     while (pTensor) {
291       pTensor->offset_ = offset;
292       offset += pTensor->size_;
293       pTensor = pTensor->right_;
294     }
295   }
296 }
Search(const std::shared_ptr<FootPrint> & pFootprint)297 size_t SomasSolverCore::Search(const std::shared_ptr<FootPrint> &pFootprint) {
298   size_t result = 0;
299   FastHeuristic fh;
300   MS_LOG(INFO) << "Calling FastSolver Search for " << block_tensors_.size() << " tensors ";
301   auto start = std::chrono::system_clock::now();
302   if (fh.Eval(&block_tensors_, pFootprint, &constraints_)) {
303     result = pFootprint->Result();
304     auto end = std::chrono::system_clock::now();
305     timing_ = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
306     // print for serial all_ or multi thread solver
307     if (all_ || is_multi_thread_valid_) {
308       const double giga = 1073741824.;
309       MS_LOG(INFO) << timing_ << " ms\t" << sol_count_ + 1 << "/"
310                    << kNumFittingTypes * kNumAlgorithmTypes * kNumSortingTypes << "\t" << result << " Bytes ("
311                    << result / giga << " GB)\t" << algorithmTypeNames[algorithm_] << "\t"
312                    << sortingNames[sort_strategy_] << "\t" << branchingNames[branching_strategy_];
313     }
314   } else {
315     MS_LOG(INFO) << "FastSolver could not find solution";
316   }
317 
318   if (result < upperbound_) {
319     upperbound_ = result;
320     best_sol_ = pFootprint->m_solId_;
321   }
322 
323   return upperbound_;
324 }
325 
AppendLifelongTensors()326 void SomasSolverCore::AppendLifelongTensors() {
327   MS_LOG(DEBUG) << "Appending lifelong tensors to solution";
328   size_t offset = upperbound_;
329   std::map<size_t, SomasSolverTensorDescPtr> lifelongTensors;
330   for (auto t_ : tensors_) {
331     if (t_.second->lifelong_) {
332       lifelongTensors.insert(t_);
333     }
334   }
335   for (auto t_ : lifelongTensors) {
336     SomasSolverTensorDescPtr pTensor = t_.second;
337     pTensor->offset_ = offset;
338     offset += pTensor->size_;
339   }
340   upperbound_ += lifelong_memory_;
341   MS_LOG(DEBUG) << lifelong_memory_ << " bytes from lifelong tensors added to solution";
342 }
343 
FindSolutions()344 size_t SomasSolverCore::FindSolutions() {
345   MS_LOG(DEBUG) << "Start allocating blocks,offset strategy: " << branchingNames[branching_strategy_];
346 
347   std::shared_ptr<FootPrint> pFootprint = std::make_shared<FootPrint>();
348   pFootprint->setBranchingStrategy(branching_strategy_);
349   pFootprint->setCurrentSol(sol_count_);
350   pFootprint->setAlgorithm(algorithm_);
351   Search(pFootprint);
352   AppendLifelongTensors();
353   Destroy(pFootprint);
354   return upperbound_;
355 }
356 
Destroy(std::shared_ptr<FootPrint> & pFootprint)357 void SomasSolverCore::Destroy(std::shared_ptr<FootPrint> &pFootprint) {
358   while (pFootprint != nullptr) {
359     if (pFootprint->Next() != nullptr) {
360       std::shared_ptr<FootPrint> &p = pFootprint;
361       pFootprint = pFootprint->Next();
362       p = nullptr;
363     } else {
364       pFootprint = nullptr;
365     }
366   }
367 }
368 }  // namespace somas
369 }  // namespace mindspore
370