• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 
17 #include <algorithm>
18 #include <chrono>
19 #include <cstdio>
20 #include <ctime>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 #include <map>
25 #include "utils/hash_map.h"
26 #include "backend/common/somas/somas_solver_alg.h"
27 #include "backend/common/somas/somas_solver_core.h"
28 #include "backend/common/somas/somas_solver_pre.h"
29 
30 using mindspore::HashMap;
31 using std::sort;
32 using std::vector;
33 
34 namespace mindspore {
35 namespace somas {
36 constexpr auto kSolBytesThreshold = 100 * 1024 * 1024;
MemoryAllocationSolver()37 Status SomasSolverCore::MemoryAllocationSolver() {
38   Status retval = SUCCESS;
39   // print only for single heuristic no multi thread
40   if (!is_multi_thread_valid_) {
41     MS_LOG(INFO) << "Algorithm strategy: " << algorithmTypeNames[algorithm_];
42     MS_LOG(INFO) << "Sorting strategy: " << sortingNames[sort_strategy_];
43     MS_LOG(INFO) << "Offset strategy: " << branchingNames[branching_strategy_];
44   }
45   BuildBlocks();
46   SortTensors();
47   upperbound_ = FindSolutions();
48   Verify();
49   return retval;
50 }
51 
Verify()52 Status SomasSolverCore::Verify() {
53   Status retval = SUCCESS;
54   if (verify_) {
55     MS_LOG(INFO) << "Verifying solution..";
56 
57     if (!Verify(upperbound_)) {
58       MS_LOG(WARNING) << "Solver Allocation Memory Check FAILS";
59       retval = FAILED;
60     } else {
61       const double giga = 1024. * 1024. * 1024.;
62       MS_LOG(INFO) << "Solver Allocation Memory Check SUCCESS !!";
63       MS_LOG(INFO) << "Result: " << upperbound_ << " (" << (upperbound_) / (giga) << " GB)";
64       retval = SUCCESS;
65     }
66   }
67 
68   return retval;
69 }
70 
Verify(const size_t & upperbound)71 bool SomasSolverCore::Verify(const size_t &upperbound) {
72   auto start = std::chrono::system_clock::now();
73   bool retval = true;
74   size_t result = 0;
75   SomasSolverTensorDescPtr t1;
76   SomasSolverTensorDescPtr t2;
77 
78   for (auto t1_ : tensors_) {
79     // check alignment
80     result = std::max(result, t1_.second->size_ + t1_.second->offset_);
81     for (auto t2_ : tensors_) {
82       t1 = t1_.second;
83       t2 = t2_.second;
84       if (t1->index_ == t2->index_) {
85         continue;
86       }
87       bool blifelong = (t1->lifelong_ || t2->lifelong_) && (t1->index_ != t2->index_);
88       if (t2->right_ == t1) {  // continuous constraint
89         // t1 must be continuous to t2
90         bool bcontinuous = t1->offset_ == (t2->offset_ + t2->size_);
91         if (!bcontinuous) {
92           MS_LOG(WARNING) << "Continuous constraint violation in tensors " << t1->index_ << " and" << t2->index_;
93           retval = false;
94         }
95       } else if (blifelong || constraints_[t1->index_].IsBitTrue(t2->index_) == false) {  // conflict constraint
96         size_t t1_ub = t1->offset_ + t1->size_;
97         size_t t2_ub = t2->offset_ + t2->size_;
98         bool b_overlap_lb = ((t2->offset_ >= t1->offset_) && (t2->offset_ < t1_ub));
99         bool b_overlap_ub = ((t2_ub > t1->offset_) && (t2_ub < t1_ub));
100         bool b_overlap = b_overlap_lb || b_overlap_ub;
101         bool biszerosized = t1->size_ == 0 || t2->size_ == 0;
102         if (b_overlap && !biszerosized) {
103           MS_LOG(WARNING) << "Non-overlap constraint violation in tensors " << t1->index_ << " and" << t2->index_;
104           retval = false;
105         }
106       }
107     }
108   }
109   if (upperbound != result) {
110     MS_LOG(WARNING) << "ERROR Invalid upperbound result --> Footprint Result: " << upperbound_
111                     << " Tensor Result: " << result + lifelong_memory_;
112     retval = false;
113   }
114   MS_LOG(DEBUG)
115     << "\nElapsed time of Fast Heuristic Check: "
116     << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
117   return retval;
118 }
119 
BuildBlocks()120 void SomasSolverCore::BuildBlocks() {
121   MS_LOG(DEBUG) << "Building block of tensors";
122 
123   lifelong_memory_ = 0;
124   uint64_t tensors_block_count = 0;
125   for (auto tensor : tensors_) {
126     SomasSolverTensorDescPtr pTensor = tensor.second;
127     MS_EXCEPTION_IF_NULL(pTensor);
128     if (pTensor->blocked_) {
129       continue;
130     }
131     if (pTensor->lifelong_) {
132       lifelong_memory_ += pTensor->size_;
133       continue;
134     }
135     // move to the left
136     while (pTensor->left_) {
137       pTensor = pTensor->left_;
138     }
139 
140     // set start tensor
141     BlockTensor bTensor;
142     bTensor.m_bre_allocate_ = true;
143     bTensor.m_start_tensor_ = pTensor;
144     // find size
145     bTensor.m_size_ = 0;
146 
147     do {
148       bTensor.m_size_ += pTensor->size_;
149       pTensor->blocked_ = true;
150       pTensor = pTensor->right_;
151       tensors_block_count++;
152     } while (pTensor != nullptr);
153 
154     // add to the list
155     this->block_tensors_.emplace_back(bTensor);
156   }
157 
158   if (tensors_block_count != tensors_.size()) {
159     MS_LOG(INFO) << static_cast<int>(tensors_.size() - tensors_block_count) << " lifelong tensors found";
160   }
161 }
162 
Clean()163 void SomasSolverCore::Clean() {
164   for (auto &block : block_tensors_) {
165     block.m_bre_allocate_ = true;
166     auto pTensor = block.m_start_tensor_;
167     while (pTensor) {
168       pTensor->offset_ = 0;
169       pTensor = pTensor->right_;
170     }
171   }
172   upperbound_ = SIZE_MAX;
173 }
174 
GreaterSizeSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)175 static bool GreaterSizeSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
176   if (t1.m_start_tensor_->is_graph_output_ != t2.m_start_tensor_->is_graph_output_) {
177     return t1.m_start_tensor_->is_graph_output_;
178   }
179   return t1.m_size_ > t2.m_size_ ||
180          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
181 }
SmallerReusePeakMemGreaterSizeSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)182 static bool SmallerReusePeakMemGreaterSizeSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
183   if (t1.m_start_tensor_->is_graph_output_ != t2.m_start_tensor_->is_graph_output_) {
184     return t1.m_start_tensor_->is_graph_output_;
185   }
186   if (t1.m_start_tensor_->can_reuse_peak_mem_ != t2.m_start_tensor_->can_reuse_peak_mem_) {
187     return t1.m_start_tensor_->can_reuse_peak_mem_ < t2.m_start_tensor_->can_reuse_peak_mem_;
188   }
189   return t1.m_size_ > t2.m_size_ ||
190          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
191 }
192 #ifdef SOMAS_DEBUG
GreaterSizeGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)193 static bool GreaterSizeGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
194   return t1.m_size_ > t2.m_size_ ||
195          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
196 }
GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)197 static bool GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
198   return t1.m_size_ > t2.m_size_ ||
199          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
200          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
201           t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
202 }
GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)203 static bool GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
204   return t1.m_size_ > t2.m_size_ ||
205          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
206          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
207           t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
208 }
GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)209 static bool GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
210   return t1.m_size_ > t2.m_size_ ||
211          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
212          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
213           t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
214 }
GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)215 static bool GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
216   return t1.m_size_ > t2.m_size_ ||
217          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
218          (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
219           t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
220 }
221 #endif
222 
SortTensors()223 void SomasSolverCore::SortTensors() {  // need to sort the tensors for Fast Heuristic
224   MS_LOG(DEBUG) << "Sorting Blocks of tensor, strategy: " << sortingNames[sort_strategy_];
225   typedef bool (*SortingFunction)(const BlockTensor &, const BlockTensor &);
226   mindspore::HashMap<SortingType, SortingFunction> sort_map;
227   sort_map[kGreaterSizeSmallerIndex] = &GreaterSizeSmallerIndex;
228   sort_map[kSmallerReusePeakMemGreaterSizeSmallerIndex] = &SmallerReusePeakMemGreaterSizeSmallerIndex;
229 #ifdef SOMAS_DEBUG
230   sort_map[kGreaterSizeGreaterIndex] = &GreaterSizeGreaterIndex;
231   sort_map[kGreaterSizeSmallerConstraintsSmallerIndex] = &GreaterSizeSmallerConstraintsSmallerIndex;
232   sort_map[kGreaterSizeSmallerConstraintsGreaterIndex] = &GreaterSizeSmallerConstraintsGreaterIndex;
233   sort_map[kGreaterSizeGreaterConstraintsSmallerIndex] = &GreaterSizeGreaterConstraintsSmallerIndex;
234   sort_map[kGreaterSizeGreaterConstraintsGreaterIndex] = &GreaterSizeGreaterConstraintsGreaterIndex;
235 #endif
236   if (sort_strategy_ < kNumSortingTypes) {
237     sort(block_tensors_.begin(), block_tensors_.end(), *(sort_map[sort_strategy_]));
238   }
239 }
240 
Search(const std::shared_ptr<FootPrint> & pFootprint)241 size_t SomasSolverCore::Search(const std::shared_ptr<FootPrint> &pFootprint) {
242   size_t result = 0;
243   FastHeuristic fh;
244   MS_LOG(INFO) << "Calling FastSolver Search for " << block_tensors_.size() << " tensors ";
245   auto start = std::chrono::system_clock::now();
246   if (fh.Eval(&block_tensors_, pFootprint, &constraints_)) {
247     result = pFootprint->Result();
248     auto end = std::chrono::system_clock::now();
249     timing_ = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
250     // print for serial all_ or multi thread solver
251     if (is_multi_thread_valid_) {
252       const double giga = 1073741824.;
253       MS_LOG(INFO) << timing_ << " ms\t" << sol_count_ + 1 << "/"
254                    << static_cast<size_t>(kNumFittingTypes) * static_cast<size_t>(kNumAlgorithmTypes) *
255                         static_cast<size_t>(kNumSortingTypes)
256                    << "\t" << result << " Bytes (" << result / giga << " GB)\t" << algorithmTypeNames[algorithm_]
257                    << "\t" << sortingNames[sort_strategy_] << "\t" << branchingNames[branching_strategy_];
258     }
259   } else {
260     MS_LOG(INFO) << "FastSolver could not find solution";
261   }
262 
263   if (result < upperbound_) {
264     upperbound_ = result;
265     best_sol_ = pFootprint->m_solId_;
266   }
267 
268   return upperbound_;
269 }
270 
AppendLifelongTensors()271 void SomasSolverCore::AppendLifelongTensors() {
272   MS_LOG(DEBUG) << "Appending lifelong tensors to solution";
273   size_t offset = upperbound_;
274   std::map<size_t, SomasSolverTensorDescPtr> lifelongTensors;
275   for (const auto &t : tensors_) {
276     if (t.second->lifelong_) {
277       (void)lifelongTensors.emplace(t.first, t.second);
278     }
279   }
280   for (const auto &t : lifelongTensors) {
281     auto &pTensor = t.second;
282     pTensor->offset_ = offset;
283     offset += pTensor->size_;
284   }
285   upperbound_ += lifelong_memory_;
286   MS_LOG(DEBUG) << lifelong_memory_ << " bytes from lifelong tensors added to solution";
287 }
288 
FindSolutions()289 size_t SomasSolverCore::FindSolutions() {
290   MS_LOG(DEBUG) << "Start allocating blocks,offset strategy: " << branchingNames[branching_strategy_];
291 
292   std::shared_ptr<FootPrint> pFootprint = std::make_shared<FootPrint>();
293   pFootprint->setBranchingStrategy(static_cast<uint32_t>(branching_strategy_));
294   pFootprint->setCurrentSol(sol_count_);
295   pFootprint->setAlgorithm(static_cast<uint32_t>(algorithm_));
296   Search(pFootprint);
297   AppendLifelongTensors();
298   Destroy(&pFootprint);
299   return upperbound_;
300 }
301 
Destroy(std::shared_ptr<FootPrint> * pFootprint) const302 void SomasSolverCore::Destroy(std::shared_ptr<FootPrint> *pFootprint) const {
303   while ((*pFootprint) != nullptr) {
304     if ((*pFootprint)->Next() != nullptr) {
305       std::shared_ptr<FootPrint> &p = (*pFootprint);
306       (*pFootprint) = (*pFootprint)->Next();
307       p = nullptr;
308     } else {
309       (*pFootprint) = nullptr;
310     }
311   }
312 }
313 }  // namespace somas
314 }  // namespace mindspore
315