1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <algorithm>
18 #include <chrono>
19 #include <cstdio>
20 #include <ctime>
21 #include <memory>
22 #include <string>
23 #include <unordered_map>
24 #include <vector>
25 #include <map>
26 #include "backend/optimizer/somas/somas_solver_alg.h"
27 #include "backend/optimizer/somas/somas_solver_core.h"
28 #include "backend/optimizer/somas/somas_solver_pre.h"
29
30 using std::sort;
31 using std::unordered_map;
32 using std::vector;
33
34 namespace mindspore {
35 namespace somas {
MemoryAllocationSolver()36 Status SomasSolverCore::MemoryAllocationSolver() {
37 auto start = std::chrono::system_clock::now();
38 Status retval = SUCCESS;
39 size_t best = SIZE_MAX;
40 size_t best_timing = SIZE_MAX;
41 if (all_) { // loop over all heuristics
42 FittingType best_branching = kBest;
43 SortingType best_sorting = kGreaterSizeSmallerIndex;
44 AlgorithmType best_algorithm = kManyObjects;
45 uint32_t best_sol = 0;
46 size_t worst = 0;
47 BuildBlocks();
48 Clean();
49 MS_LOG(INFO) << "time\tSol#\tResult\t\t\t\tAlgorithm\tSorting Strategy\tOffset Strategy";
50 for (size_t algorithm = 0; algorithm < kNumAlgorithmTypes; algorithm++) {
51 algorithm_ = static_cast<AlgorithmType>(algorithm);
52 for (size_t sort_strategy = 0; sort_strategy < kNumSortingTypes; sort_strategy++) {
53 sort_strategy_ = static_cast<SortingType>(sort_strategy);
54 SortTensors();
55 for (size_t branching_strategy = 0; branching_strategy < kNumFittingTypes; branching_strategy++) {
56 branching_strategy_ = static_cast<FittingType>(branching_strategy);
57 Clean();
58 MS_LOG(DEBUG) << "Timing Start " << tensors_.size() << " Tensors";
59 auto start_upper = std::chrono::system_clock::now();
60 upperbound_ = FindSolutions();
61 MS_LOG(DEBUG) << "Elapsed time of upper bound testing: "
62 << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() -
63 start_upper)
64 .count()
65 << " ms";
66 if (upperbound_ > worst) {
67 worst = upperbound_;
68 }
69 if (upperbound_ < best || upperbound_ == best) {
70 best = upperbound_;
71 best_algorithm = algorithm_;
72 best_branching = branching_strategy_;
73 best_sorting = sort_strategy_;
74 best_sol = sol_count_;
75 best_timing = LongToSize(timing_);
76 }
77 Verify();
78 sol_count_++;
79 }
80 }
81 }
82 upperbound_ = best;
83 auto end = std::chrono::system_clock::now();
84 size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
85 const double giga = 1024. * 1024. * 1024.;
86 const double cent = 100.;
87 MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
88 MS_LOG(INFO) << "Best Solution:[" << 1 + best_sol << "/" << sol_count_ << "] ";
89 MS_LOG(INFO) << "Best result:" << best << " Bytes " << (best) / (giga) << " GB ("
90 << (best - lifelong_memory_) / (giga) << " GB + " << lifelong_memory_ / (giga)
91 << " GB from lifelong tensors)";
92
93 MS_LOG(INFO) << "Best timing:" << best_timing << " ms";
94 MS_LOG(INFO) << "Best algorithm: " << algorithmTypeNames[best_algorithm];
95 MS_LOG(INFO) << "Best sorting strategy: " << sortingNames[best_sorting];
96 MS_LOG(INFO) << "Best offset strategy: " << branchingNames[best_branching];
97 MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
98 MS_LOG(INFO) << "Spread:" << static_cast<double>((worst - best) / static_cast<double>(best * cent)) << " %%";
99 best_sol_ = best_sol;
100 SetBestSolution();
101 } else {
102 // print only for single heuristic no multi thread
103 if (!is_multi_thread_valid_) {
104 MS_LOG(INFO) << "Algorithm strategy: " << algorithmTypeNames[algorithm_];
105 MS_LOG(INFO) << "Sorting strategy: " << sortingNames[sort_strategy_];
106 MS_LOG(INFO) << "Offset strategy: " << branchingNames[branching_strategy_];
107 }
108 BuildBlocks();
109 SortTensors();
110 upperbound_ = FindSolutions();
111 Verify();
112 }
113 return retval;
114 }
115
Verify()116 Status SomasSolverCore::Verify() {
117 Status retval = SUCCESS;
118 if (verify_) {
119 MS_LOG(INFO) << "Verifying solution..";
120
121 if (!Verify(upperbound_)) {
122 MS_LOG(WARNING) << "Solver Allocation Memory Check FAILS";
123 retval = FAILED;
124 } else {
125 const double giga = 1024. * 1024. * 1024.;
126 MS_LOG(INFO) << "Solver Allocation Memory Check SUCCESS !!";
127 MS_LOG(INFO) << "Result: " << upperbound_ << " (" << (upperbound_) / (giga) << " GB)";
128 retval = SUCCESS;
129 }
130 }
131
132 return retval;
133 }
134
Verify(const size_t & upperbound)135 bool SomasSolverCore::Verify(const size_t &upperbound) {
136 auto start = std::chrono::system_clock::now();
137 bool retval = true;
138 size_t result = 0;
139 SomasSolverTensorDescPtr t1;
140 SomasSolverTensorDescPtr t2;
141
142 for (auto t1_ : tensors_) {
143 // check alignment
144 result = std::max(result, t1_.second->size_ + t1_.second->offset_);
145 for (auto t2_ : tensors_) {
146 t1 = t1_.second;
147 t2 = t2_.second;
148 if (t1->index_ == t2->index_) continue;
149 bool blifelong = (t1->lifelong_ || t2->lifelong_) && (t1->index_ != t2->index_);
150 if (t2->right_ == t1) { // continuous constraint
151 // t1 must be continuous to t2
152 bool bcontinuous = t1->offset_ == (t2->offset_ + t2->size_);
153 if (!bcontinuous) {
154 MS_LOG(WARNING) << "Continuous constraint violation in tensors " << t1->index_ << " and" << t2->index_;
155 retval = false;
156 }
157 } else if (blifelong || constraints_[t1->index_].IsBitTrue(t2->index_) == false) { // conflict constraint
158 size_t t1_ub = t1->offset_ + t1->size_;
159 size_t t2_ub = t2->offset_ + t2->size_;
160 bool b_overlap_lb = ((t2->offset_ >= t1->offset_) && (t2->offset_ < t1_ub));
161 bool b_overlap_ub = ((t2_ub > t1->offset_) && (t2_ub < t1_ub));
162 bool b_overlap = b_overlap_lb || b_overlap_ub;
163 bool biszerosized = t1->size_ == 0 || t2->size_ == 0;
164 if (b_overlap && !biszerosized) {
165 MS_LOG(WARNING) << "Non-overlap constraint violation in tensors " << t1->index_ << " and" << t2->index_;
166 retval = false;
167 }
168 }
169 }
170 }
171 if (upperbound != result) {
172 MS_LOG(WARNING) << "ERROR Invalid upperbound result --> Footprint Result: " << upperbound_
173 << " Tensor Result: " << result + lifelong_memory_;
174 retval = false;
175 }
176 MS_LOG(DEBUG)
177 << "\nElapsed time of Fast Heuristic Check: "
178 << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count() << " ms";
179 return retval;
180 }
181
BuildBlocks()182 void SomasSolverCore::BuildBlocks() {
183 MS_LOG(DEBUG) << "Building block of tensors";
184
185 lifelong_memory_ = 0;
186 uint64_t tensors_block_count = 0;
187 for (auto tensor : tensors_) {
188 SomasSolverTensorDescPtr pTensor = tensor.second;
189 if (pTensor->blocked_) continue;
190 if (pTensor->lifelong_) {
191 lifelong_memory_ += pTensor->size_;
192 continue;
193 }
194 // move to the left
195 while (pTensor->left_) pTensor = pTensor->left_;
196
197 // set start tensor
198 BlockTensor bTensor;
199 bTensor.m_bre_allocate_ = true;
200 bTensor.m_start_tensor_ = pTensor;
201 // find size
202 bTensor.m_size_ = 0;
203
204 do {
205 bTensor.m_size_ += pTensor->size_;
206 pTensor->blocked_ = true;
207 pTensor = pTensor->right_;
208 tensors_block_count++;
209 } while (pTensor != nullptr);
210
211 // add to the list
212 this->block_tensors_.emplace_back(bTensor);
213 }
214
215 if (tensors_block_count != tensors_.size())
216 MS_LOG(INFO) << static_cast<int>(tensors_.size() - tensors_block_count) << " lifelong tensors found";
217 }
218
Clean()219 void SomasSolverCore::Clean() {
220 for (auto &block : block_tensors_) {
221 block.m_bre_allocate_ = true;
222 auto pTensor = block.m_start_tensor_;
223 while (pTensor) {
224 pTensor->offset_ = 0;
225 pTensor = pTensor->right_;
226 }
227 }
228 upperbound_ = SIZE_MAX;
229 }
230
GreaterSizeSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)231 static bool GreaterSizeSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
232 return t1.m_size_ > t2.m_size_ ||
233 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
234 }
235 #ifdef SOMAS_DEBUG
GreaterSizeGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)236 static bool GreaterSizeGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
237 return t1.m_size_ > t2.m_size_ ||
238 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
239 }
GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)240 static bool GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
241 return t1.m_size_ > t2.m_size_ ||
242 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
243 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
244 t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
245 }
GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)246 static bool GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
247 return t1.m_size_ > t2.m_size_ ||
248 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
249 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
250 t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
251 }
GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor & t1,const BlockTensor & t2)252 static bool GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
253 return t1.m_size_ > t2.m_size_ ||
254 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
255 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
256 t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
257 }
GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor & t1,const BlockTensor & t2)258 static bool GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
259 return t1.m_size_ > t2.m_size_ ||
260 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
261 (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
262 t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
263 }
264 #endif
265
SortTensors()266 void SomasSolverCore::SortTensors() { // need to sort the tensors for Fast Heuristic
267 MS_LOG(DEBUG) << "Sorting Blocks of tensor, strategy: " << sortingNames[sort_strategy_];
268 typedef bool (*SortingFunction)(const BlockTensor &, const BlockTensor &);
269 std::unordered_map<SortingType, SortingFunction> sort_map;
270 sort_map[kGreaterSizeSmallerIndex] = &GreaterSizeSmallerIndex;
271 #ifdef SOMAS_DEBUG
272 sort_map[kGreaterSizeGreaterIndex] = &GreaterSizeGreaterIndex;
273 sort_map[kGreaterSizeSmallerConstraintsSmallerIndex] = &GreaterSizeSmallerConstraintsSmallerIndex;
274 sort_map[kGreaterSizeSmallerConstraintsGreaterIndex] = &GreaterSizeSmallerConstraintsGreaterIndex;
275 sort_map[kGreaterSizeGreaterConstraintsSmallerIndex] = &GreaterSizeGreaterConstraintsSmallerIndex;
276 sort_map[kGreaterSizeGreaterConstraintsGreaterIndex] = &GreaterSizeGreaterConstraintsGreaterIndex;
277 #endif
278 if (sort_strategy_ < kNumSortingTypes) {
279 sort(block_tensors_.begin(), block_tensors_.end(), *(sort_map[sort_strategy_]));
280 }
281 }
282
RestoreSolution(uint32_t sol_id)283 void SomasSolverCore::RestoreSolution(uint32_t sol_id) {
284 for (auto block : block_tensors_) {
285 if (block.offsets_.count(sol_id) == 0) MS_ASSERT(0);
286 size_t bestOffset = block.offsets_[sol_id];
287 size_t offset = bestOffset;
288 SomasSolverTensorDescPtr pTensor = block.m_start_tensor_;
289
290 while (pTensor) {
291 pTensor->offset_ = offset;
292 offset += pTensor->size_;
293 pTensor = pTensor->right_;
294 }
295 }
296 }
Search(const std::shared_ptr<FootPrint> & pFootprint)297 size_t SomasSolverCore::Search(const std::shared_ptr<FootPrint> &pFootprint) {
298 size_t result = 0;
299 FastHeuristic fh;
300 MS_LOG(INFO) << "Calling FastSolver Search for " << block_tensors_.size() << " tensors ";
301 auto start = std::chrono::system_clock::now();
302 if (fh.Eval(&block_tensors_, pFootprint, &constraints_)) {
303 result = pFootprint->Result();
304 auto end = std::chrono::system_clock::now();
305 timing_ = std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count();
306 // print for serial all_ or multi thread solver
307 if (all_ || is_multi_thread_valid_) {
308 const double giga = 1073741824.;
309 MS_LOG(INFO) << timing_ << " ms\t" << sol_count_ + 1 << "/"
310 << kNumFittingTypes * kNumAlgorithmTypes * kNumSortingTypes << "\t" << result << " Bytes ("
311 << result / giga << " GB)\t" << algorithmTypeNames[algorithm_] << "\t"
312 << sortingNames[sort_strategy_] << "\t" << branchingNames[branching_strategy_];
313 }
314 } else {
315 MS_LOG(INFO) << "FastSolver could not find solution";
316 }
317
318 if (result < upperbound_) {
319 upperbound_ = result;
320 best_sol_ = pFootprint->m_solId_;
321 }
322
323 return upperbound_;
324 }
325
AppendLifelongTensors()326 void SomasSolverCore::AppendLifelongTensors() {
327 MS_LOG(DEBUG) << "Appending lifelong tensors to solution";
328 size_t offset = upperbound_;
329 std::map<size_t, SomasSolverTensorDescPtr> lifelongTensors;
330 for (auto t_ : tensors_) {
331 if (t_.second->lifelong_) {
332 lifelongTensors.insert(t_);
333 }
334 }
335 for (auto t_ : lifelongTensors) {
336 SomasSolverTensorDescPtr pTensor = t_.second;
337 pTensor->offset_ = offset;
338 offset += pTensor->size_;
339 }
340 upperbound_ += lifelong_memory_;
341 MS_LOG(DEBUG) << lifelong_memory_ << " bytes from lifelong tensors added to solution";
342 }
343
FindSolutions()344 size_t SomasSolverCore::FindSolutions() {
345 MS_LOG(DEBUG) << "Start allocating blocks,offset strategy: " << branchingNames[branching_strategy_];
346
347 std::shared_ptr<FootPrint> pFootprint = std::make_shared<FootPrint>();
348 pFootprint->setBranchingStrategy(branching_strategy_);
349 pFootprint->setCurrentSol(sol_count_);
350 pFootprint->setAlgorithm(algorithm_);
351 Search(pFootprint);
352 AppendLifelongTensors();
353 Destroy(pFootprint);
354 return upperbound_;
355 }
356
Destroy(std::shared_ptr<FootPrint> & pFootprint)357 void SomasSolverCore::Destroy(std::shared_ptr<FootPrint> &pFootprint) {
358 while (pFootprint != nullptr) {
359 if (pFootprint->Next() != nullptr) {
360 std::shared_ptr<FootPrint> &p = pFootprint;
361 pFootprint = pFootprint->Next();
362 p = nullptr;
363 } else {
364 pFootprint = nullptr;
365 }
366 }
367 }
368 } // namespace somas
369 } // namespace mindspore
370