• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 
17 #include <cstdio>
18 #include <fstream>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include "common/thread_pool.h"
23 
24 #include "backend/optimizer/somas/somas_solver_core.h"
25 #include "backend/optimizer/somas/somas_solver_pre.h"
26 #include "debug/common.h"
27 
28 namespace mindspore {
29 namespace somas {
30 constexpr auto kSolNumThresholdMultiThread = 8;
CheckTensors(const TensorsDescMap * pTensors,uint32_t index1,uint32_t index2)31 Status SomasSolverPre::CheckTensors(const TensorsDescMap *pTensors, uint32_t index1, uint32_t index2) {
32   auto tensors = *pTensors;
33   if (tensors[index1] == nullptr) {
34     MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index1 << ")";
35     return FAILED;
36   }
37   if (tensors[index2] == nullptr) {
38     MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index2 << ")";
39     return FAILED;
40   }
41 
42   if (tensors[index1]->right_)
43     MS_LOG(WARNING) << "Warning:tensor " << index1
44                     << " already has a right tensor (id: " << tensors[index1]->right_->index_;
45   if (tensors[index2]->left_)
46     MS_LOG(WARNING) << "Warning:tensor " << index2
47                     << " already has a left tensor (id: " << tensors[index2]->left_->index_;
48   return SUCCESS;
49 }
AddContiguousInfoInMap(const vector<vector<size_t>> & continuous_v,TensorsDescMap * pTensors)50 Status SomasSolverPre::AddContiguousInfoInMap(const vector<vector<size_t>> &continuous_v, TensorsDescMap *pTensors) {
51   auto &tensors = *pTensors;
52   // creating S Lists
53   for (auto &aux : continuous_v) {
54     for (size_t i = 0; i < aux.size() - 1; i++) {
55       auto index1 = aux[i];
56       auto index2 = aux[i + 1];
57       if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
58         return FAILED;
59       }
60       tensors[index1]->right_ = tensors[index2];
61       tensors[index2]->left_ = tensors[index1];
62     }
63   }
64   return SUCCESS;
65 }
AddContiguousInfoInMultiMaps(const vector<vector<size_t>> & continuous_v,vector<TensorsDescMap> * vecTensorsMap,const TensorsDescMap * pTensors)66 Status SomasSolverPre::AddContiguousInfoInMultiMaps(const vector<vector<size_t>> &continuous_v,
67                                                     vector<TensorsDescMap> *vecTensorsMap,
68                                                     const TensorsDescMap *pTensors) {
69   // creating S Lists
70   for (auto &aux : continuous_v) {
71     for (size_t i = 0; i < aux.size() - 1; i++) {
72       auto index1 = aux[i];
73       auto index2 = aux[i + 1];
74       if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
75         return FAILED;
76       }
77       for (size_t sol = 0; sol < vecTensorsMap->size(); sol++) {
78         auto &tensors_sol = (*vecTensorsMap)[sol];
79         tensors_sol[index1]->right_ = tensors_sol[index2];
80         tensors_sol[index2]->left_ = tensors_sol[index1];
81       }
82     }
83   }
84   return SUCCESS;
85 }
CreateTensorsMaps(const TensorsDescMap & tensors,size_t total_sol)86 vector<TensorsDescMap> SomasSolverPre::CreateTensorsMaps(const TensorsDescMap &tensors, size_t total_sol) {
87   vector<TensorsDescMap> vecTensorsMap(total_sol);
88   vecTensorsMap[0] = tensors;
89   for (auto &pairT : tensors) {
90     for (size_t sol = 1; sol < total_sol; sol++) {
91       SomasSolverTensorDesc newDesc = *(pairT.second.get());
92       SomasSolverTensorDescPtr newDescPtr = std::make_shared<SomasSolverTensorDesc>(newDesc);
93       vecTensorsMap[sol].insert(std::make_pair(pairT.first, newDescPtr));
94     }
95   }
96   return vecTensorsMap;
97 }
Solving(const session::KernelGraph * graph,TensorsDescMap * ptensors,const std::vector<DynamicBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v,bool bVerifySolution,bool ball,SortingType sorting,FittingType fitting,AlgorithmType algorithm)98 Status SomasSolverPre::Solving(const session::KernelGraph *graph, TensorsDescMap *ptensors,
99                                const std::vector<DynamicBitSet> *pConstraints,
100                                const vector<vector<size_t>> &continuous_v, bool bVerifySolution, bool ball,
101                                SortingType sorting, FittingType fitting, AlgorithmType algorithm) {
102   Status ret = SUCCESS;
103   try {
104     TensorsDescMap &tensors = *ptensors;
105     size_t total_sol = kNumSortingTypes * kNumFittingTypes * kNumAlgorithmTypes;
106     size_t process_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
107     bool isMultiThreadPermit = ball && process_num >= total_sol && total_sol > 1;
108     bool isMultiThreadValid = isMultiThreadPermit && (total_sol > kSolNumThresholdMultiThread ||
109                                                       kParallelComputeSizeThreshold <= tensors.size());
110     const double giga = 1024. * 1024. * 1024.;
111     if (isMultiThreadValid) {
112       vector<std::shared_ptr<SomasSolverCore>> solvers;
113       std::vector<common::Task> tasks;
114       vector<TensorsDescMap> vecTensorsMap = CreateTensorsMaps(tensors, total_sol);
115       if (AddContiguousInfoInMultiMaps(continuous_v, &vecTensorsMap, ptensors) == FAILED) {
116         return FAILED;
117       }
118       auto start = std::chrono::system_clock::now();
119       for (size_t algorithm_strategy = 0, sol = 0; algorithm_strategy < kNumAlgorithmTypes; algorithm_strategy++) {
120         for (size_t sort_strategy = 0; sort_strategy < kNumSortingTypes; sort_strategy++) {
121           for (size_t branching_strategy = 0; branching_strategy < kNumFittingTypes; branching_strategy++) {
122             std::shared_ptr<SomasSolverCore> pSolver =
123               std::make_shared<SomasSolverCore>(vecTensorsMap[sol], pConstraints, sol);
124             pSolver->SetAlgorithmStrategy(AlgorithmType(algorithm));
125             pSolver->SetSortingStrategy(SortingType(sort_strategy));
126             pSolver->SetFittingStrategy(FittingType(branching_strategy));
127             pSolver->SetAllStrategies(false);
128             pSolver->VerifySolution(bVerifySolution);
129             auto task = [pSolver]() {
130               return pSolver->MemoryAllocationSolver() == SUCCESS ? common::SUCCESS : common::FAIL;
131             };
132             tasks.emplace_back(task);
133             solvers.emplace_back(pSolver);
134             sol++;
135           }
136         }
137       }
138       common::ThreadPool::GetInstance().SyncRun(tasks);
139       size_t best_sol = 0, worst = 0, best = SIZE_MAX, best_timing = SIZE_MAX;
140       for (size_t sol = 0; sol < total_sol; sol++) {
141         auto &solver = solvers[sol];
142         auto &upperbound = solver->GetUpperbound();
143         if (upperbound > worst) {
144           worst = upperbound;
145         }
146         if (upperbound <= best) {
147           best = upperbound;
148           best_sol = sol;
149           best_timing = LongToSize(solver->timing_);
150         }
151       }
152       auto end = std::chrono::system_clock::now();
153       size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
154       auto &best_solver = solvers[best_sol];
155       for (auto &tensor : tensors) {
156         *(tensor.second.get()) = *(vecTensorsMap[best_sol][tensor.first]);
157       }
158       max_offset_ = best_solver->GetUpperbound();
159       constexpr float kFloatPresent = 100.0;
160       MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
161       MS_LOG(INFO) << "Best Solution:[" << 1 + best_sol << "/" << total_sol << "] ";
162       MS_LOG(INFO) << "Best result:" << best << " Bytes " << (best) / (giga) << " GB ("
163                    << (best - best_solver->Getlifelongmemory()) / (giga) << " GB + "
164                    << best_solver->Getlifelongmemory() / (giga) << " GB from lifelong tensors)";
165       MS_LOG(INFO) << "Best timing:" << best_timing << " ms";
166       MS_LOG(INFO) << "Best algorithm: " << algorithmTypeNames[best_solver->algorithm_];
167       MS_LOG(INFO) << "Best sorting strategy: " << sortingNames[best_solver->sort_strategy_];
168       MS_LOG(INFO) << "Best offset strategy: " << branchingNames[best_solver->branching_strategy_];
169       MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
170       MS_LOG(INFO) << "Spread:" << static_cast<double>((worst - best) / static_cast<double>(best * kFloatPresent))
171                    << " %%";
172     } else {
173       if (AddContiguousInfoInMap(continuous_v, ptensors) == FAILED) {
174         return FAILED;
175       }
176       std::shared_ptr<SomasSolverCore> pSolver = std::make_shared<SomasSolverCore>(tensors, pConstraints, 0, false);
177       pSolver->SetAlgorithmStrategy(algorithm);
178       pSolver->SetSortingStrategy(sorting);
179       pSolver->SetFittingStrategy(fitting);
180       pSolver->SetAllStrategies(ball);
181       pSolver->VerifySolution(bVerifySolution);
182       if (SUCCESS == (pSolver->MemoryAllocationSolver())) {
183         max_offset_ = pSolver->GetUpperbound();
184         MS_LOG(INFO) << "SomasSolver::Solving SUCCESS";
185         MS_LOG(INFO) << "SomasSolver::Solving RESULT: " << max_offset_ << " (" << max_offset_ / (giga) << " GB)";
186       }
187     }
188     Log(graph, tensors, pConstraints, continuous_v);
189   } catch (const std::exception &e) {
190     MS_LOG(EXCEPTION) << "SomasSolver::Solving FAILED: " << e.what();
191     ret = FAILED;
192   }
193   return ret;
194 }
195 
Log(const session::KernelGraph * graph,const TensorsDescMap & tensors,const std::vector<DynamicBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v)196 void SomasSolverPre::Log(const session::KernelGraph *graph, const TensorsDescMap &tensors,
197                          const std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) {
198   auto context_ptr = MsContext::GetInstance();
199   MS_EXCEPTION_IF_NULL(context_ptr);
200   bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
201   if (!save_graphs) {
202     return;
203   }
204   SolverInputLog(graph, tensors, continuous_v);
205   SolverOutputLog(graph, tensors);
206   TensorRelationLog(pConstraints, graph);
207 }
208 
TensorRelationLog(const std::vector<DynamicBitSet> * pConstraints,const session::KernelGraph * graph)209 void SomasSolverPre::TensorRelationLog(const std::vector<DynamicBitSet> *pConstraints,
210                                        const session::KernelGraph *graph) {
211   MS_LOG(INFO) << "SomasSolver::Log Writing somas_tensor_relation.ir..";
212   auto context_ptr = MsContext::GetInstance();
213   MS_EXCEPTION_IF_NULL(context_ptr);
214   auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
215   std::string filename =
216     GetSaveGraphsPathName("somas_tensor_relation_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path);
217   std::ostringstream oss;
218   for (size_t tid1 = 0; tid1 < pConstraints->size(); tid1++) {
219     oss << 't' << tid1 << ' ';
220     for (size_t tid2 = 0; tid2 < (*pConstraints)[tid1].bit_size_; tid2++) {
221       oss << 'H' << std::hex << (*pConstraints)[tid1].bit_[tid2];
222     }
223     oss << std::endl << std::dec;
224   }
225   (void)Common::SaveStringToFile(filename, oss.str());
226   MS_LOG(INFO) << "SomasSolver somas_tensor_relation Log done";
227 }
228 
SolverInputLog(const session::KernelGraph * graph,const TensorsDescMap & tensors,const vector<vector<size_t>> & continuous_v)229 void SomasSolverPre::SolverInputLog(const session::KernelGraph *graph, const TensorsDescMap &tensors,
230                                     const vector<vector<size_t>> &continuous_v) {
231   MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_input..";
232   auto context_ptr = MsContext::GetInstance();
233   MS_EXCEPTION_IF_NULL(context_ptr);
234   auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
235   std::string filename =
236     GetSaveGraphsPathName("somas_solver_input_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path);
237   std::ostringstream oss;
238   for (auto &t : tensors) {
239     oss << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl;
240   }
241 
242   for (auto &s : continuous_v) {
243     oss << "S";
244     for (auto idx : s) {
245       oss << " " << idx;
246     }
247     oss << std::endl;
248   }
249   (void)Common::SaveStringToFile(filename, oss.str());
250   MS_LOG(INFO) << "SomasSolver input Log done";
251 }
252 
SolverOutputLog(const session::KernelGraph * graph,const TensorsDescMap & tensors) const253 void SomasSolverPre::SolverOutputLog(const session::KernelGraph *graph, const TensorsDescMap &tensors) const {
254   MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_output_..";
255   auto context_ptr = MsContext::GetInstance();
256   MS_EXCEPTION_IF_NULL(context_ptr);
257   auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
258   std::string out_filename =
259     GetSaveGraphsPathName("somas_solver_output_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path);
260   std::ostringstream oss;
261   constexpr size_t contiguous_left = 1;
262   constexpr size_t contiguous_mid = 2;
263   constexpr size_t contiguous_right = 3;
264   for (auto &t : tensors) {
265     SomasSolverTensorDescPtr tensor = t.second;
266     int continuous = 0;
267     if (tensor->left_ == nullptr && tensor->right_ != nullptr)
268       continuous = contiguous_left;
269     else if (tensor->left_ != nullptr && tensor->right_ != nullptr)
270       continuous = contiguous_mid;
271     else if (tensor->left_ != nullptr && tensor->right_ == nullptr)
272       continuous = contiguous_right;
273     const size_t alignment = 512;
274     bool size_aligned = tensor->size_ % alignment == 0;
275     bool offset_aligned = tensor->offset_ % alignment == 0;
276 
277     oss << std::endl
278         << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_
279         << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned << "\toffset_aligned=" << offset_aligned;
280   }
281   (void)Common::SaveStringToFile(out_filename, oss.str());
282   MS_LOG(INFO) << "SomasSolver output Log done";
283 }
284 }  // namespace somas
285 }  // namespace mindspore
286