• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7 
8  * http://www.apache.org/licenses/LICENSE-2.0
9 
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15 */
16 
17 #include <cstdio>
18 #include <fstream>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include "include/common/thread_pool.h"
23 
24 #include "backend/common/somas/somas_solver_core.h"
25 #include "backend/common/somas/somas_solver_pre.h"
26 #include "include/common/debug/common.h"
27 
28 namespace mindspore {
29 namespace somas {
30 constexpr auto kSolBytesThreshold = 100 * 1024 * 1024;
31 constexpr auto kSolNumThresholdMultiThread = 8;
CheckTensors(const TensorsDescMap * pTensors,uint32_t index1,uint32_t index2) const32 Status SomasSolverPre::CheckTensors(const TensorsDescMap *pTensors, uint32_t index1, uint32_t index2) const {
33   auto tensors = *pTensors;
34   if (tensors[index1] == nullptr) {
35     MS_LOG(EXCEPTION) << "NULL tensor received in continuous constraint (tensor index " << index1
36                       << "), there may be kGraphInput or kGraphOutput in the input tensors or output tensors of the "
37                          "fused communication op.";
38   }
39   if (tensors[index2] == nullptr) {
40     MS_LOG(EXCEPTION) << "NULL tensor received in continuous constraint (tensor index " << index2
41                       << "), there may be kGraphInput or kGraphOutput in the input tensors or output tensors of the "
42                          "fused communication op.";
43   }
44 
45   if (tensors[index1]->right_) {
46     MS_LOG(WARNING) << "Warning:tensor " << index1
47                     << " already has a right tensor (id: " << tensors[index1]->right_->index_;
48   }
49   if (tensors[index2]->left_) {
50     MS_LOG(WARNING) << "Warning:tensor " << index2
51                     << " already has a left tensor (id: " << tensors[index2]->left_->index_;
52   }
53   return SUCCESS;
54 }
AddContiguousInfoInMap(const vector<vector<size_t>> & continuous_v,TensorsDescMap * pTensors) const55 Status SomasSolverPre::AddContiguousInfoInMap(const vector<vector<size_t>> &continuous_v,
56                                               TensorsDescMap *pTensors) const {
57   auto &tensors = *pTensors;
58   // creating S Lists
59   for (auto &aux : continuous_v) {
60     for (size_t i = 0; i < aux.size() - 1; i++) {
61       auto index1 = aux[i];
62       auto index2 = aux[i + 1];
63       if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
64         return FAILED;
65       }
66       tensors[index1]->right_ = tensors[index2];
67       tensors[index2]->left_ = tensors[index1];
68     }
69   }
70   return SUCCESS;
71 }
AddContiguousInfoInMultiMaps(const vector<vector<size_t>> & continuous_v,vector<TensorsDescMap> * vecTensorsMap,const TensorsDescMap * pTensors) const72 Status SomasSolverPre::AddContiguousInfoInMultiMaps(const vector<vector<size_t>> &continuous_v,
73                                                     vector<TensorsDescMap> *vecTensorsMap,
74                                                     const TensorsDescMap *pTensors) const {
75   // creating S Lists
76   for (auto &aux : continuous_v) {
77     for (size_t i = 0; i < aux.size() - 1; i++) {
78       auto index1 = aux[i];
79       auto index2 = aux[i + 1];
80       if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
81         return FAILED;
82       }
83       for (size_t sol = 0; sol < vecTensorsMap->size(); sol++) {
84         auto &tensors_sol = (*vecTensorsMap)[sol];
85         tensors_sol[index1]->right_ = tensors_sol[index2];
86         tensors_sol[index2]->left_ = tensors_sol[index1];
87       }
88     }
89   }
90   return SUCCESS;
91 }
CreateTensorsMaps(const TensorsDescMap & tensors,size_t total_sol) const92 vector<TensorsDescMap> SomasSolverPre::CreateTensorsMaps(const TensorsDescMap &tensors, size_t total_sol) const {
93   vector<TensorsDescMap> vecTensorsMap(total_sol);
94   vecTensorsMap[0] = tensors;
95   for (auto &pairT : tensors) {
96     for (size_t sol = 1; sol < total_sol; sol++) {
97       SomasSolverTensorDesc newDesc = *(pairT.second.get());
98       SomasSolverTensorDescPtr newDescPtr = std::make_shared<SomasSolverTensorDesc>(newDesc);
99       (void)vecTensorsMap[sol].emplace(pairT.first, newDescPtr);
100     }
101   }
102   return vecTensorsMap;
103 }
FindBest(size_t total_sol,const vector<std::shared_ptr<SomasSolverCore>> & solvers,BestInfo * best_info)104 void FindBest(size_t total_sol, const vector<std::shared_ptr<SomasSolverCore>> &solvers, BestInfo *best_info) {
105   MS_EXCEPTION_IF_NULL(best_info);
106   for (size_t sol = 0; sol < total_sol; sol++) {
107     auto &solver = solvers[sol];
108     MS_EXCEPTION_IF_NULL(solver);
109     auto &upperbound = solver->GetUpperbound();
110     if (upperbound > best_info->worst) {
111       best_info->worst = upperbound;
112     }
113     if (upperbound >= best_info->best) {
114       continue;
115     }
116     if (best_info->best_algo == kManyObjects && solver->algorithm_ == kSingleObject &&
117         best_info->best - upperbound <= kSolBytesThreshold) {
118       continue;
119     }
120     best_info->best = upperbound;
121     best_info->best_sol = sol;
122     best_info->best_algo = solver->algorithm_;
123     best_info->best_timing = LongToSize(solver->timing_);
124   }
125 }
Solving(const session::KernelGraph & graph,TensorsDescMap * ptensors,const std::vector<VectorBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v,const std::vector<int> & core_list,bool bVerifySolution,bool,SortingType,FittingType,AlgorithmType)126 Status SomasSolverPre::Solving(const session::KernelGraph &graph, TensorsDescMap *ptensors,
127                                const std::vector<VectorBitSet> *pConstraints,
128                                const vector<vector<size_t>> &continuous_v, const std::vector<int> &core_list,
129                                bool bVerifySolution, bool, SortingType, FittingType, AlgorithmType) {
130   Status ret = SUCCESS;
131   try {
132     TensorsDescMap &tensors = *ptensors;
133     constexpr size_t numSortingTypes = static_cast<size_t>(kNumSortingTypes);
134     constexpr size_t numFittingTypes = static_cast<size_t>(kNumFittingTypes);
135     constexpr size_t numAlgorithmTypes = static_cast<size_t>(kNumAlgorithmTypes);
136     constexpr size_t total_sol = numSortingTypes * numFittingTypes * numAlgorithmTypes;
137     const double giga = 1024. * 1024. * 1024.;
138 
139     vector<std::shared_ptr<SomasSolverCore>> solvers;
140     std::vector<common::Task> tasks;
141     vector<TensorsDescMap> vecTensorsMap = CreateTensorsMaps(tensors, total_sol);
142     if (AddContiguousInfoInMultiMaps(continuous_v, &vecTensorsMap, ptensors) == FAILED) {
143       return FAILED;
144     }
145     auto start = std::chrono::system_clock::now();
146     for (size_t algorithm_strategy = 0, sol = 0; algorithm_strategy < numAlgorithmTypes; algorithm_strategy++) {
147       for (size_t sort_strategy = 0; sort_strategy < numSortingTypes; sort_strategy++) {
148         for (size_t branching_strategy = 0; branching_strategy < numFittingTypes; branching_strategy++) {
149           std::shared_ptr<SomasSolverCore> pSolver =
150             std::make_shared<SomasSolverCore>(vecTensorsMap[sol], pConstraints, sol);
151           pSolver->SetAlgorithmStrategy(AlgorithmType(algorithm_strategy));
152           pSolver->SetSortingStrategy(SortingType(sort_strategy));
153           pSolver->SetFittingStrategy(FittingType(branching_strategy));
154           pSolver->VerifySolution(bVerifySolution);
155           auto task = [pSolver]() {
156             return pSolver->MemoryAllocationSolver() == SUCCESS ? common::SUCCESS : common::FAIL;
157           };
158           tasks.emplace_back(task);
159           solvers.emplace_back(pSolver);
160           sol++;
161         }
162       }
163     }
164     (void)common::ThreadPool::GetInstance().SyncRun(tasks, core_list);
165     BestInfo best_info;
166     FindBest(total_sol, solvers, &best_info);
167     auto end = std::chrono::system_clock::now();
168     size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
169     auto &best_solver = solvers[best_info.best_sol];
170     for (auto &tensor : tensors) {
171       *(tensor.second.get()) = *(vecTensorsMap[best_info.best_sol][tensor.first]);
172     }
173     MS_EXCEPTION_IF_NULL(best_solver);
174     max_offset_ = best_solver->GetUpperbound();
175     constexpr float kFloatPresent = 100.0;
176     MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
177     MS_LOG(INFO) << "Best Solution:[" << 1 + best_info.best_sol << "/" << total_sol << "] ";
178     MS_LOG(INFO) << "Best result:" << best_info.best << " Bytes " << (best_info.best) / (giga) << " GB ("
179                  << (best_info.best - best_solver->Getlifelongmemory()) / (giga) << " GB + "
180                  << best_solver->Getlifelongmemory() / (giga) << " GB from lifelong tensors)";
181     MS_LOG(INFO) << "Best timing:" << best_info.best_timing << " ms";
182     MS_LOG(INFO) << "Best algorithm: " << algorithmTypeNames[best_solver->algorithm_];
183     MS_LOG(INFO) << "Best sorting strategy: " << sortingNames[best_solver->sort_strategy_];
184     MS_LOG(INFO) << "Best offset strategy: " << branchingNames[best_solver->branching_strategy_];
185     MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
186     MS_LOG(INFO) << "Spread:"
187                  << static_cast<double>((best_info.worst - best_info.best) /
188                                         static_cast<double>(best_info.best * kFloatPresent))
189                  << " %%";
190     Log(graph, tensors, pConstraints, continuous_v);
191   } catch (const std::exception &e) {
192     MS_LOG(EXCEPTION) << "SomasSolver::Solving FAILED: " << e.what();
193   }
194   return ret;
195 }
196 
Log(const session::KernelGraph & graph,const TensorsDescMap & tensors,const std::vector<VectorBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v) const197 void SomasSolverPre::Log(const session::KernelGraph &graph, const TensorsDescMap &tensors,
198                          const std::vector<VectorBitSet> *pConstraints,
199                          const vector<vector<size_t>> &continuous_v) const {
200   auto context_ptr = MsContext::GetInstance();
201   MS_EXCEPTION_IF_NULL(context_ptr);
202   if (context_ptr->CanDump(kFully)) {
203     SolverInputLog(graph, tensors, continuous_v);
204     SolverOutputLog(graph, tensors);
205     TensorRelationLog(pConstraints, graph);
206   }
207 }
208 
TensorRelationLog(const std::vector<VectorBitSet> * pConstraints,const session::KernelGraph & graph) const209 void SomasSolverPre::TensorRelationLog(const std::vector<VectorBitSet> *pConstraints,
210                                        const session::KernelGraph &graph) const {
211   MS_LOG(INFO) << "SomasSolver::Log Writing somas_tensor_relation.ir..";
212   auto context_ptr = MsContext::GetInstance();
213   MS_EXCEPTION_IF_NULL(context_ptr);
214   auto save_graphs_path = context_ptr->GetSaveGraphsPath();
215   std::string filename =
216     GetSaveGraphsPathName("somas_tensor_relation_" + std::to_string(graph.graph_id()) + ".ir", save_graphs_path);
217   std::ostringstream oss;
218   for (size_t tid1 = 0; tid1 < pConstraints->size(); tid1++) {
219     oss << 't' << tid1 << ' ';
220     for (size_t tid2 = 0; tid2 < (*pConstraints)[tid1].bit_size_; tid2++) {
221       oss << 'H' << std::hex << (*pConstraints)[tid1].bit_[tid2];
222     }
223     oss << std::endl << std::dec;
224   }
225   (void)Common::SaveStringToFile(filename, oss.str());
226   MS_LOG(INFO) << "SomasSolver somas_tensor_relation Log done";
227 }
228 
SolverInputLog(const session::KernelGraph & graph,const TensorsDescMap & tensors,const vector<vector<size_t>> & continuous_v) const229 void SomasSolverPre::SolverInputLog(const session::KernelGraph &graph, const TensorsDescMap &tensors,
230                                     const vector<vector<size_t>> &continuous_v) const {
231   MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_input..";
232   auto context_ptr = MsContext::GetInstance();
233   MS_EXCEPTION_IF_NULL(context_ptr);
234   auto save_graphs_path = context_ptr->GetSaveGraphsPath();
235   std::string filename =
236     GetSaveGraphsPathName("somas_solver_input_" + std::to_string(graph.graph_id()) + ".ir", save_graphs_path);
237   std::ostringstream oss;
238   for (auto &t : tensors) {
239     oss << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl;
240   }
241 
242   for (auto &s : continuous_v) {
243     oss << "S";
244     for (auto idx : s) {
245       oss << " " << idx;
246     }
247     oss << std::endl;
248   }
249   (void)Common::SaveStringToFile(filename, oss.str());
250   MS_LOG(INFO) << "SomasSolver input Log done";
251 }
252 
SolverOutputLog(const session::KernelGraph & graph,const TensorsDescMap & tensors) const253 void SomasSolverPre::SolverOutputLog(const session::KernelGraph &graph, const TensorsDescMap &tensors) const {
254   MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_output_..";
255   auto context_ptr = MsContext::GetInstance();
256   MS_EXCEPTION_IF_NULL(context_ptr);
257   auto save_graphs_path = context_ptr->GetSaveGraphsPath();
258   std::string out_filename =
259     GetSaveGraphsPathName("somas_solver_output_" + std::to_string(graph.graph_id()) + ".ir", save_graphs_path);
260   std::ostringstream oss;
261   constexpr size_t contiguous_left = 1;
262   constexpr size_t contiguous_mid = 2;
263   constexpr size_t contiguous_right = 3;
264   for (auto &t : tensors) {
265     SomasSolverTensorDescPtr tensor = t.second;
266     MS_EXCEPTION_IF_NULL(tensor);
267     int continuous = 0;
268     if (tensor->left_ == nullptr && tensor->right_ != nullptr) {
269       continuous = contiguous_left;
270     } else if (tensor->left_ != nullptr && tensor->right_ != nullptr) {
271       continuous = contiguous_mid;
272     } else if (tensor->left_ != nullptr && tensor->right_ == nullptr) {
273       continuous = contiguous_right;
274     }
275     const size_t alignment = 512;
276     bool size_aligned = tensor->size_ % alignment == 0;
277     bool offset_aligned = tensor->offset_ % alignment == 0;
278 
279     oss << std::endl
280         << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_
281         << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned << "\toffset_aligned=" << offset_aligned;
282   }
283   (void)Common::SaveStringToFile(out_filename, oss.str());
284   MS_LOG(INFO) << "SomasSolver output Log done";
285 }
286 }  // namespace somas
287 }  // namespace mindspore
288