1 /**
2 * Copyright 2020-2023 Huawei Technologies Co., Ltd
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <cstdio>
18 #include <fstream>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include "include/common/thread_pool.h"
23
24 #include "backend/common/somas/somas_solver_core.h"
25 #include "backend/common/somas/somas_solver_pre.h"
26 #include "include/common/debug/common.h"
27
28 namespace mindspore {
29 namespace somas {
30 constexpr auto kSolBytesThreshold = 100 * 1024 * 1024;
31 constexpr auto kSolNumThresholdMultiThread = 8;
CheckTensors(const TensorsDescMap * pTensors,uint32_t index1,uint32_t index2) const32 Status SomasSolverPre::CheckTensors(const TensorsDescMap *pTensors, uint32_t index1, uint32_t index2) const {
33 auto tensors = *pTensors;
34 if (tensors[index1] == nullptr) {
35 MS_LOG(EXCEPTION) << "NULL tensor received in continuous constraint (tensor index " << index1
36 << "), there may be kGraphInput or kGraphOutput in the input tensors or output tensors of the "
37 "fused communication op.";
38 }
39 if (tensors[index2] == nullptr) {
40 MS_LOG(EXCEPTION) << "NULL tensor received in continuous constraint (tensor index " << index2
41 << "), there may be kGraphInput or kGraphOutput in the input tensors or output tensors of the "
42 "fused communication op.";
43 }
44
45 if (tensors[index1]->right_) {
46 MS_LOG(WARNING) << "Warning:tensor " << index1
47 << " already has a right tensor (id: " << tensors[index1]->right_->index_;
48 }
49 if (tensors[index2]->left_) {
50 MS_LOG(WARNING) << "Warning:tensor " << index2
51 << " already has a left tensor (id: " << tensors[index2]->left_->index_;
52 }
53 return SUCCESS;
54 }
AddContiguousInfoInMap(const vector<vector<size_t>> & continuous_v,TensorsDescMap * pTensors) const55 Status SomasSolverPre::AddContiguousInfoInMap(const vector<vector<size_t>> &continuous_v,
56 TensorsDescMap *pTensors) const {
57 auto &tensors = *pTensors;
58 // creating S Lists
59 for (auto &aux : continuous_v) {
60 for (size_t i = 0; i < aux.size() - 1; i++) {
61 auto index1 = aux[i];
62 auto index2 = aux[i + 1];
63 if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
64 return FAILED;
65 }
66 tensors[index1]->right_ = tensors[index2];
67 tensors[index2]->left_ = tensors[index1];
68 }
69 }
70 return SUCCESS;
71 }
AddContiguousInfoInMultiMaps(const vector<vector<size_t>> & continuous_v,vector<TensorsDescMap> * vecTensorsMap,const TensorsDescMap * pTensors) const72 Status SomasSolverPre::AddContiguousInfoInMultiMaps(const vector<vector<size_t>> &continuous_v,
73 vector<TensorsDescMap> *vecTensorsMap,
74 const TensorsDescMap *pTensors) const {
75 // creating S Lists
76 for (auto &aux : continuous_v) {
77 for (size_t i = 0; i < aux.size() - 1; i++) {
78 auto index1 = aux[i];
79 auto index2 = aux[i + 1];
80 if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
81 return FAILED;
82 }
83 for (size_t sol = 0; sol < vecTensorsMap->size(); sol++) {
84 auto &tensors_sol = (*vecTensorsMap)[sol];
85 tensors_sol[index1]->right_ = tensors_sol[index2];
86 tensors_sol[index2]->left_ = tensors_sol[index1];
87 }
88 }
89 }
90 return SUCCESS;
91 }
CreateTensorsMaps(const TensorsDescMap & tensors,size_t total_sol) const92 vector<TensorsDescMap> SomasSolverPre::CreateTensorsMaps(const TensorsDescMap &tensors, size_t total_sol) const {
93 vector<TensorsDescMap> vecTensorsMap(total_sol);
94 vecTensorsMap[0] = tensors;
95 for (auto &pairT : tensors) {
96 for (size_t sol = 1; sol < total_sol; sol++) {
97 SomasSolverTensorDesc newDesc = *(pairT.second.get());
98 SomasSolverTensorDescPtr newDescPtr = std::make_shared<SomasSolverTensorDesc>(newDesc);
99 (void)vecTensorsMap[sol].emplace(pairT.first, newDescPtr);
100 }
101 }
102 return vecTensorsMap;
103 }
FindBest(size_t total_sol,const vector<std::shared_ptr<SomasSolverCore>> & solvers,BestInfo * best_info)104 void FindBest(size_t total_sol, const vector<std::shared_ptr<SomasSolverCore>> &solvers, BestInfo *best_info) {
105 MS_EXCEPTION_IF_NULL(best_info);
106 for (size_t sol = 0; sol < total_sol; sol++) {
107 auto &solver = solvers[sol];
108 MS_EXCEPTION_IF_NULL(solver);
109 auto &upperbound = solver->GetUpperbound();
110 if (upperbound > best_info->worst) {
111 best_info->worst = upperbound;
112 }
113 if (upperbound >= best_info->best) {
114 continue;
115 }
116 if (best_info->best_algo == kManyObjects && solver->algorithm_ == kSingleObject &&
117 best_info->best - upperbound <= kSolBytesThreshold) {
118 continue;
119 }
120 best_info->best = upperbound;
121 best_info->best_sol = sol;
122 best_info->best_algo = solver->algorithm_;
123 best_info->best_timing = LongToSize(solver->timing_);
124 }
125 }
Solving(const session::KernelGraph & graph,TensorsDescMap * ptensors,const std::vector<VectorBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v,const std::vector<int> & core_list,bool bVerifySolution,bool,SortingType,FittingType,AlgorithmType)126 Status SomasSolverPre::Solving(const session::KernelGraph &graph, TensorsDescMap *ptensors,
127 const std::vector<VectorBitSet> *pConstraints,
128 const vector<vector<size_t>> &continuous_v, const std::vector<int> &core_list,
129 bool bVerifySolution, bool, SortingType, FittingType, AlgorithmType) {
130 Status ret = SUCCESS;
131 try {
132 TensorsDescMap &tensors = *ptensors;
133 constexpr size_t numSortingTypes = static_cast<size_t>(kNumSortingTypes);
134 constexpr size_t numFittingTypes = static_cast<size_t>(kNumFittingTypes);
135 constexpr size_t numAlgorithmTypes = static_cast<size_t>(kNumAlgorithmTypes);
136 constexpr size_t total_sol = numSortingTypes * numFittingTypes * numAlgorithmTypes;
137 const double giga = 1024. * 1024. * 1024.;
138
139 vector<std::shared_ptr<SomasSolverCore>> solvers;
140 std::vector<common::Task> tasks;
141 vector<TensorsDescMap> vecTensorsMap = CreateTensorsMaps(tensors, total_sol);
142 if (AddContiguousInfoInMultiMaps(continuous_v, &vecTensorsMap, ptensors) == FAILED) {
143 return FAILED;
144 }
145 auto start = std::chrono::system_clock::now();
146 for (size_t algorithm_strategy = 0, sol = 0; algorithm_strategy < numAlgorithmTypes; algorithm_strategy++) {
147 for (size_t sort_strategy = 0; sort_strategy < numSortingTypes; sort_strategy++) {
148 for (size_t branching_strategy = 0; branching_strategy < numFittingTypes; branching_strategy++) {
149 std::shared_ptr<SomasSolverCore> pSolver =
150 std::make_shared<SomasSolverCore>(vecTensorsMap[sol], pConstraints, sol);
151 pSolver->SetAlgorithmStrategy(AlgorithmType(algorithm_strategy));
152 pSolver->SetSortingStrategy(SortingType(sort_strategy));
153 pSolver->SetFittingStrategy(FittingType(branching_strategy));
154 pSolver->VerifySolution(bVerifySolution);
155 auto task = [pSolver]() {
156 return pSolver->MemoryAllocationSolver() == SUCCESS ? common::SUCCESS : common::FAIL;
157 };
158 tasks.emplace_back(task);
159 solvers.emplace_back(pSolver);
160 sol++;
161 }
162 }
163 }
164 (void)common::ThreadPool::GetInstance().SyncRun(tasks, core_list);
165 BestInfo best_info;
166 FindBest(total_sol, solvers, &best_info);
167 auto end = std::chrono::system_clock::now();
168 size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
169 auto &best_solver = solvers[best_info.best_sol];
170 for (auto &tensor : tensors) {
171 *(tensor.second.get()) = *(vecTensorsMap[best_info.best_sol][tensor.first]);
172 }
173 MS_EXCEPTION_IF_NULL(best_solver);
174 max_offset_ = best_solver->GetUpperbound();
175 constexpr float kFloatPresent = 100.0;
176 MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
177 MS_LOG(INFO) << "Best Solution:[" << 1 + best_info.best_sol << "/" << total_sol << "] ";
178 MS_LOG(INFO) << "Best result:" << best_info.best << " Bytes " << (best_info.best) / (giga) << " GB ("
179 << (best_info.best - best_solver->Getlifelongmemory()) / (giga) << " GB + "
180 << best_solver->Getlifelongmemory() / (giga) << " GB from lifelong tensors)";
181 MS_LOG(INFO) << "Best timing:" << best_info.best_timing << " ms";
182 MS_LOG(INFO) << "Best algorithm: " << algorithmTypeNames[best_solver->algorithm_];
183 MS_LOG(INFO) << "Best sorting strategy: " << sortingNames[best_solver->sort_strategy_];
184 MS_LOG(INFO) << "Best offset strategy: " << branchingNames[best_solver->branching_strategy_];
185 MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
186 MS_LOG(INFO) << "Spread:"
187 << static_cast<double>((best_info.worst - best_info.best) /
188 static_cast<double>(best_info.best * kFloatPresent))
189 << " %%";
190 Log(graph, tensors, pConstraints, continuous_v);
191 } catch (const std::exception &e) {
192 MS_LOG(EXCEPTION) << "SomasSolver::Solving FAILED: " << e.what();
193 }
194 return ret;
195 }
196
Log(const session::KernelGraph & graph,const TensorsDescMap & tensors,const std::vector<VectorBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v) const197 void SomasSolverPre::Log(const session::KernelGraph &graph, const TensorsDescMap &tensors,
198 const std::vector<VectorBitSet> *pConstraints,
199 const vector<vector<size_t>> &continuous_v) const {
200 auto context_ptr = MsContext::GetInstance();
201 MS_EXCEPTION_IF_NULL(context_ptr);
202 if (context_ptr->CanDump(kFully)) {
203 SolverInputLog(graph, tensors, continuous_v);
204 SolverOutputLog(graph, tensors);
205 TensorRelationLog(pConstraints, graph);
206 }
207 }
208
TensorRelationLog(const std::vector<VectorBitSet> * pConstraints,const session::KernelGraph & graph) const209 void SomasSolverPre::TensorRelationLog(const std::vector<VectorBitSet> *pConstraints,
210 const session::KernelGraph &graph) const {
211 MS_LOG(INFO) << "SomasSolver::Log Writing somas_tensor_relation.ir..";
212 auto context_ptr = MsContext::GetInstance();
213 MS_EXCEPTION_IF_NULL(context_ptr);
214 auto save_graphs_path = context_ptr->GetSaveGraphsPath();
215 std::string filename =
216 GetSaveGraphsPathName("somas_tensor_relation_" + std::to_string(graph.graph_id()) + ".ir", save_graphs_path);
217 std::ostringstream oss;
218 for (size_t tid1 = 0; tid1 < pConstraints->size(); tid1++) {
219 oss << 't' << tid1 << ' ';
220 for (size_t tid2 = 0; tid2 < (*pConstraints)[tid1].bit_size_; tid2++) {
221 oss << 'H' << std::hex << (*pConstraints)[tid1].bit_[tid2];
222 }
223 oss << std::endl << std::dec;
224 }
225 (void)Common::SaveStringToFile(filename, oss.str());
226 MS_LOG(INFO) << "SomasSolver somas_tensor_relation Log done";
227 }
228
SolverInputLog(const session::KernelGraph & graph,const TensorsDescMap & tensors,const vector<vector<size_t>> & continuous_v) const229 void SomasSolverPre::SolverInputLog(const session::KernelGraph &graph, const TensorsDescMap &tensors,
230 const vector<vector<size_t>> &continuous_v) const {
231 MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_input..";
232 auto context_ptr = MsContext::GetInstance();
233 MS_EXCEPTION_IF_NULL(context_ptr);
234 auto save_graphs_path = context_ptr->GetSaveGraphsPath();
235 std::string filename =
236 GetSaveGraphsPathName("somas_solver_input_" + std::to_string(graph.graph_id()) + ".ir", save_graphs_path);
237 std::ostringstream oss;
238 for (auto &t : tensors) {
239 oss << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl;
240 }
241
242 for (auto &s : continuous_v) {
243 oss << "S";
244 for (auto idx : s) {
245 oss << " " << idx;
246 }
247 oss << std::endl;
248 }
249 (void)Common::SaveStringToFile(filename, oss.str());
250 MS_LOG(INFO) << "SomasSolver input Log done";
251 }
252
SolverOutputLog(const session::KernelGraph & graph,const TensorsDescMap & tensors) const253 void SomasSolverPre::SolverOutputLog(const session::KernelGraph &graph, const TensorsDescMap &tensors) const {
254 MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_output_..";
255 auto context_ptr = MsContext::GetInstance();
256 MS_EXCEPTION_IF_NULL(context_ptr);
257 auto save_graphs_path = context_ptr->GetSaveGraphsPath();
258 std::string out_filename =
259 GetSaveGraphsPathName("somas_solver_output_" + std::to_string(graph.graph_id()) + ".ir", save_graphs_path);
260 std::ostringstream oss;
261 constexpr size_t contiguous_left = 1;
262 constexpr size_t contiguous_mid = 2;
263 constexpr size_t contiguous_right = 3;
264 for (auto &t : tensors) {
265 SomasSolverTensorDescPtr tensor = t.second;
266 MS_EXCEPTION_IF_NULL(tensor);
267 int continuous = 0;
268 if (tensor->left_ == nullptr && tensor->right_ != nullptr) {
269 continuous = contiguous_left;
270 } else if (tensor->left_ != nullptr && tensor->right_ != nullptr) {
271 continuous = contiguous_mid;
272 } else if (tensor->left_ != nullptr && tensor->right_ == nullptr) {
273 continuous = contiguous_right;
274 }
275 const size_t alignment = 512;
276 bool size_aligned = tensor->size_ % alignment == 0;
277 bool offset_aligned = tensor->offset_ % alignment == 0;
278
279 oss << std::endl
280 << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_
281 << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned << "\toffset_aligned=" << offset_aligned;
282 }
283 (void)Common::SaveStringToFile(out_filename, oss.str());
284 MS_LOG(INFO) << "SomasSolver output Log done";
285 }
286 } // namespace somas
287 } // namespace mindspore
288