1 /**
2 * Copyright 2020 Huawei Technologies Co., Ltd
3
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7
8 * http://www.apache.org/licenses/LICENSE-2.0
9
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <cstdio>
18 #include <fstream>
19 #include <memory>
20 #include <string>
21 #include <utility>
22 #include "common/thread_pool.h"
23
24 #include "backend/optimizer/somas/somas_solver_core.h"
25 #include "backend/optimizer/somas/somas_solver_pre.h"
26 #include "debug/common.h"
27
28 namespace mindspore {
29 namespace somas {
30 constexpr auto kSolNumThresholdMultiThread = 8;
CheckTensors(const TensorsDescMap * pTensors,uint32_t index1,uint32_t index2)31 Status SomasSolverPre::CheckTensors(const TensorsDescMap *pTensors, uint32_t index1, uint32_t index2) {
32 auto tensors = *pTensors;
33 if (tensors[index1] == nullptr) {
34 MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index1 << ")";
35 return FAILED;
36 }
37 if (tensors[index2] == nullptr) {
38 MS_LOG(WARNING) << "NULL tensor received in continuous constraint (tensor index " << index2 << ")";
39 return FAILED;
40 }
41
42 if (tensors[index1]->right_)
43 MS_LOG(WARNING) << "Warning:tensor " << index1
44 << " already has a right tensor (id: " << tensors[index1]->right_->index_;
45 if (tensors[index2]->left_)
46 MS_LOG(WARNING) << "Warning:tensor " << index2
47 << " already has a left tensor (id: " << tensors[index2]->left_->index_;
48 return SUCCESS;
49 }
AddContiguousInfoInMap(const vector<vector<size_t>> & continuous_v,TensorsDescMap * pTensors)50 Status SomasSolverPre::AddContiguousInfoInMap(const vector<vector<size_t>> &continuous_v, TensorsDescMap *pTensors) {
51 auto &tensors = *pTensors;
52 // creating S Lists
53 for (auto &aux : continuous_v) {
54 for (size_t i = 0; i < aux.size() - 1; i++) {
55 auto index1 = aux[i];
56 auto index2 = aux[i + 1];
57 if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
58 return FAILED;
59 }
60 tensors[index1]->right_ = tensors[index2];
61 tensors[index2]->left_ = tensors[index1];
62 }
63 }
64 return SUCCESS;
65 }
AddContiguousInfoInMultiMaps(const vector<vector<size_t>> & continuous_v,vector<TensorsDescMap> * vecTensorsMap,const TensorsDescMap * pTensors)66 Status SomasSolverPre::AddContiguousInfoInMultiMaps(const vector<vector<size_t>> &continuous_v,
67 vector<TensorsDescMap> *vecTensorsMap,
68 const TensorsDescMap *pTensors) {
69 // creating S Lists
70 for (auto &aux : continuous_v) {
71 for (size_t i = 0; i < aux.size() - 1; i++) {
72 auto index1 = aux[i];
73 auto index2 = aux[i + 1];
74 if (CheckTensors(pTensors, SizeToUint(index1), SizeToUint(index2)) == FAILED) {
75 return FAILED;
76 }
77 for (size_t sol = 0; sol < vecTensorsMap->size(); sol++) {
78 auto &tensors_sol = (*vecTensorsMap)[sol];
79 tensors_sol[index1]->right_ = tensors_sol[index2];
80 tensors_sol[index2]->left_ = tensors_sol[index1];
81 }
82 }
83 }
84 return SUCCESS;
85 }
CreateTensorsMaps(const TensorsDescMap & tensors,size_t total_sol)86 vector<TensorsDescMap> SomasSolverPre::CreateTensorsMaps(const TensorsDescMap &tensors, size_t total_sol) {
87 vector<TensorsDescMap> vecTensorsMap(total_sol);
88 vecTensorsMap[0] = tensors;
89 for (auto &pairT : tensors) {
90 for (size_t sol = 1; sol < total_sol; sol++) {
91 SomasSolverTensorDesc newDesc = *(pairT.second.get());
92 SomasSolverTensorDescPtr newDescPtr = std::make_shared<SomasSolverTensorDesc>(newDesc);
93 vecTensorsMap[sol].insert(std::make_pair(pairT.first, newDescPtr));
94 }
95 }
96 return vecTensorsMap;
97 }
Solving(const session::KernelGraph * graph,TensorsDescMap * ptensors,const std::vector<DynamicBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v,bool bVerifySolution,bool ball,SortingType sorting,FittingType fitting,AlgorithmType algorithm)98 Status SomasSolverPre::Solving(const session::KernelGraph *graph, TensorsDescMap *ptensors,
99 const std::vector<DynamicBitSet> *pConstraints,
100 const vector<vector<size_t>> &continuous_v, bool bVerifySolution, bool ball,
101 SortingType sorting, FittingType fitting, AlgorithmType algorithm) {
102 Status ret = SUCCESS;
103 try {
104 TensorsDescMap &tensors = *ptensors;
105 size_t total_sol = kNumSortingTypes * kNumFittingTypes * kNumAlgorithmTypes;
106 size_t process_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
107 bool isMultiThreadPermit = ball && process_num >= total_sol && total_sol > 1;
108 bool isMultiThreadValid = isMultiThreadPermit && (total_sol > kSolNumThresholdMultiThread ||
109 kParallelComputeSizeThreshold <= tensors.size());
110 const double giga = 1024. * 1024. * 1024.;
111 if (isMultiThreadValid) {
112 vector<std::shared_ptr<SomasSolverCore>> solvers;
113 std::vector<common::Task> tasks;
114 vector<TensorsDescMap> vecTensorsMap = CreateTensorsMaps(tensors, total_sol);
115 if (AddContiguousInfoInMultiMaps(continuous_v, &vecTensorsMap, ptensors) == FAILED) {
116 return FAILED;
117 }
118 auto start = std::chrono::system_clock::now();
119 for (size_t algorithm_strategy = 0, sol = 0; algorithm_strategy < kNumAlgorithmTypes; algorithm_strategy++) {
120 for (size_t sort_strategy = 0; sort_strategy < kNumSortingTypes; sort_strategy++) {
121 for (size_t branching_strategy = 0; branching_strategy < kNumFittingTypes; branching_strategy++) {
122 std::shared_ptr<SomasSolverCore> pSolver =
123 std::make_shared<SomasSolverCore>(vecTensorsMap[sol], pConstraints, sol);
124 pSolver->SetAlgorithmStrategy(AlgorithmType(algorithm));
125 pSolver->SetSortingStrategy(SortingType(sort_strategy));
126 pSolver->SetFittingStrategy(FittingType(branching_strategy));
127 pSolver->SetAllStrategies(false);
128 pSolver->VerifySolution(bVerifySolution);
129 auto task = [pSolver]() {
130 return pSolver->MemoryAllocationSolver() == SUCCESS ? common::SUCCESS : common::FAIL;
131 };
132 tasks.emplace_back(task);
133 solvers.emplace_back(pSolver);
134 sol++;
135 }
136 }
137 }
138 common::ThreadPool::GetInstance().SyncRun(tasks);
139 size_t best_sol = 0, worst = 0, best = SIZE_MAX, best_timing = SIZE_MAX;
140 for (size_t sol = 0; sol < total_sol; sol++) {
141 auto &solver = solvers[sol];
142 auto &upperbound = solver->GetUpperbound();
143 if (upperbound > worst) {
144 worst = upperbound;
145 }
146 if (upperbound <= best) {
147 best = upperbound;
148 best_sol = sol;
149 best_timing = LongToSize(solver->timing_);
150 }
151 }
152 auto end = std::chrono::system_clock::now();
153 size_t total_time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
154 auto &best_solver = solvers[best_sol];
155 for (auto &tensor : tensors) {
156 *(tensor.second.get()) = *(vecTensorsMap[best_sol][tensor.first]);
157 }
158 max_offset_ = best_solver->GetUpperbound();
159 constexpr float kFloatPresent = 100.0;
160 MS_LOG(INFO) << "SOMAS SOLVER RESUME:";
161 MS_LOG(INFO) << "Best Solution:[" << 1 + best_sol << "/" << total_sol << "] ";
162 MS_LOG(INFO) << "Best result:" << best << " Bytes " << (best) / (giga) << " GB ("
163 << (best - best_solver->Getlifelongmemory()) / (giga) << " GB + "
164 << best_solver->Getlifelongmemory() / (giga) << " GB from lifelong tensors)";
165 MS_LOG(INFO) << "Best timing:" << best_timing << " ms";
166 MS_LOG(INFO) << "Best algorithm: " << algorithmTypeNames[best_solver->algorithm_];
167 MS_LOG(INFO) << "Best sorting strategy: " << sortingNames[best_solver->sort_strategy_];
168 MS_LOG(INFO) << "Best offset strategy: " << branchingNames[best_solver->branching_strategy_];
169 MS_LOG(INFO) << "Time elapsed: " << total_time << " ms";
170 MS_LOG(INFO) << "Spread:" << static_cast<double>((worst - best) / static_cast<double>(best * kFloatPresent))
171 << " %%";
172 } else {
173 if (AddContiguousInfoInMap(continuous_v, ptensors) == FAILED) {
174 return FAILED;
175 }
176 std::shared_ptr<SomasSolverCore> pSolver = std::make_shared<SomasSolverCore>(tensors, pConstraints, 0, false);
177 pSolver->SetAlgorithmStrategy(algorithm);
178 pSolver->SetSortingStrategy(sorting);
179 pSolver->SetFittingStrategy(fitting);
180 pSolver->SetAllStrategies(ball);
181 pSolver->VerifySolution(bVerifySolution);
182 if (SUCCESS == (pSolver->MemoryAllocationSolver())) {
183 max_offset_ = pSolver->GetUpperbound();
184 MS_LOG(INFO) << "SomasSolver::Solving SUCCESS";
185 MS_LOG(INFO) << "SomasSolver::Solving RESULT: " << max_offset_ << " (" << max_offset_ / (giga) << " GB)";
186 }
187 }
188 Log(graph, tensors, pConstraints, continuous_v);
189 } catch (const std::exception &e) {
190 MS_LOG(EXCEPTION) << "SomasSolver::Solving FAILED: " << e.what();
191 ret = FAILED;
192 }
193 return ret;
194 }
195
Log(const session::KernelGraph * graph,const TensorsDescMap & tensors,const std::vector<DynamicBitSet> * pConstraints,const vector<vector<size_t>> & continuous_v)196 void SomasSolverPre::Log(const session::KernelGraph *graph, const TensorsDescMap &tensors,
197 const std::vector<DynamicBitSet> *pConstraints, const vector<vector<size_t>> &continuous_v) {
198 auto context_ptr = MsContext::GetInstance();
199 MS_EXCEPTION_IF_NULL(context_ptr);
200 bool save_graphs = context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG);
201 if (!save_graphs) {
202 return;
203 }
204 SolverInputLog(graph, tensors, continuous_v);
205 SolverOutputLog(graph, tensors);
206 TensorRelationLog(pConstraints, graph);
207 }
208
TensorRelationLog(const std::vector<DynamicBitSet> * pConstraints,const session::KernelGraph * graph)209 void SomasSolverPre::TensorRelationLog(const std::vector<DynamicBitSet> *pConstraints,
210 const session::KernelGraph *graph) {
211 MS_LOG(INFO) << "SomasSolver::Log Writing somas_tensor_relation.ir..";
212 auto context_ptr = MsContext::GetInstance();
213 MS_EXCEPTION_IF_NULL(context_ptr);
214 auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
215 std::string filename =
216 GetSaveGraphsPathName("somas_tensor_relation_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path);
217 std::ostringstream oss;
218 for (size_t tid1 = 0; tid1 < pConstraints->size(); tid1++) {
219 oss << 't' << tid1 << ' ';
220 for (size_t tid2 = 0; tid2 < (*pConstraints)[tid1].bit_size_; tid2++) {
221 oss << 'H' << std::hex << (*pConstraints)[tid1].bit_[tid2];
222 }
223 oss << std::endl << std::dec;
224 }
225 (void)Common::SaveStringToFile(filename, oss.str());
226 MS_LOG(INFO) << "SomasSolver somas_tensor_relation Log done";
227 }
228
SolverInputLog(const session::KernelGraph * graph,const TensorsDescMap & tensors,const vector<vector<size_t>> & continuous_v)229 void SomasSolverPre::SolverInputLog(const session::KernelGraph *graph, const TensorsDescMap &tensors,
230 const vector<vector<size_t>> &continuous_v) {
231 MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_input..";
232 auto context_ptr = MsContext::GetInstance();
233 MS_EXCEPTION_IF_NULL(context_ptr);
234 auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
235 std::string filename =
236 GetSaveGraphsPathName("somas_solver_input_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path);
237 std::ostringstream oss;
238 for (auto &t : tensors) {
239 oss << "T " << t.second->index_ << " " << t.second->size_ << " " << t.second->lifelong_ << std::endl;
240 }
241
242 for (auto &s : continuous_v) {
243 oss << "S";
244 for (auto idx : s) {
245 oss << " " << idx;
246 }
247 oss << std::endl;
248 }
249 (void)Common::SaveStringToFile(filename, oss.str());
250 MS_LOG(INFO) << "SomasSolver input Log done";
251 }
252
SolverOutputLog(const session::KernelGraph * graph,const TensorsDescMap & tensors) const253 void SomasSolverPre::SolverOutputLog(const session::KernelGraph *graph, const TensorsDescMap &tensors) const {
254 MS_LOG(INFO) << "SomasSolver::Log Writing somas_solver_output_..";
255 auto context_ptr = MsContext::GetInstance();
256 MS_EXCEPTION_IF_NULL(context_ptr);
257 auto save_graphs_path = context_ptr->get_param<std::string>(MS_CTX_SAVE_GRAPHS_PATH);
258 std::string out_filename =
259 GetSaveGraphsPathName("somas_solver_output_" + std::to_string(graph->graph_id()) + ".ir", save_graphs_path);
260 std::ostringstream oss;
261 constexpr size_t contiguous_left = 1;
262 constexpr size_t contiguous_mid = 2;
263 constexpr size_t contiguous_right = 3;
264 for (auto &t : tensors) {
265 SomasSolverTensorDescPtr tensor = t.second;
266 int continuous = 0;
267 if (tensor->left_ == nullptr && tensor->right_ != nullptr)
268 continuous = contiguous_left;
269 else if (tensor->left_ != nullptr && tensor->right_ != nullptr)
270 continuous = contiguous_mid;
271 else if (tensor->left_ != nullptr && tensor->right_ == nullptr)
272 continuous = contiguous_right;
273 const size_t alignment = 512;
274 bool size_aligned = tensor->size_ % alignment == 0;
275 bool offset_aligned = tensor->offset_ % alignment == 0;
276
277 oss << std::endl
278 << "tensor_id=" << tensor->index_ << "\tsize=" << tensor->size_ << "\toffset=" << tensor->offset_
279 << "\tcontinuous=" << continuous << "\tsize_aligned=" << size_aligned << "\toffset_aligned=" << offset_aligned;
280 }
281 (void)Common::SaveStringToFile(out_filename, oss.str());
282 MS_LOG(INFO) << "SomasSolver output Log done";
283 }
284 } // namespace somas
285 } // namespace mindspore
286