1 /**
2 * Copyright 2021-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "include/backend/debug/profiler/profiling.h"
18 #include <chrono>
19 #include <cmath>
20 #include <sstream>
21 #include "utils/log_adapter.h"
22 #include "utils/ms_context.h"
23 #ifdef __linux__
24 #include <unistd.h>
25 #include "utils/profile.h"
26 #include "include/backend/debug/common/csv_writer.h"
27 #endif
28
29 namespace mindspore {
30 namespace profiler {
31 #ifdef __linux__
32 constexpr auto HostDataHeader =
33 "tid,pid,parent_pid,module_name,event,stage,level,start_end,custom_info,memory_usage(kB),time_stamp(us)\n";
34 const auto kVmRSS = "VmRSS";
35 std::mutex file_line_mutex;
36 static bool first_open_file = true;
37 const int profile_all = 0;
38 const int profile_memory = 1;
39 const int profile_time = 2;
40 #endif
GetInstanceMap()41 std::map<std::string, std::shared_ptr<Profiler>> &Profiler::GetInstanceMap() {
42 static std::map<std::string, std::shared_ptr<Profiler>> instance_map = {};
43 return instance_map;
44 }
45
GetInstance(const std::string & name)46 std::shared_ptr<Profiler> Profiler::GetInstance(const std::string &name) noexcept {
47 if (auto iter = GetInstanceMap().find(name); iter != GetInstanceMap().end()) {
48 return iter->second;
49 }
50
51 return nullptr;
52 }
53
SyncEnable(const bool enable_flag)54 void Profiler::SyncEnable(const bool enable_flag) {
55 MS_LOG(INFO) << "Profiler synchronous enable flag:" << enable_flag;
56 sync_enable_flag_ = enable_flag;
57 }
58
DataProcessEnable(const bool enable_flag)59 void Profiler::DataProcessEnable(const bool enable_flag) {
60 MS_LOG(INFO) << "Profiler data process enable flag:" << enable_flag;
61 data_process_enable_ = enable_flag;
62 }
63
Register(const std::string & name,const std::shared_ptr<Profiler> & instance)64 bool Profiler::Register(const std::string &name, const std::shared_ptr<Profiler> &instance) {
65 if (GetInstanceMap().find(name) != GetInstanceMap().end()) {
66 MS_LOG(WARNING) << name << " has been registered.";
67 } else {
68 (void)GetInstanceMap().emplace(name, instance);
69 }
70
71 return true;
72 }
73
Clear()74 void Profiler::Clear() { GetInstanceMap().clear(); }
75
GetHostMonoTimeStamp() const76 uint64_t Profiler::GetHostMonoTimeStamp() const {
77 auto now = std::chrono::system_clock::now();
78 int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();
79 return static_cast<uint64_t>(ns);
80 }
81
GetRealTimeStamp() const82 uint64_t Profiler::GetRealTimeStamp() const {
83 auto now = std::chrono::steady_clock::now();
84 int64_t ms = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
85 return static_cast<uint64_t>(ms);
86 }
87
SetRunTimeData(const std::string & op_name,const float time_elapsed)88 void Profiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
89 std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
90 auto iter = op_info_map_.find(op_name);
91 if (iter != op_info_map_.end()) {
92 iter->second.op_host_cost_time += time_elapsed;
93 }
94 }
95
SetRunTimeData(const std::string & op_name,const uint64_t start,const float duration)96 void Profiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
97 std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
98 auto iter = op_info_map_.find(op_name);
99 if (iter != op_info_map_.end()) {
100 iter->second.start_duration.emplace_back(StartDuration({start, duration}));
101 }
102 }
103
RecordOneStepStartEndInfo()104 void Profiler::RecordOneStepStartEndInfo() {
105 // Multi-graph dotting data is not supported.
106 std::lock_guard<std::mutex> locker(record_mutex_);
107 uint32_t vector_size = static_cast<uint32_t>(step_start_end_info_vector_.size());
108 if (vector_size == 0) {
109 return;
110 }
111 step_start_end_info_.iter_start_op_name = step_start_end_info_vector_[0];
112 step_start_end_info_.fp_start_op_name = step_start_end_info_vector_[0];
113 // If is the first step, the step_start_end_info_vector_ length is 1.
114 if (vector_size > 1) {
115 FindOneStepFpStartOp(vector_size);
116 FindOneStepIterEndOp(vector_size);
117 if (has_find_) {
118 all_step_start_end_info_.push_back(step_start_end_info_);
119 // Delete the operator of the current step.
120 (void)step_start_end_info_vector_.erase(step_start_end_info_vector_.begin(),
121 step_start_end_info_vector_.begin() + iter_end_op_index_ + 1);
122 } else {
123 all_step_start_end_info_.push_back(step_start_end_info_);
124 step_start_end_info_vector_.clear();
125 }
126 } else {
127 all_step_start_end_info_.push_back(step_start_end_info_);
128 step_start_end_info_vector_.clear();
129 }
130 step_start_end_info_.iter_start_op_name = "";
131 step_start_end_info_.fp_start_op_name = "";
132 step_start_end_info_.iter_end_op_name = "";
133 has_find_ = false;
134 iter_end_op_index_ = 0;
135 }
136
FindOneStepFpStartOp(uint32_t vector_size)137 void Profiler::FindOneStepFpStartOp(uint32_t vector_size) {
138 std::string type = "";
139 bool find_fp_start = false;
140 for (uint32_t index = 0; index < vector_size; ++index) {
141 std::string op_name = step_start_end_info_vector_[index];
142 auto begin_iter = op_name.rfind('/') + 1;
143 auto end_iter = op_name.rfind('-');
144 if (begin_iter != std::string::npos && end_iter != std::string::npos && begin_iter < end_iter) {
145 type = op_name.substr(begin_iter, end_iter - begin_iter);
146 }
147 if (type == op_type_) {
148 find_fp_start = true;
149 if (index == 0) {
150 // If the type of the first operator is GetNext, the next operator of it is the fp_start operator.
151 step_start_end_info_.fp_start_op_name = step_start_end_info_vector_[index + 1];
152 } else {
153 // If the data processing operator is iter_start, the type of the fp_start operator should be GetNext.
154 step_start_end_info_.fp_start_op_name = op_name;
155 }
156 break;
157 }
158 }
159 if (!find_fp_start) {
160 step_start_end_info_.fp_start_op_name = step_start_end_info_vector_[fp_start_op_index_];
161 }
162 }
163
FindOneStepIterEndOp(uint32_t vector_size)164 void Profiler::FindOneStepIterEndOp(uint32_t vector_size) {
165 // Iterate through step_start_end_info_vector_ for the repeat operator, which is the operator of the next step and
166 // is preceded by iter_end_op of the current step.
167 std::string step_end_op_name;
168 for (uint32_t rindex = vector_size - 1; rindex > 0; --rindex) {
169 step_end_op_name = step_start_end_info_vector_[rindex];
170 uint32_t lindex = 0;
171 for (; lindex < rindex; ++lindex) {
172 if (step_end_op_name == step_start_end_info_vector_[lindex]) {
173 has_find_ = true;
174 iter_end_op_index_ = rindex - 1;
175 break;
176 }
177 }
178 if (rindex == lindex) {
179 break;
180 }
181 }
182 if (has_find_) {
183 step_start_end_info_.iter_end_op_name = step_start_end_info_vector_[iter_end_op_index_];
184 } else {
185 step_start_end_info_.iter_end_op_name = step_start_end_info_vector_[step_start_end_info_vector_.size() - 1];
186 }
187 }
188
RecordOneStepStartEndInfo(const std::string op_name)189 void Profiler::RecordOneStepStartEndInfo(const std::string op_name) {
190 std::lock_guard<std::mutex> locker(record_mutex_);
191 if (step_start_end_info_.iter_start_op_name.empty()) {
192 step_start_end_info_.iter_start_op_name = op_name;
193 step_start_end_info_.fp_start_op_name = op_name;
194 }
195
196 std::string fp_start_op_name = step_start_end_info_.fp_start_op_name;
197
198 auto op_type_begin_iter = fp_start_op_name.rfind('/') + 1;
199 auto op_type_end_iter = fp_start_op_name.rfind('-');
200 auto op_type = fp_start_op_name.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
201 if (op_type == "InitDataSetQueue" || op_type == "GetNext") {
202 step_start_end_info_.fp_start_op_name = op_name;
203 }
204 step_start_end_info_.iter_end_op_name = op_name;
205 step_start_end_info_vector_.push_back(op_name);
206 }
207
GetInstance()208 std::shared_ptr<ProfilerManager> &ProfilerManager::GetInstance() {
209 MS_EXCEPTION_IF_NULL(profiler_manager_inst_);
210 return profiler_manager_inst_;
211 }
212
GetProfilingEnableFlag() const213 bool ProfilerManager::GetProfilingEnableFlag() const {
214 if (auto gpu_instance = Profiler::GetInstance(kGPUDevice); gpu_instance != nullptr) {
215 return gpu_instance->GetEnableFlag();
216 }
217
218 if (auto ascend_instance = Profiler::GetInstance(kAscendDevice); ascend_instance != nullptr) {
219 return ascend_instance->GetEnableFlag();
220 }
221
222 return false;
223 }
224
RecordOneStepStartEndInfo() const225 void ProfilerManager::RecordOneStepStartEndInfo() const {
226 if (auto gpu_instance = Profiler::GetInstance(kGPUDevice); gpu_instance != nullptr && gpu_instance->GetEnableFlag()) {
227 gpu_instance->RecordOneStepStartEndInfo();
228 }
229 }
230
GetProfilingOptions() const231 std::string ProfilerManager::GetProfilingOptions() const {
232 if (auto ascend_instance = Profiler::GetInstance(kAscendDevice); ascend_instance != nullptr) {
233 return ascend_instance->GetProfilingOptions();
234 }
235
236 return "";
237 }
238
ProfileDataPath() const239 std::string ProfilerManager::ProfileDataPath() const {
240 if (auto gpu_instance = Profiler::GetInstance(kGPUDevice); gpu_instance != nullptr) {
241 return gpu_instance->ProfileDataPath();
242 }
243
244 if (auto ascend_instance = Profiler::GetInstance(kAscendDevice); ascend_instance != nullptr) {
245 return ascend_instance->ProfileDataPath();
246 }
247
248 return "";
249 }
250
SetProfileFramework(const std::string & profile_framework)251 void ProfilerManager::SetProfileFramework(const std::string &profile_framework) {
252 profile_framework_ = profile_framework;
253 }
254
NeedCollectHostTime() const255 bool ProfilerManager::NeedCollectHostTime() const {
256 return profile_framework_ == "all" || profile_framework_ == "time";
257 }
258
NeedCollectHostMemory() const259 bool ProfilerManager::NeedCollectHostMemory() const {
260 return profile_framework_ == "memory" || profile_framework_ == "all";
261 }
262
EnableCollectHost() const263 bool ProfilerManager::EnableCollectHost() const { return profile_framework_ != "NULL"; }
264
GetClockTime()265 uint64_t GetClockTime() {
266 auto ts = std::chrono::system_clock::now();
267 int64_t system_t = std::chrono::duration_cast<std::chrono::microseconds>(ts.time_since_epoch()).count();
268 return static_cast<uint64_t>(system_t);
269 }
270
GetClockSyscnt()271 uint64_t GetClockSyscnt() {
272 uint64_t cycles;
273 #if defined(__aarch64__)
274 asm volatile("mrs %0, cntvct_el0" : "=r"(cycles));
275 #elif defined(__x86_64__)
276 constexpr uint32_t uint32Bits = 32U;
277 uint32_t hi = 0;
278 uint32_t lo = 0;
279 __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
280 cycles = (static_cast<uint64_t>(lo)) | ((static_cast<uint64_t>(hi)) << uint32Bits);
281 #elif defined(__arm__)
282 const uint32_t uint32Bits = 32U;
283 uint32_t hi = 0;
284 uint32_t lo = 0;
285 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r"(lo), "=r"(hi));
286 cycles = (static_cast<uint64_t>(lo)) | ((static_cast<uint64_t>(hi)) << uint32Bits);
287 #else
288 cycles = 0;
289 #endif
290 return cycles;
291 }
292
CollectHostInfo(const std::string & module_name,const std::string & event,const std::string & stage,int level,int profile_framework,int start_end,const std::map<std::string,std::string> & custom_info)293 void CollectHostInfo(const std::string &module_name, const std::string &event, const std::string &stage, int level,
294 int profile_framework, int start_end, const std::map<std::string, std::string> &custom_info) {
295 #ifndef ENABLE_SECURITY
296 #ifndef __linux__
297 return;
298 #else
299 auto profiler_manager = profiler::ProfilerManager::GetInstance();
300 MS_EXCEPTION_IF_NULL(profiler_manager);
301 if (!profiler_manager->GetProfilingEnableFlag() || !profiler_manager->EnableCollectHost()) {
302 first_open_file = true;
303 MS_LOG(DEBUG) << "Profiler or profile_framework is not enabled, no need to record Host info.";
304 return;
305 }
306 auto output_path = profiler_manager->ProfileDataPath();
307 if (output_path.empty()) {
308 MS_LOG(ERROR) << "The output path is empty, skip collect host info.";
309 return;
310 }
311 HostProfileData host_profile_data;
312 host_profile_data.module_name = module_name;
313 host_profile_data.event = event;
314 host_profile_data.stage = stage;
315 host_profile_data.level = level;
316 host_profile_data.start_end = start_end;
317 host_profile_data.custom_info = custom_info;
318 auto tid = std::this_thread::get_id();
319 host_profile_data.tid = tid;
320
321 host_profile_data.pid = getpid();
322 host_profile_data.parent_pid = getppid();
323
324 // Collect Host info.
325 if (profiler_manager->NeedCollectHostTime()) {
326 auto now = std::chrono::system_clock::now();
327 int64_t us = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
328 uint64_t time_stamp = static_cast<uint64_t>(us);
329 host_profile_data.time_stamp = time_stamp;
330 }
331 if ((profile_framework == profile_all || profile_framework == profile_memory) &&
332 profiler_manager->NeedCollectHostMemory()) {
333 ProcessStatus process_status = ProcessStatus::GetInstance();
334 int64_t memory_usage = process_status.GetMemoryCost(kVmRSS);
335 host_profile_data.memory_usage = memory_usage;
336 }
337 std::lock_guard<std::mutex> lock(file_line_mutex);
338 WriteHostDataToFile(host_profile_data, output_path);
339 return;
340 #endif
341 #endif
342 }
343 #ifdef __linux__
WriteHostDataToFile(const HostProfileData & host_profile_data,const std::string & output_path)344 void WriteHostDataToFile(const HostProfileData &host_profile_data, const std::string &output_path) {
345 if (host_profile_data.memory_usage == 0 && host_profile_data.time_stamp == 0) {
346 return;
347 }
348 std::string file_name = "host_info_0.csv";
349 std::string rank_id = common::GetEnv("RANK_ID");
350 auto context = MsContext::GetInstance();
351 MS_EXCEPTION_IF_NULL(context);
352 if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice) {
353 rank_id = std::to_string(context->get_param<uint32_t>(MS_CTX_DEVICE_ID));
354 }
355 if (!rank_id.empty()) {
356 file_name = "host_info_" + rank_id + ".csv";
357 }
358 std::string csv_file = output_path + "/host_info/" + file_name;
359 // try to open file
360 CsvWriter &csv = CsvWriter::GetInstance();
361 int retry = 2;
362 while (retry > 0) {
363 if (first_open_file && csv.OpenFile(csv_file, HostDataHeader, true)) {
364 first_open_file = false;
365 break;
366 } else if (csv.OpenFile(csv_file, HostDataHeader)) {
367 break;
368 }
369 retry--;
370 }
371 if (retry == 0) {
372 MS_LOG(WARNING) << "Open csv file failed, skipping saving host info to file.";
373 return;
374 }
375 std::string row = "";
376 const std::string kSeparator = ",";
377 std::ostringstream ss_tid;
378 ss_tid << host_profile_data.tid;
379 row.append(ss_tid.str() + kSeparator);
380 row.append(std::to_string(host_profile_data.pid) + kSeparator);
381 row.append(std::to_string(host_profile_data.parent_pid) + kSeparator);
382 row.append(host_profile_data.module_name + kSeparator);
383 row.append(host_profile_data.event + kSeparator);
384 row.append(host_profile_data.stage + kSeparator);
385 row.append(std::to_string(host_profile_data.level) + kSeparator);
386 row.append(std::to_string(host_profile_data.start_end) + kSeparator);
387 if (!host_profile_data.custom_info.empty()) {
388 std::string custom_info_str = "{";
389 for (auto it = host_profile_data.custom_info.cbegin(); it != host_profile_data.custom_info.cend(); ++it) {
390 custom_info_str += "{" + it->first + ":" + it->second + "}";
391 }
392 custom_info_str += "}";
393 if (custom_info_str.find(",") != std::string::npos) {
394 custom_info_str = "\"" + custom_info_str + "\"";
395 }
396 row.append(custom_info_str + kSeparator);
397 } else {
398 row.append(kSeparator);
399 }
400 row.append(std::to_string(host_profile_data.memory_usage) + kSeparator);
401 row.append(std::to_string(host_profile_data.time_stamp));
402
403 csv.WriteToCsv(row, true);
404 csv.CloseFile();
405 }
406 #endif
407 } // namespace profiler
408 } // namespace mindspore
409