1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
18 #include "plugin/device/cpu/hal/profiler/cpu_data_saver.h"
19 #include "utils/log_adapter.h"
20 #include "utils/ms_context.h"
21
22 namespace mindspore {
23 namespace profiler {
24 namespace cpu {
25 namespace {
26 PROFILER_REG(kCPUDevice, CPUProfiler);
27 } // namespace
GetInstance()28 std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
29 auto instance = Profiler::GetInstance(kCPUDevice);
30 MS_EXCEPTION_IF_NULL(instance);
31 return std::dynamic_pointer_cast<CPUProfiler>(instance);
32 }
33
Init(const std::string & profiling_path,uint32_t,const std::string &)34 void CPUProfiler::Init(const std::string &profiling_path, uint32_t, const std::string &) {
35 MS_LOG(INFO) << "Initialize CPU Profiling";
36 base_time_ = GetHostMonoTimeStamp();
37 profile_data_path_ = profiling_path;
38 MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_;
39 }
40
StepProfilingEnable(const bool enable_flag)41 void CPUProfiler::StepProfilingEnable(const bool enable_flag) {
42 MS_LOG(INFO) << "CPU Profiler enable flag: " << enable_flag;
43 enable_flag_ = enable_flag;
44 }
45
SetRunTimeData(const std::string & op_name,const uint32_t pid,bool is_parallel)46 void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid, bool is_parallel) {
47 if (!is_parallel) {
48 op_name_ = op_name;
49 pid_ = pid;
50 }
51 {
52 std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
53 auto iter = op_info_map_.find(op_name);
54 if (iter != op_info_map_.end()) {
55 iter->second.op_count += 1;
56 return;
57 }
58 }
59 std::unique_lock<std::shared_mutex> lock(op_map_mutex_);
60 OpInfo op_info;
61 op_info.op_name = op_name;
62 op_info.pid = pid;
63 op_info.op_count = 1;
64 op_info_map_[op_name] = op_info;
65 }
66
SetRuntimeStart(const std::string op_name,const uint64_t start_timestamp)67 void CPUProfiler::SetRuntimeStart(const std::string op_name, const uint64_t start_timestamp) {
68 std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
69 auto iter = op_info_map_.find(op_name);
70 if (iter != op_info_map_.end()) {
71 iter->second.tmp_start_duration.start_timestamp = start_timestamp;
72 auto actor_manager = ActorMgr::GetActorMgrRef();
73 MS_EXCEPTION_IF_NULL(actor_manager);
74 auto thread_pool = actor_manager->GetActorThreadPool();
75 auto worker_ids_map = thread_pool->GetWorkerIdMap();
76 auto id_iter = worker_ids_map.find(std::this_thread::get_id());
77 if (id_iter != worker_ids_map.end()) {
78 iter->second.tmp_start_duration.tid = id_iter->second;
79 }
80 }
81 }
82
SetRuntimeEnd(const std::string op_name,const uint64_t stop_timestamp)83 float CPUProfiler::SetRuntimeEnd(const std::string op_name, const uint64_t stop_timestamp) {
84 float op_time_elapsed = 0;
85 std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
86 auto iter = op_info_map_.find(op_name);
87 if (iter != op_info_map_.end()) {
88 iter->second.tmp_start_duration.duration =
89 (stop_timestamp - iter->second.tmp_start_duration.start_timestamp) / kNanosecondToMillisecond;
90 auto actor_manager = ActorMgr::GetActorMgrRef();
91 MS_EXCEPTION_IF_NULL(actor_manager);
92 auto thread_pool = actor_manager->GetActorThreadPool();
93 auto worker_ids_map = thread_pool->GetWorkerIdMap();
94 auto id_iter = worker_ids_map.find(std::this_thread::get_id());
95 if (id_iter != worker_ids_map.end()) {
96 if (iter->second.tmp_start_duration.tid != id_iter->second) {
97 MS_LOG(EXCEPTION) << "Op " << op_name << " start time thread id must be equal to end thread id.";
98 }
99 }
100 (void)iter->second.start_duration.emplace_back(iter->second.tmp_start_duration);
101 op_time_elapsed = iter->second.tmp_start_duration.duration;
102 }
103 return op_time_elapsed;
104 }
105
OpDataProducerBeginParallel(const std::string op_name,const uint32_t pid)106 void CPUProfiler::OpDataProducerBeginParallel(const std::string op_name, const uint32_t pid) {
107 auto start_timestamp = GetHostMonoTimeStamp();
108 SetRunTimeData(op_name, pid, true);
109 SetRuntimeStart(op_name, start_timestamp);
110
111 RecordGpuOneStepStartEndInfo();
112 }
113
RecordFrameWorkInfo(const CNodePtr & kernel)114 void CPUProfiler::RecordFrameWorkInfo(const CNodePtr &kernel) {
115 CurKernelInputInfo cur_kernel_input_info;
116 CurKernelInfo cur_kernel_info;
117 auto op_name = kernel->fullname_with_scope();
118 auto begin_iter = op_name.rfind('/') + 1;
119 auto end_iter = op_name.rfind('-');
120 if (begin_iter != std::string::npos && end_iter != std::string::npos && begin_iter < end_iter) {
121 cur_kernel_info.op_type = op_name.substr(begin_iter, end_iter - begin_iter);
122 cur_kernel_info.op_name = op_name.substr(begin_iter, op_name.length() - begin_iter);
123 }
124 for (size_t i = 0; i < kernel->size(); i++) {
125 if (kernel->input(i)->Shape() != nullptr) {
126 cur_kernel_input_info.input_id = i;
127 cur_kernel_input_info.shape = kernel->input(i)->Shape()->DumpText();
128 cur_kernel_info.cur_kernel_all_inputs_info.push_back(cur_kernel_input_info);
129 }
130 }
131 std::lock_guard<std::mutex> locker(kernel_mutex_);
132 all_kernel_info_.push_back(cur_kernel_info);
133 }
134
RecordFrameWorkInfo(const std::string & op_name,const std::vector<BaseShapePtr> & input_shapes)135 void CPUProfiler::RecordFrameWorkInfo(const std::string &op_name, const std::vector<BaseShapePtr> &input_shapes) {
136 CurKernelInputInfo cur_kernel_input_info;
137 CurKernelInfo cur_kernel_info;
138 auto begin_iter = op_name.rfind('/') + 1;
139 auto end_iter = op_name.rfind('-');
140 if (begin_iter != std::string::npos && end_iter != std::string::npos && begin_iter < end_iter) {
141 cur_kernel_info.op_type = op_name.substr(begin_iter, end_iter - begin_iter);
142 cur_kernel_info.op_name = op_name.substr(begin_iter, op_name.length() - begin_iter);
143 }
144 size_t input_size = input_shapes.size();
145 for (size_t i = 0; i < input_size; ++i) {
146 if (input_shapes[i] != nullptr) {
147 cur_kernel_input_info.input_id = i;
148 cur_kernel_input_info.shape = input_shapes[i]->DumpText();
149 cur_kernel_info.cur_kernel_all_inputs_info.push_back(cur_kernel_input_info);
150 }
151 }
152 std::lock_guard<std::mutex> locker(kernel_mutex_);
153 all_kernel_info_.push_back(cur_kernel_info);
154 }
155
OpDataProducerEndParallel(const std::string op_name)156 void CPUProfiler::OpDataProducerEndParallel(const std::string op_name) {
157 auto stop_timestamp = GetHostMonoTimeStamp();
158 float op_time_elapsed = SetRuntimeEnd(op_name, stop_timestamp);
159 MS_LOG(DEBUG) << "Host Time Elapsed(ms)," << op_name << "," << op_time_elapsed;
160 Profiler::SetRunTimeData(op_name, op_time_elapsed);
161 }
162
OpDataProducerBegin(const std::string op_name,const uint32_t pid)163 void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) {
164 if (!GetEnableFlag() || !GetOpTimeFlag()) {
165 return;
166 }
167 op_time_start_ = GetHostMonoTimeStamp();
168 op_time_mono_start_ = GetHostMonoTimeStamp();
169 SetRunTimeData(op_name, pid);
170 RecordGpuOneStepStartEndInfo();
171 }
172
OpDataProducerEnd()173 void CPUProfiler::OpDataProducerEnd() {
174 if (!GetEnableFlag() || !GetOpTimeFlag()) {
175 return;
176 }
177 float op_time_elapsed = 0;
178 op_time_stop_ = GetHostMonoTimeStamp();
179 op_time_elapsed = (op_time_stop_ - op_time_start_) / kNanosecondToMillisecond;
180 MS_LOG(DEBUG) << "Host Time Elapsed(ms)," << op_name_ << "," << op_time_elapsed;
181 Profiler::SetRunTimeData(op_name_, op_time_elapsed);
182 Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
183 }
184
Stop()185 void CPUProfiler::Stop() {
186 MS_LOG(INFO) << "Stop CPU Profiling";
187 SaveProfileData();
188 ClearInst();
189 }
190
SaveProfileData()191 void CPUProfiler::SaveProfileData() {
192 if (profile_data_path_.empty()) {
193 MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
194 } else {
195 auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
196 MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
197 cpu_data_saver_inst->ParseOpInfo(op_info_map_);
198 cpu_data_saver_inst->ParseMemoryInfo(memory_info_list_);
199 cpu_data_saver_inst->WriteFile(profile_data_path_);
200 if (!all_kernel_info_.empty()) {
201 cpu_data_saver_inst->WriteFrameWork(profile_data_path_, all_kernel_info_);
202 }
203 }
204 }
205
SetGpuHeteroStatus()206 void CPUProfiler::SetGpuHeteroStatus() {
207 if (!is_gpu_hetero_.has_value()) {
208 is_gpu_hetero_ = Profiler::GetInstance(kGPUDevice) != nullptr;
209 }
210 }
211
ClearInst()212 void CPUProfiler::ClearInst() {
213 op_info_map_.clear();
214 all_step_start_end_info_.clear();
215 step_start_end_info_vector_.clear();
216 all_kernel_info_.clear();
217 init_flag_ = false;
218 enable_flag_ = false;
219 has_find_ = false;
220 }
221
RecordGpuOneStepStartEndInfo()222 void CPUProfiler::RecordGpuOneStepStartEndInfo() {
223 SetGpuHeteroStatus();
224 if (!is_gpu_hetero_.value()) {
225 return;
226 }
227
228 if (auto gpu_instance = Profiler::GetInstance(kGPUDevice);
229 gpu_instance != nullptr && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) &&
230 gpu_instance->GetEnableFlag() && gpu_instance->GetOpTimeFlag()) {
231 gpu_instance->RecordOneStepStartEndInfo();
232 }
233 }
234
RecordMemoryPoolInfo(const size_t total_allocated,const size_t total_reserved,const size_t used_by_event_mem)235 void CPUProfiler::RecordMemoryPoolInfo(const size_t total_allocated, const size_t total_reserved,
236 const size_t used_by_event_mem) {
237 if (!GetEnableFlag() || !GetProfileMemoryFlag()) {
238 return;
239 }
240 MemoryPoolInfo memory_info;
241 memory_info.time_stamp = GetHostMonoTimeStamp();
242 memory_info.total_allocated = total_allocated;
243 memory_info.total_reserved = total_reserved;
244 memory_info.total_active = total_allocated + used_by_event_mem;
245 memory_info_list_.push_back(memory_info);
246 }
247 } // namespace cpu
248 } // namespace profiler
249 } // namespace mindspore
250