• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "plugin/device/cpu/hal/profiler/cpu_profiling.h"
18 #include "plugin/device/cpu/hal/profiler/cpu_data_saver.h"
19 #include "utils/log_adapter.h"
20 #include "utils/ms_context.h"
21 
22 namespace mindspore {
23 namespace profiler {
24 namespace cpu {
25 namespace {
26 PROFILER_REG(kCPUDevice, CPUProfiler);
27 }  // namespace
GetInstance()28 std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
29   auto instance = Profiler::GetInstance(kCPUDevice);
30   MS_EXCEPTION_IF_NULL(instance);
31   return std::dynamic_pointer_cast<CPUProfiler>(instance);
32 }
33 
Init(const std::string & profiling_path,uint32_t,const std::string &)34 void CPUProfiler::Init(const std::string &profiling_path, uint32_t, const std::string &) {
35   MS_LOG(INFO) << "Initialize CPU Profiling";
36   base_time_ = GetHostMonoTimeStamp();
37   profile_data_path_ = profiling_path;
38   MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_;
39 }
40 
StepProfilingEnable(const bool enable_flag)41 void CPUProfiler::StepProfilingEnable(const bool enable_flag) {
42   MS_LOG(INFO) << "CPU Profiler enable flag: " << enable_flag;
43   enable_flag_ = enable_flag;
44 }
45 
SetRunTimeData(const std::string & op_name,const uint32_t pid,bool is_parallel)46 void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid, bool is_parallel) {
47   if (!is_parallel) {
48     op_name_ = op_name;
49     pid_ = pid;
50   }
51   {
52     std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
53     auto iter = op_info_map_.find(op_name);
54     if (iter != op_info_map_.end()) {
55       iter->second.op_count += 1;
56       return;
57     }
58   }
59   std::unique_lock<std::shared_mutex> lock(op_map_mutex_);
60   OpInfo op_info;
61   op_info.op_name = op_name;
62   op_info.pid = pid;
63   op_info.op_count = 1;
64   op_info_map_[op_name] = op_info;
65 }
66 
SetRuntimeStart(const std::string op_name,const uint64_t start_timestamp)67 void CPUProfiler::SetRuntimeStart(const std::string op_name, const uint64_t start_timestamp) {
68   std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
69   auto iter = op_info_map_.find(op_name);
70   if (iter != op_info_map_.end()) {
71     iter->second.tmp_start_duration.start_timestamp = start_timestamp;
72     auto actor_manager = ActorMgr::GetActorMgrRef();
73     MS_EXCEPTION_IF_NULL(actor_manager);
74     auto thread_pool = actor_manager->GetActorThreadPool();
75     auto worker_ids_map = thread_pool->GetWorkerIdMap();
76     auto id_iter = worker_ids_map.find(std::this_thread::get_id());
77     if (id_iter != worker_ids_map.end()) {
78       iter->second.tmp_start_duration.tid = id_iter->second;
79     }
80   }
81 }
82 
SetRuntimeEnd(const std::string op_name,const uint64_t stop_timestamp)83 float CPUProfiler::SetRuntimeEnd(const std::string op_name, const uint64_t stop_timestamp) {
84   float op_time_elapsed = 0;
85   std::shared_lock<std::shared_mutex> lock(op_map_mutex_);
86   auto iter = op_info_map_.find(op_name);
87   if (iter != op_info_map_.end()) {
88     iter->second.tmp_start_duration.duration =
89       (stop_timestamp - iter->second.tmp_start_duration.start_timestamp) / kNanosecondToMillisecond;
90     auto actor_manager = ActorMgr::GetActorMgrRef();
91     MS_EXCEPTION_IF_NULL(actor_manager);
92     auto thread_pool = actor_manager->GetActorThreadPool();
93     auto worker_ids_map = thread_pool->GetWorkerIdMap();
94     auto id_iter = worker_ids_map.find(std::this_thread::get_id());
95     if (id_iter != worker_ids_map.end()) {
96       if (iter->second.tmp_start_duration.tid != id_iter->second) {
97         MS_LOG(EXCEPTION) << "Op " << op_name << " start time thread id must be equal to end thread id.";
98       }
99     }
100     (void)iter->second.start_duration.emplace_back(iter->second.tmp_start_duration);
101     op_time_elapsed = iter->second.tmp_start_duration.duration;
102   }
103   return op_time_elapsed;
104 }
105 
OpDataProducerBeginParallel(const std::string op_name,const uint32_t pid)106 void CPUProfiler::OpDataProducerBeginParallel(const std::string op_name, const uint32_t pid) {
107   auto start_timestamp = GetHostMonoTimeStamp();
108   SetRunTimeData(op_name, pid, true);
109   SetRuntimeStart(op_name, start_timestamp);
110 
111   RecordGpuOneStepStartEndInfo();
112 }
113 
RecordFrameWorkInfo(const CNodePtr & kernel)114 void CPUProfiler::RecordFrameWorkInfo(const CNodePtr &kernel) {
115   CurKernelInputInfo cur_kernel_input_info;
116   CurKernelInfo cur_kernel_info;
117   auto op_name = kernel->fullname_with_scope();
118   auto begin_iter = op_name.rfind('/') + 1;
119   auto end_iter = op_name.rfind('-');
120   if (begin_iter != std::string::npos && end_iter != std::string::npos && begin_iter < end_iter) {
121     cur_kernel_info.op_type = op_name.substr(begin_iter, end_iter - begin_iter);
122     cur_kernel_info.op_name = op_name.substr(begin_iter, op_name.length() - begin_iter);
123   }
124   for (size_t i = 0; i < kernel->size(); i++) {
125     if (kernel->input(i)->Shape() != nullptr) {
126       cur_kernel_input_info.input_id = i;
127       cur_kernel_input_info.shape = kernel->input(i)->Shape()->DumpText();
128       cur_kernel_info.cur_kernel_all_inputs_info.push_back(cur_kernel_input_info);
129     }
130   }
131   std::lock_guard<std::mutex> locker(kernel_mutex_);
132   all_kernel_info_.push_back(cur_kernel_info);
133 }
134 
RecordFrameWorkInfo(const std::string & op_name,const std::vector<BaseShapePtr> & input_shapes)135 void CPUProfiler::RecordFrameWorkInfo(const std::string &op_name, const std::vector<BaseShapePtr> &input_shapes) {
136   CurKernelInputInfo cur_kernel_input_info;
137   CurKernelInfo cur_kernel_info;
138   auto begin_iter = op_name.rfind('/') + 1;
139   auto end_iter = op_name.rfind('-');
140   if (begin_iter != std::string::npos && end_iter != std::string::npos && begin_iter < end_iter) {
141     cur_kernel_info.op_type = op_name.substr(begin_iter, end_iter - begin_iter);
142     cur_kernel_info.op_name = op_name.substr(begin_iter, op_name.length() - begin_iter);
143   }
144   size_t input_size = input_shapes.size();
145   for (size_t i = 0; i < input_size; ++i) {
146     if (input_shapes[i] != nullptr) {
147       cur_kernel_input_info.input_id = i;
148       cur_kernel_input_info.shape = input_shapes[i]->DumpText();
149       cur_kernel_info.cur_kernel_all_inputs_info.push_back(cur_kernel_input_info);
150     }
151   }
152   std::lock_guard<std::mutex> locker(kernel_mutex_);
153   all_kernel_info_.push_back(cur_kernel_info);
154 }
155 
OpDataProducerEndParallel(const std::string op_name)156 void CPUProfiler::OpDataProducerEndParallel(const std::string op_name) {
157   auto stop_timestamp = GetHostMonoTimeStamp();
158   float op_time_elapsed = SetRuntimeEnd(op_name, stop_timestamp);
159   MS_LOG(DEBUG) << "Host Time Elapsed(ms)," << op_name << "," << op_time_elapsed;
160   Profiler::SetRunTimeData(op_name, op_time_elapsed);
161 }
162 
OpDataProducerBegin(const std::string op_name,const uint32_t pid)163 void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) {
164   if (!GetEnableFlag() || !GetOpTimeFlag()) {
165     return;
166   }
167   op_time_start_ = GetHostMonoTimeStamp();
168   op_time_mono_start_ = GetHostMonoTimeStamp();
169   SetRunTimeData(op_name, pid);
170   RecordGpuOneStepStartEndInfo();
171 }
172 
OpDataProducerEnd()173 void CPUProfiler::OpDataProducerEnd() {
174   if (!GetEnableFlag() || !GetOpTimeFlag()) {
175     return;
176   }
177   float op_time_elapsed = 0;
178   op_time_stop_ = GetHostMonoTimeStamp();
179   op_time_elapsed = (op_time_stop_ - op_time_start_) / kNanosecondToMillisecond;
180   MS_LOG(DEBUG) << "Host Time Elapsed(ms)," << op_name_ << "," << op_time_elapsed;
181   Profiler::SetRunTimeData(op_name_, op_time_elapsed);
182   Profiler::SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
183 }
184 
Stop()185 void CPUProfiler::Stop() {
186   MS_LOG(INFO) << "Stop CPU Profiling";
187   SaveProfileData();
188   ClearInst();
189 }
190 
SaveProfileData()191 void CPUProfiler::SaveProfileData() {
192   if (profile_data_path_.empty()) {
193     MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
194   } else {
195     auto cpu_data_saver_inst = profiler::cpu::CpuDataSaver::GetInstance();
196     MS_EXCEPTION_IF_NULL(cpu_data_saver_inst);
197     cpu_data_saver_inst->ParseOpInfo(op_info_map_);
198     cpu_data_saver_inst->ParseMemoryInfo(memory_info_list_);
199     cpu_data_saver_inst->WriteFile(profile_data_path_);
200     if (!all_kernel_info_.empty()) {
201       cpu_data_saver_inst->WriteFrameWork(profile_data_path_, all_kernel_info_);
202     }
203   }
204 }
205 
SetGpuHeteroStatus()206 void CPUProfiler::SetGpuHeteroStatus() {
207   if (!is_gpu_hetero_.has_value()) {
208     is_gpu_hetero_ = Profiler::GetInstance(kGPUDevice) != nullptr;
209   }
210 }
211 
ClearInst()212 void CPUProfiler::ClearInst() {
213   op_info_map_.clear();
214   all_step_start_end_info_.clear();
215   step_start_end_info_vector_.clear();
216   all_kernel_info_.clear();
217   init_flag_ = false;
218   enable_flag_ = false;
219   has_find_ = false;
220 }
221 
RecordGpuOneStepStartEndInfo()222 void CPUProfiler::RecordGpuOneStepStartEndInfo() {
223   SetGpuHeteroStatus();
224   if (!is_gpu_hetero_.value()) {
225     return;
226   }
227 
228   if (auto gpu_instance = Profiler::GetInstance(kGPUDevice);
229       gpu_instance != nullptr && MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT) &&
230       gpu_instance->GetEnableFlag() && gpu_instance->GetOpTimeFlag()) {
231     gpu_instance->RecordOneStepStartEndInfo();
232   }
233 }
234 
RecordMemoryPoolInfo(const size_t total_allocated,const size_t total_reserved,const size_t used_by_event_mem)235 void CPUProfiler::RecordMemoryPoolInfo(const size_t total_allocated, const size_t total_reserved,
236                                        const size_t used_by_event_mem) {
237   if (!GetEnableFlag() || !GetProfileMemoryFlag()) {
238     return;
239   }
240   MemoryPoolInfo memory_info;
241   memory_info.time_stamp = GetHostMonoTimeStamp();
242   memory_info.total_allocated = total_allocated;
243   memory_info.total_reserved = total_reserved;
244   memory_info.total_active = total_allocated + used_by_event_mem;
245   memory_info_list_.push_back(memory_info);
246 }
247 }  // namespace cpu
248 }  // namespace profiler
249 }  // namespace mindspore
250